| 1 | n/a | #! /usr/bin/env python3 |
|---|
| 2 | n/a | # -*- coding: iso-8859-1 -*- |
|---|
| 3 | n/a | # Originally written by Barry Warsaw <barry@python.org> |
|---|
| 4 | n/a | # |
|---|
| 5 | n/a | # Minimally patched to make it even more xgettext compatible |
|---|
| 6 | n/a | # by Peter Funk <pf@artcom-gmbh.de> |
|---|
| 7 | n/a | # |
|---|
| 8 | n/a | # 2002-11-22 Jürgen Hermann <jh@web.de> |
|---|
| 9 | n/a | # Added checks that _() only contains string literals, and |
|---|
| 10 | n/a | # command line args are resolved to module lists, i.e. you |
|---|
| 11 | n/a | # can now pass a filename, a module or package name, or a |
|---|
| 12 | n/a | # directory (including globbing chars, important for Win32). |
|---|
| 13 | n/a | # Made docstring fit in 80 chars wide displays using pydoc. |
|---|
| 14 | n/a | # |
|---|
| 15 | n/a | |
|---|
| 16 | n/a | # for selftesting |
|---|
| 17 | n/a | try: |
|---|
| 18 | n/a | import fintl |
|---|
| 19 | n/a | _ = fintl.gettext |
|---|
| 20 | n/a | except ImportError: |
|---|
| 21 | n/a | _ = lambda s: s |
|---|
| 22 | n/a | |
|---|
| 23 | n/a | __doc__ = _("""pygettext -- Python equivalent of xgettext(1) |
|---|
| 24 | n/a | |
|---|
| 25 | n/a | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the |
|---|
| 26 | n/a | internationalization of C programs. Most of these tools are independent of |
|---|
| 27 | n/a | the programming language and can be used from within Python programs. |
|---|
| 28 | n/a | Martin von Loewis' work[1] helps considerably in this regard. |
|---|
| 29 | n/a | |
|---|
| 30 | n/a | There's one problem though; xgettext is the program that scans source code |
|---|
| 31 | n/a | looking for message strings, but it groks only C (or C++). Python |
|---|
| 32 | n/a | introduces a few wrinkles, such as dual quoting characters, triple quoted |
|---|
| 33 | n/a | strings, and raw strings. xgettext understands none of this. |
|---|
| 34 | n/a | |
|---|
| 35 | n/a | Enter pygettext, which uses Python's standard tokenize module to scan |
|---|
| 36 | n/a | Python source code, generating .pot files identical to what GNU xgettext[2] |
|---|
| 37 | n/a | generates for C and C++ code. From there, the standard GNU tools can be |
|---|
| 38 | n/a | used. |
|---|
| 39 | n/a | |
|---|
| 40 | n/a | A word about marking Python strings as candidates for translation. GNU |
|---|
| 41 | n/a | xgettext recognizes the following keywords: gettext, dgettext, dcgettext, |
|---|
| 42 | n/a | and gettext_noop. But those can be a lot of text to include all over your |
|---|
| 43 | n/a | code. C and C++ have a trick: they use the C preprocessor. Most |
|---|
| 44 | n/a | internationalized C source includes a #define for gettext() to _() so that |
|---|
| 45 | n/a | what has to be written in the source is much less. Thus these are both |
|---|
| 46 | n/a | translatable strings: |
|---|
| 47 | n/a | |
|---|
| 48 | n/a | gettext("Translatable String") |
|---|
| 49 | n/a | _("Translatable String") |
|---|
| 50 | n/a | |
|---|
| 51 | n/a | Python of course has no preprocessor so this doesn't work so well. Thus, |
|---|
| 52 | n/a | pygettext searches only for _() by default, but see the -k/--keyword flag |
|---|
| 53 | n/a | below for how to augment this. |
|---|
| 54 | n/a | |
|---|
| 55 | n/a | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html |
|---|
| 56 | n/a | [2] http://www.gnu.org/software/gettext/gettext.html |
|---|
| 57 | n/a | |
|---|
| 58 | n/a | NOTE: pygettext attempts to be option and feature compatible with GNU |
|---|
| 59 | n/a | xgettext where ever possible. However some options are still missing or are |
|---|
| 60 | n/a | not fully implemented. Also, xgettext's use of command line switches with |
|---|
| 61 | n/a | option arguments is broken, and in these cases, pygettext just defines |
|---|
| 62 | n/a | additional switches. |
|---|
| 63 | n/a | |
|---|
| 64 | n/a | Usage: pygettext [options] inputfile ... |
|---|
| 65 | n/a | |
|---|
| 66 | n/a | Options: |
|---|
| 67 | n/a | |
|---|
| 68 | n/a | -a |
|---|
| 69 | n/a | --extract-all |
|---|
| 70 | n/a | Extract all strings. |
|---|
| 71 | n/a | |
|---|
| 72 | n/a | -d name |
|---|
| 73 | n/a | --default-domain=name |
|---|
| 74 | n/a | Rename the default output file from messages.pot to name.pot. |
|---|
| 75 | n/a | |
|---|
| 76 | n/a | -E |
|---|
| 77 | n/a | --escape |
|---|
| 78 | n/a | Replace non-ASCII characters with octal escape sequences. |
|---|
| 79 | n/a | |
|---|
| 80 | n/a | -D |
|---|
| 81 | n/a | --docstrings |
|---|
| 82 | n/a | Extract module, class, method, and function docstrings. These do |
|---|
| 83 | n/a | not need to be wrapped in _() markers, and in fact cannot be for |
|---|
| 84 | n/a | Python to consider them docstrings. (See also the -X option). |
|---|
| 85 | n/a | |
|---|
| 86 | n/a | -h |
|---|
| 87 | n/a | --help |
|---|
| 88 | n/a | Print this help message and exit. |
|---|
| 89 | n/a | |
|---|
| 90 | n/a | -k word |
|---|
| 91 | n/a | --keyword=word |
|---|
| 92 | n/a | Keywords to look for in addition to the default set, which are: |
|---|
| 93 | n/a | %(DEFAULTKEYWORDS)s |
|---|
| 94 | n/a | |
|---|
| 95 | n/a | You can have multiple -k flags on the command line. |
|---|
| 96 | n/a | |
|---|
| 97 | n/a | -K |
|---|
| 98 | n/a | --no-default-keywords |
|---|
| 99 | n/a | Disable the default set of keywords (see above). Any keywords |
|---|
| 100 | n/a | explicitly added with the -k/--keyword option are still recognized. |
|---|
| 101 | n/a | |
|---|
| 102 | n/a | --no-location |
|---|
| 103 | n/a | Do not write filename/lineno location comments. |
|---|
| 104 | n/a | |
|---|
| 105 | n/a | -n |
|---|
| 106 | n/a | --add-location |
|---|
| 107 | n/a | Write filename/lineno location comments indicating where each |
|---|
| 108 | n/a | extracted string is found in the source. These lines appear before |
|---|
| 109 | n/a | each msgid. The style of comments is controlled by the -S/--style |
|---|
| 110 | n/a | option. This is the default. |
|---|
| 111 | n/a | |
|---|
| 112 | n/a | -o filename |
|---|
| 113 | n/a | --output=filename |
|---|
| 114 | n/a | Rename the default output file from messages.pot to filename. If |
|---|
| 115 | n/a | filename is `-' then the output is sent to standard out. |
|---|
| 116 | n/a | |
|---|
| 117 | n/a | -p dir |
|---|
| 118 | n/a | --output-dir=dir |
|---|
| 119 | n/a | Output files will be placed in directory dir. |
|---|
| 120 | n/a | |
|---|
| 121 | n/a | -S stylename |
|---|
| 122 | n/a | --style stylename |
|---|
| 123 | n/a | Specify which style to use for location comments. Two styles are |
|---|
| 124 | n/a | supported: |
|---|
| 125 | n/a | |
|---|
| 126 | n/a | Solaris # File: filename, line: line-number |
|---|
| 127 | n/a | GNU #: filename:line |
|---|
| 128 | n/a | |
|---|
| 129 | n/a | The style name is case insensitive. GNU style is the default. |
|---|
| 130 | n/a | |
|---|
| 131 | n/a | -v |
|---|
| 132 | n/a | --verbose |
|---|
| 133 | n/a | Print the names of the files being processed. |
|---|
| 134 | n/a | |
|---|
| 135 | n/a | -V |
|---|
| 136 | n/a | --version |
|---|
| 137 | n/a | Print the version of pygettext and exit. |
|---|
| 138 | n/a | |
|---|
| 139 | n/a | -w columns |
|---|
| 140 | n/a | --width=columns |
|---|
| 141 | n/a | Set width of output to columns. |
|---|
| 142 | n/a | |
|---|
| 143 | n/a | -x filename |
|---|
| 144 | n/a | --exclude-file=filename |
|---|
| 145 | n/a | Specify a file that contains a list of strings that are not be |
|---|
| 146 | n/a | extracted from the input files. Each string to be excluded must |
|---|
| 147 | n/a | appear on a line by itself in the file. |
|---|
| 148 | n/a | |
|---|
| 149 | n/a | -X filename |
|---|
| 150 | n/a | --no-docstrings=filename |
|---|
| 151 | n/a | Specify a file that contains a list of files (one per line) that |
|---|
| 152 | n/a | should not have their docstrings extracted. This is only useful in |
|---|
| 153 | n/a | conjunction with the -D option above. |
|---|
| 154 | n/a | |
|---|
| 155 | n/a | If `inputfile' is -, standard input is read. |
|---|
| 156 | n/a | """) |
|---|
| 157 | n/a | |
|---|
| 158 | n/a | import os |
|---|
| 159 | n/a | import importlib.machinery |
|---|
| 160 | n/a | import importlib.util |
|---|
| 161 | n/a | import sys |
|---|
| 162 | n/a | import glob |
|---|
| 163 | n/a | import time |
|---|
| 164 | n/a | import getopt |
|---|
| 165 | n/a | import token |
|---|
| 166 | n/a | import tokenize |
|---|
| 167 | n/a | |
|---|
# Version reported by -V/--version and written into the "Generated-By" header.
__version__ = '1.5'

# Function names treated as translation markers unless -K is given.
default_keywords = ['_']
# Printable form of the default keywords; the usage text interpolates it via
# the %(DEFAULTKEYWORDS)s placeholder.
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used to glue adjacent string literals inside one marker call.
EMPTYSTRING = ''
|---|
| 174 | n/a | |
|---|
| 175 | n/a | |
|---|
| 176 | n/a | |
|---|
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
#
# The template is filled in with %-formatting from a mapping that supplies
# the keys 'time', 'charset', 'encoding' and 'version' (see TokenEater.write).
# The doubled backslashes keep a literal "\n" escape in the emitted header
# lines instead of a real line break.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(encoding)s\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
|---|
| 197 | n/a | |
|---|
| 198 | n/a | |
|---|
def usage(code, msg=''):
    """Print the help text on stderr and exit with status *code*.

    The module docstring is used as the help text; its %(...)s placeholders
    are filled from the module globals.  When *msg* is truthy it is printed
    after the help text, also on stderr.
    """
    err = sys.stderr
    help_text = __doc__ % globals()
    print(help_text, file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
|---|
| 204 | n/a | |
|---|
| 205 | n/a | |
|---|
| 206 | n/a | |
|---|
def make_escapes(pass_nonascii):
    """Build the global ``escapes`` table and pick the global ``escape``.

    pass_nonascii -- when true, non-ASCII characters are left untouched, so
    that e.g. 'msgid "Höhe"' does not come out as 'msgid "H\\366he"'; only
    the 128 ASCII code points get a table entry and ``escape_ascii`` is
    selected.  When false the table covers all 256 byte values and
    ``escape_nonascii`` is selected.

    Entries outside the printable range 32..126 are three-digit octal
    escapes; backslash, tab, CR, LF and the double quote get their C-style
    escapes.
    """
    global escapes, escape
    if pass_nonascii:
        size, escape = 128, escape_ascii
    else:
        size, escape = 256, escape_nonascii

    table = []
    for code in range(size):
        if 32 <= code < 127:
            table.append(chr(code))
        else:
            table.append(r"\%03o" % code)

    # Characters that need a symbolic escape regardless of printability.
    special = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for char, replacement in special:
        table[ord(char)] = replacement
    escapes = table
|---|
| 226 | n/a | |
|---|
| 227 | n/a | |
|---|
def escape_ascii(s, encoding):
    """Escape only the ASCII characters of *s*; pass everything else through.

    Each character below code point 128 is replaced by its entry in the
    global ``escapes`` table.  *encoding* is accepted for signature parity
    with escape_nonascii() and is not used.
    """
    pieces = []
    for char in s:
        code = ord(char)
        pieces.append(escapes[code] if code < 128 else char)
    return ''.join(pieces)
|---|
| 230 | n/a | |
|---|
def escape_nonascii(s, encoding):
    """Encode *s* with *encoding* and escape the result byte by byte.

    Every byte of the encoded string is looked up in the global ``escapes``
    table and the replacements are concatenated.
    """
    pieces = []
    for byte in s.encode(encoding):
        pieces.append(escapes[byte])
    return ''.join(pieces)
|---|
| 233 | n/a | |
|---|
| 234 | n/a | |
|---|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The expression is evaluated with an empty ``__builtins__`` mapping and
    an empty local namespace.

    NOTE(review): an empty ``__builtins__`` is not a real sandbox; this is
    only appropriate because the argument is expected to be a STRING token
    taken from source code being scanned.
    """
    restricted_globals = {'__builtins__': {}}
    no_locals = {}
    return eval(s, restricted_globals, no_locals)
|---|
| 238 | n/a | |
|---|
| 239 | n/a | |
|---|
def normalize(s, encoding):
    """Render the Python string *s* as a quoted .po-file string.

    A string without newlines becomes one double-quoted, escaped line.  A
    string with newlines becomes an empty "" line followed by one quoted
    line per source line, each but the last ending in an escaped newline.
    Escaping is delegated to the global ``escape`` function.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s, encoding) + '"'

    if not pieces[-1]:
        # The string ended in a newline: drop the empty tail and give the
        # newline back to the last real line so that it gets escaped.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece, encoding) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
|---|
| 255 | n/a | |
|---|
| 256 | n/a | |
|---|
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'.

    Returns True as soon as one element of 'set' is found in 'str', and
    False when none is (including when 'set' is empty).
    """
    # any() stops at the first hit instead of building a full list of
    # booleans and then searching it for a value equal to 1.
    return any(c in str for c in set)
|---|
| 260 | n/a | |
|---|
| 261 | n/a | |
|---|
| 262 | n/a | def _visit_pyfiles(list, dirname, names): |
|---|
| 263 | n/a | """Helper for getFilesForName().""" |
|---|
| 264 | n/a | # get extension for python source files |
|---|
| 265 | n/a | if '_py_ext' not in globals(): |
|---|
| 266 | n/a | global _py_ext |
|---|
| 267 | n/a | _py_ext = importlib.machinery.SOURCE_SUFFIXES[0] |
|---|
| 268 | n/a | |
|---|
| 269 | n/a | # don't recurse into CVS directories |
|---|
| 270 | n/a | if 'CVS' in names: |
|---|
| 271 | n/a | names.remove('CVS') |
|---|
| 272 | n/a | |
|---|
| 273 | n/a | # add all *.py files to list |
|---|
| 274 | n/a | list.extend( |
|---|
| 275 | n/a | [os.path.join(dirname, file) for file in names |
|---|
| 276 | n/a | if os.path.splitext(file)[1] == _py_ext] |
|---|
| 277 | n/a | ) |
|---|
| 278 | n/a | |
|---|
| 279 | n/a | |
|---|
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    Resolution order:

    * an existing directory is walked recursively and every file carrying
      the Python source suffix is returned ('CVS' directories are skipped);
    * any other existing path is returned as a one-element list;
    * a non-existing name containing glob characters is expanded and each
      match is resolved recursively;
    * any other non-existing name is looked up as a module; the origin
      reported by its import spec is then resolved like a path.

    An empty list is returned when nothing can be found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            found = []
            for match in glob.glob(name):
                found.extend(getFilesForName(match))
            return found

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            # ValueError covers names find_spec() rejects outright, such as
            # an empty or relative module name.
            spec = None
        # find_spec() returns None (it does not raise) for an unknown
        # top-level module, and origin may itself be None.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        found = []
        # os.walk() is a generator taking no visitor callback; it must be
        # iterated for anything to be collected.
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories (pruned in place)
            if 'CVS' in dirs:
                dirs.remove('CVS')
            found.extend(
                os.path.join(root, filename) for filename in files
                if os.path.splitext(filename)[1] == py_ext
            )
        return found
    if os.path.exists(name):
        # a single file
        return [name]

    return []
|---|
| 312 | n/a | |
|---|
| 313 | n/a | |
|---|
class TokenEater:
    """Token-driven state machine that collects translatable strings.

    An instance is called once per token (with the five fields of a
    tokenize token).  The current state is a bound method stored in
    ``__state``; each state inspects the token and may switch state:

    * ``__waiting``        -- idle; looks for docstrings and marker keywords
    * ``__suiteseen``      -- after 'class'/'def'; waits for the suite colon
    * ``__suitedocstring`` -- after that colon; looks for a docstring
    * ``__keywordseen``    -- after a marker keyword; expects '('
    * ``__openseen``       -- inside the marker call; gathers string tokens

    Collected messages map each message string to a dictionary of
    (filename, lineno) -> isdocstring flag.  ``options`` must provide the
    attributes read here: docstrings, nodocstrings, keywords, toexclude,
    writelocations, locationstyle, GNU, SOLARIS and width.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None
        # nesting depth of ( [ { while scanning a class/def header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        """Feed one token; only type, text and start row are used."""
        # dispatch
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                # tokenize.tokenize() starts every stream with an ENCODING
                # token; like comments and blank lines it must not end the
                # search for the module docstring.
                if ttype in (tokenize.COMMENT, tokenize.NL,
                             tokenize.ENCODING):
                    return
                # First real token and it is no docstring: stop looking,
                # but fall through so the token itself is still examined.
                self.__freshmodule = 0
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__enclosurecount = 0
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # Skip everything up to the colon that opens the suite.  Colons
        # nested in brackets (annotations, lambdas in defaults, slices) do
        # not count, hence the enclosure depth tracking.
        if ttype != tokenize.OP:
            return
        if tstring == ':' and self.__enclosurecount == 0:
            self.__state = self.__suitedocstring
        elif tstring in ('(', '[', '{'):
            self.__enclosurecount += 1
        elif tstring in (')', ']', '}'):
            self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT, tokenize.NL):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in (tokenize.COMMENT, tokenize.INDENT,
                           tokenize.DEDENT, tokenize.NEWLINE, tokenize.NL):
            # warn if we see anything else than STRING or whitespace
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record *msg* at (current file, lineno) unless it is excluded.

        *lineno* defaults to the line on which the marker call opened.
        """
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file named *filename*."""
        self.__curfile = filename
        self.__freshmodule = 1
        # Do not let a half-finished construct at the end of the previous
        # file leak into the next one.
        self.__state = self.__waiting
        self.__enclosurecount = 0

    def write(self, fp):
        """Write the header and all collected messages to text file *fp*.

        The charset announced in the header is ``fp.encoding``, or 'UTF-8'
        when that is empty or None.
        """
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        print(pot_header % {'time': timestamp, 'version': __version__,
                            'charset': encoding,
                            'encoding': '8bit'}, file=fp)
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        for rkey in sorted(reverse.keys()):
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
|---|
| 459 | n/a | |
|---|
| 460 | n/a | |
|---|
| 461 | n/a | |
|---|
def main():
    """Command-line entry point.

    Parses ``sys.argv``, builds the escape table, expands every input
    argument to a list of files ('-' stands for standard input), feeds the
    tokens of each file to a TokenEater and finally writes the collected
    messages to the output file, or to standard output when the output name
    is '-'.

    Exits through usage() on invalid options, and with status 1 when the
    --exclude-file cannot be read.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # takes a filename, exactly like its short form -X
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            with open(arg) as fp:
                for line in fp:
                    # Strip only the line terminator, so that a last line
                    # without a trailing newline keeps its final character.
                    options.nodocstrings[line.rstrip('\n')] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                # One string per line; the line terminator is not part of
                # the string, otherwise no extracted message could match.
                # Blank lines are ignored.
                stripped = (line.rstrip('\n') for line in fp)
                options.toexclude = [entry for entry in stripped if entry]
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                # the loop variable must not be named '_': that would
                # shadow the translation marker used throughout main()
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
|---|
| 621 | n/a | |
|---|
| 622 | n/a | |
|---|
# Script entry point.  The marker calls after main() are never needed for
# their values; they exist so that running pygettext over its own source
# finds a few extra test messages.
if __name__ == '__main__':
    main()
    # some more test strings
    # this one creates a warning
    # NOTE(review): as written the % operator is applied outside the _()
    # call, so the scanner sees only a string between the parentheses and
    # would not emit its "unexpected token" warning here -- confirm whether
    # the formatting was meant to sit inside the parentheses.
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    # adjacent literals inside one marker call are joined into one message
    _('more' 'than' 'one' 'string')
|---|