Python code coverage for Tools/i18n/msgfmt.py

#	count	content
1	n/a	#! /usr/bin/env python3
2	n/a	# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
3	n/a
4	n/a	"""Generate binary message catalog from textual translation description.
5	n/a
6	n/a	This program converts a textual Uniforum-style message catalog (.po file) into
7	n/a	a binary GNU catalog (.mo file). This is essentially the same function as the
8	n/a	GNU msgfmt program, however, it is a simpler implementation.
9	n/a
10	n/a	Usage: msgfmt.py [OPTIONS] filename.po
11	n/a
12	n/a	Options:
13	n/a	-o file
14	n/a	--output-file=file
15	n/a	Specify the output file to write to. If omitted, output will go to a
16	n/a	file named filename.mo (based off the input file name).
17	n/a
18	n/a	-h
19	n/a	--help
20	n/a	Print this message and exit.
21	n/a
22	n/a	-V
23	n/a	--version
24	n/a	Display version information and exit.
25	n/a	"""
26	n/a
27	n/a	import os
28	n/a	import sys
29	n/a	import ast
30	n/a	import getopt
31	n/a	import struct
32	n/a	import array
33	n/a	from email.parser import HeaderParser
34	n/a
35	n/a	__version__ = "1.1"
36	n/a
37	n/a	MESSAGES = {}
38	n/a
39	n/a
40	n/a
def usage(code, msg=''):
    """Write the module help text to stderr and terminate the process.

    The module docstring is always printed; *msg*, when non-empty, is
    printed after it.  The process then exits with status *code*.
    """
    err = sys.stderr
    print(__doc__, file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
46	n/a
47	n/a
48	n/a
def add(id, str, fuzzy):
    """Store the translation *str* for message *id* in MESSAGES.

    Entries flagged as fuzzy, and entries whose translation is empty, are
    silently ignored.
    """
    if fuzzy or not str:
        return
    MESSAGES[id] = str
54	n/a
55	n/a
56	n/a
def generate():
    """Return the binary catalog built from MESSAGES, as bytes.

    The output consists of, in order:

    * a header of seven 32-bit integers (magic, version, number of
      entries, offset of the key index, offset of the value index, and
      size/offset of the hash table, both zero because none is written);
    * the key index, then the value index -- one (length, file offset)
      pair per entry;
    * every key, then every value, each terminated by a NUL byte that is
      not counted in the recorded length.

    Integers are packed in the platform's native byte order.
    """
    header_size = 7 * 4
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)

    # For each string we need its size and its offset relative to the
    # start of the key (resp. value) data.
    offsets = []
    id_chunks = []
    str_chunks = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        offsets.append((id_pos, len(key), str_pos, len(value)))
        id_chunks.append(key)
        str_chunks.append(value)
        # +1 accounts for the NUL terminator written after each string.
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    ids = b''.join(chunk + b'\0' for chunk in id_chunks)
    strs = b''.join(chunk + b'\0' for chunk in str_chunks)

    # We don't use hash tables, so the keys start right after the two
    # index tables (2 tables * 2 ints * 4 bytes = 16 bytes per entry).
    keystart = header_size + 16 * len(keys)
    # ... and the values start after the keys.
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]

    output = struct.pack("Iiiiiii",
                         0x950412de,                  # Magic
                         0,                           # Version
                         len(keys),                   # Number of entries
                         header_size,                 # Start of key index
                         header_size + len(keys) * 8,  # Start of value index
                         0, 0)                        # Size and offset of hash table
    # array.tostring() was removed in Python 3.9; tobytes() is the
    # equivalent spelling available on every Python 3 release.
    output += array.array("i", koffsets + voffsets).tobytes()
    output += ids
    output += strs
    return output
96	n/a
97	n/a
98	n/a
def make(filename, outfile):
    """Compile the .po catalog *filename* into the binary file *outfile*.

    '.po' is appended to *filename* when it is missing.  When *outfile* is
    None, the output path is the input path with its extension replaced by
    '.mo'.  Accepted entries are registered through add() and the binary
    catalog is produced by generate().

    The process exits with status 1 when the input file cannot be read or
    when one of the catalog errors reported below is found.  A failure to
    write the output file is only reported on stderr.

    NOTE(review): entries are collected in the module-level MESSAGES dict,
    which this function does not reset, so entries accumulate across
    successive calls within one process.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        # Use a context manager so the handle is closed deterministically.
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    lno = 0
    for l in lines:
        l = l.decode(encoding)
        lno += 1
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                if not msgid:
                    # The entry with an empty msgid is the catalog header;
                    # see whether it carries an encoding declaration.
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # No comment line separated the previous msgstr from this
                # msgid (otherwise section would have been reset above), so
                # any fuzzy mark still set belongs to the entry just flushed
                # and must not leak into this one.
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                # Drop the 'msgstr[N]' prefix, keeping only the quoted text.
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What remains is expected to be a quoted string literal; evaluate
        # it to resolve the escape sequences.
        l = ast.literal_eval(l)
        if section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)
205	n/a
206	n/a
207	n/a
def main():
    """Command-line entry point: parse the options and compile each catalog.

    -h/--help prints the usage text and exits with status 0, -V/--version
    prints the version and exits with status 0, and -o/--output-file sets
    the output path handed to make() for every input file.  An option
    parsing error prints the usage text and exits with status 1.  When no
    input file is given, a short hint is written to stderr.
    """
    short_opts = 'hVo:'
    long_opts = ['help', 'version', 'output-file=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.error as msg:
        usage(1, msg)

    # Walk the parsed options; the help and version options terminate the
    # process, so only the output file setting survives the loop.
    target = None
    for flag, value in opts:
        if flag == '-h' or flag == '--help':
            usage(0)
        elif flag == '-V' or flag == '--version':
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif flag == '-o' or flag == '--output-file':
            target = value

    # Compile every catalog named on the command line.
    if args:
        for po_name in args:
            make(po_name, target)
        return

    print('No input file given', file=sys.stderr)
    print("Try `msgfmt --help' for more information.", file=sys.stderr)
233	n/a
234	n/a
235	n/a	if __name__ == '__main__':
236	n/a	main()