Python code coverage for Lib/pickletools.py

#	count	content
1	n/a	'''"Executable documentation" for the pickle module.
2	n/a
3	n/a	Extensive comments about the pickle protocols and pickle-machine opcodes
4	n/a	can be found here. Some functions meant for external use:
5	n/a
6	n/a	genops(pickle)
7	n/a	Generate all the opcodes in a pickle, as (opcode, arg, position) triples.
8	n/a
9	n/a	dis(pickle, out=None, memo=None, indentlevel=4)
10	n/a	Print a symbolic disassembly of a pickle.
11	n/a	'''
12	n/a
13	n/a	import codecs
14	n/a	import io
15	n/a	import pickle
16	n/a	import re
17	n/a	import sys
18	n/a
# Names exported by "from pickletools import *".
__all__ = ['dis', 'genops', 'optimize']

# Module-local alias for pickle.bytes_types.
bytes_types = pickle.bytes_types
22	n/a
23	n/a	# Other ideas:
24	n/a	#
25	n/a	# - A pickle verifier: read a pickle and check it exhaustively for
26	n/a	# well-formedness. dis() does a lot of this already.
27	n/a	#
28	n/a	# - A protocol identifier: examine a pickle and return its protocol number
29	n/a	# (== the highest .proto attr value among all the opcodes in the pickle).
30	n/a	# dis() already prints this info at the end.
31	n/a	#
32	n/a	# - A pickle optimizer: for example, tuple-building code is sometimes more
33	n/a	# elaborate than necessary, catering for the possibility that the tuple
34	n/a	# is recursive. Or lots of times a PUT is generated that's never accessed
35	n/a	# by a later GET.
36	n/a
37	n/a
38	n/a	# "A pickle" is a program for a virtual pickle machine (PM, but more accurately
39	n/a	# called an unpickling machine). It's a sequence of opcodes, interpreted by the
40	n/a	# PM, building an arbitrarily complex Python object.
41	n/a	#
42	n/a	# For the most part, the PM is very simple: there are no looping, testing, or
43	n/a	# conditional instructions, no arithmetic and no function calls. Opcodes are
44	n/a	# executed once each, from first to last, until a STOP opcode is reached.
45	n/a	#
46	n/a	# The PM has two data areas, "the stack" and "the memo".
47	n/a	#
48	n/a	# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
49	n/a	# integer object on the stack, whose value is gotten from a decimal string
50	n/a	# literal immediately following the INT opcode in the pickle bytestream. Other
51	n/a	# opcodes take Python objects off the stack. The result of unpickling is
52	n/a	# whatever object is left on the stack when the final STOP opcode is executed.
53	n/a	#
54	n/a	# The memo is simply an array of objects, or it can be implemented as a dict
55	n/a	# mapping little integers to objects. The memo serves as the PM's "long term
56	n/a	# memory", and the little integers indexing the memo are akin to variable
57	n/a	# names. Some opcodes pop a stack object into the memo at a given index,
58	n/a	# and others push a memo object at a given index onto the stack again.
59	n/a	#
60	n/a	# At heart, that's all the PM has. Subtleties arise for these reasons:
61	n/a	#
62	n/a	# + Object identity. Objects can be arbitrarily complex, and subobjects
63	n/a	# may be shared (for example, the list [a, a] refers to the same object a
64	n/a	# twice). It can be vital that unpickling recreate an isomorphic object
65	n/a	# graph, faithfully reproducing sharing.
66	n/a	#
67	n/a	# + Recursive objects. For example, after "L = []; L.append(L)", L is a
68	n/a	# list, and L[0] is the same list. This is related to the object identity
69	n/a	# point, and some sequences of pickle opcodes are subtle in order to
70	n/a	# get the right result in all cases.
71	n/a	#
72	n/a	# + Things pickle doesn't know everything about. Examples of things pickle
73	n/a	# does know everything about are Python's builtin scalar and container
74	n/a	# types, like ints and tuples. They generally have opcodes dedicated to
75	n/a	# them. For things like module references and instances of user-defined
76	n/a	# classes, pickle's knowledge is limited. Historically, many enhancements
77	n/a	# have been made to the pickle protocol in order to do a better (faster,
78	n/a	# and/or more compact) job on those.
79	n/a	#
80	n/a	# + Backward compatibility and micro-optimization. As explained below,
81	n/a	# pickle opcodes never go away, not even when better ways to do a thing
82	n/a	# get invented. The repertoire of the PM just keeps growing over time.
83	n/a	# For example, protocol 0 had two opcodes for building Python integers (INT
84	n/a	# and LONG), protocol 1 added three more for more-efficient pickling of short
85	n/a	# integers, and protocol 2 added two more for more-efficient pickling of
86	n/a	# long integers (before protocol 2, the only ways to pickle a Python long
87	n/a	# took time quadratic in the number of digits, for both pickling and
88	n/a	# unpickling). "Opcode bloat" isn't so much a subtlety as a source of
89	n/a	# wearying complication.
90	n/a	#
91	n/a	#
92	n/a	# Pickle protocols:
93	n/a	#
94	n/a	# For compatibility, the meaning of a pickle opcode never changes. Instead new
95	n/a	# pickle opcodes get added, and each version's unpickler can handle all the
96	n/a	# pickle opcodes in all protocol versions to date. So old pickles continue to
97	n/a	# be readable forever. The pickler can generally be told to restrict itself to
98	n/a	# the subset of opcodes available under previous protocol versions too, so that
99	n/a	# users can create pickles under the current version readable by older
100	n/a	# versions. However, a pickle does not contain its version number embedded
101	n/a	# within it. If an older unpickler tries to read a pickle using a later
102	n/a	# protocol, the result is most likely an exception due to seeing an unknown (in
103	n/a	# the older unpickler) opcode.
104	n/a	#
105	n/a	# The original pickle used what's now called "protocol 0", and what was called
106	n/a	# "text mode" before Python 2.3. The entire pickle bytestream is made up of
107	n/a	# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
108	n/a	# That's why it was called text mode. Protocol 0 is small and elegant, but
109	n/a	# sometimes painfully inefficient.
110	n/a	#
111	n/a	# The second major set of additions is now called "protocol 1", and was called
112	n/a	# "binary mode" before Python 2.3. This added many opcodes with arguments
113	n/a	# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
114	n/a	# bytes. Binary mode pickles can be substantially smaller than equivalent
115	n/a	# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
116	n/a	# int as 4 bytes following the opcode, which is cheaper to unpickle than the
117	n/a	# (perhaps) 11-character decimal string attached to INT. Protocol 1 also added
118	n/a	# a number of opcodes that operate on many stack elements at once (like APPENDS
119	n/a	# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
120	n/a	#
121	n/a	# The third major set of additions came in Python 2.3, and is called "protocol
122	n/a	# 2". This added:
123	n/a	#
124	n/a	# - A better way to pickle instances of new-style classes (NEWOBJ).
125	n/a	#
126	n/a	# - A way for a pickle to identify its protocol (PROTO).
127	n/a	#
128	n/a	# - Time- and space- efficient pickling of long ints (LONG{1,4}).
129	n/a	#
130	n/a	# - Shortcuts for small tuples (TUPLE{1,2,3}).
131	n/a	#
132	n/a	# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
133	n/a	#
134	n/a	# - The "extension registry", a vector of popular objects that can be pushed
135	n/a	# efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
136	n/a	# the registry contents are predefined (there's nothing akin to the memo's
137	n/a	# PUT).
138	n/a	#
139	n/a	# Another independent change with Python 2.3 is the abandonment of any
140	n/a	# pretense that it might be safe to load pickles received from untrusted
141	n/a	# parties -- no sufficient security analysis has been done to guarantee
142	n/a	# this and there isn't a use case that warrants the expense of such an
143	n/a	# analysis.
144	n/a	#
145	n/a	# To this end, all tests for __safe_for_unpickling__ or for
146	n/a	# copyreg.safe_constructors are removed from the unpickling code.
147	n/a	# References to these variables in the descriptions below are to be seen
148	n/a	# as describing unpickling in Python 2.2 and before.
149	n/a
150	n/a
151	n/a	# Meta-rule: Descriptions are stored in instances of descriptor objects,
152	n/a	# with plain constructors. No meta-language is defined from which
153	n/a	# descriptors could be constructed. If you want, e.g., XML, write a little
154	n/a	# program to generate XML from the objects.
155	n/a
156	n/a	##############################################################################
157	n/a	# Some pickle opcodes have an argument, following the opcode in the
158	n/a	# bytestream. An argument is of a specific type, described by an instance
159	n/a	# of ArgumentDescriptor. These are not to be confused with arguments taken
160	n/a	# off the stack -- ArgumentDescriptor applies only to arguments embedded in
161	n/a	# the opcode stream, immediately following an opcode.
162	n/a
# Sentinel values for ArgumentDescriptor.n.  A non-negative n is a fixed
# byte count; the negative values below mark variable-length arguments.

# Represents the number of bytes consumed by an argument delimited by the
# next newline character.
UP_TO_NEWLINE = -1

# Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument.
TAKEN_FROM_ARGUMENT1  = -2   # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4  = -3   # num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT4U = -4   # num bytes is 4-byte unsigned little-endian int
TAKEN_FROM_ARGUMENT8U = -5   # num bytes is 8-byte unsigned little-endian int
173	n/a
class ArgumentDescriptor(object):
    """Describe one kind of argument embedded in the opcode stream.

    Slots:

    name   -- name of the descriptor record, also a module global name;
              a string.
    n      -- length of the argument in bytes; an int.  UP_TO_NEWLINE and
              the TAKEN_FROM_ARGUMENT{1,4,4U,8U} constants are negative
              values used for the variable-length cases.
    reader -- a function taking a file-like object, reading this kind of
              argument from the object at the current position, advancing
              the current position by n bytes, and returning the value of
              the argument.
    doc    -- human-readable docs for this arg descriptor; a string.
    """

    __slots__ = ('name', 'n', 'reader', 'doc')

    def __init__(self, name, n, reader, doc):
        assert isinstance(name, str)
        self.name = name

        # A negative length is only legal when it is one of the
        # variable-length sentinel values.
        assert isinstance(n, int) and not (
            n < 0 and n not in (UP_TO_NEWLINE,
                                TAKEN_FROM_ARGUMENT1,
                                TAKEN_FROM_ARGUMENT4,
                                TAKEN_FROM_ARGUMENT4U,
                                TAKEN_FROM_ARGUMENT8U))
        self.n = n

        self.reader = reader

        assert isinstance(doc, str)
        self.doc = doc
209	n/a
210	n/a	from struct import unpack as _unpack
211	n/a
def read_uint1(f):
    r"""Read one byte from f and return it as an unsigned integer.

    Raises ValueError if no byte can be read.

    >>> import io
    >>> read_uint1(io.BytesIO(b'\xff'))
    255
    """
    chunk = f.read(1)
    if not chunk:
        raise ValueError("not enough data in stream to read uint1")
    return chunk[0]
223	n/a
# Fixed-width argument: one byte, read by read_uint1.
uint1 = ArgumentDescriptor(
    name='uint1',
    n=1,
    reader=read_uint1,
    doc="One-byte unsigned integer.")
229	n/a
230	n/a
def read_uint2(f):
    r"""Read two bytes from f as a little-endian unsigned integer.

    Raises ValueError if fewer than two bytes can be read.

    >>> import io
    >>> read_uint2(io.BytesIO(b'\xff\x00'))
    255
    >>> read_uint2(io.BytesIO(b'\xff\xff'))
    65535
    """
    raw = f.read(2)
    if len(raw) != 2:
        raise ValueError("not enough data in stream to read uint2")
    (value,) = _unpack("<H", raw)
    return value
244	n/a
# Fixed-width argument: two bytes, read by read_uint2.
uint2 = ArgumentDescriptor(
    name='uint2',
    n=2,
    reader=read_uint2,
    doc="Two-byte unsigned integer, little-endian.")
250	n/a
251	n/a
def read_int4(f):
    r"""Read four bytes from f as a little-endian signed integer.

    Raises ValueError if fewer than four bytes can be read.

    >>> import io
    >>> read_int4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_int4(io.BytesIO(b'\x00\x00\x00\x80')) == -(2**31)
    True
    """
    raw = f.read(4)
    if len(raw) != 4:
        raise ValueError("not enough data in stream to read int4")
    (value,) = _unpack("<i", raw)
    return value
265	n/a
# Fixed-width argument: four bytes, read by read_int4.
int4 = ArgumentDescriptor(
    name='int4',
    n=4,
    reader=read_int4,
    doc="Four-byte signed integer, little-endian, 2's complement.")
271	n/a
272	n/a
def read_uint4(f):
    r"""Read four bytes from f as a little-endian unsigned integer.

    Raises ValueError if fewer than four bytes can be read.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    """
    raw = f.read(4)
    if len(raw) != 4:
        raise ValueError("not enough data in stream to read uint4")
    (value,) = _unpack("<I", raw)
    return value
286	n/a
# Fixed-width argument: four bytes, read by read_uint4.
uint4 = ArgumentDescriptor(
    name='uint4',
    n=4,
    reader=read_uint4,
    doc="Four-byte unsigned integer, little-endian.")
292	n/a
293	n/a
def read_uint8(f):
    r"""Read eight bytes from f as a little-endian unsigned integer.

    Raises ValueError if fewer than eight bytes can be read.

    >>> import io
    >>> read_uint8(io.BytesIO(b'\xff\x00\x00\x00\x00\x00\x00\x00'))
    255
    >>> read_uint8(io.BytesIO(b'\xff' * 8)) == 2**64-1
    True
    """
    raw = f.read(8)
    if len(raw) != 8:
        raise ValueError("not enough data in stream to read uint8")
    (value,) = _unpack("<Q", raw)
    return value
307	n/a
# Fixed-width argument: eight bytes, read by read_uint8.
uint8 = ArgumentDescriptor(
    name='uint8',
    n=8,
    reader=read_uint8,
    doc="Eight-byte unsigned integer, little-endian.")
313	n/a
314	n/a
def read_stringnl(f, decode=True, stripquotes=True):
    r"""Read one newline-terminated line from f and return its content.

    The trailing newline is always dropped.  If stripquotes is true, the
    line must be bracketed by a matching pair of single or double quote
    characters, which are removed; a missing, unmatched or lone quote
    raises ValueError.  If decode is true, backslash escapes are undone
    and the result is returned as an ASCII-decoded str; otherwise the raw
    bytes are returned.

    Raises ValueError if the line does not end with a newline.

    >>> import io
    >>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
    'abcd'

    >>> read_stringnl(io.BytesIO(b"\n"))
    Traceback (most recent call last):
    ...
    ValueError: no string quotes around b''

    >>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
    ''

    >>> read_stringnl(io.BytesIO(b"''\n"))
    ''

    >>> read_stringnl(io.BytesIO(b'"abcd"'))
    Traceback (most recent call last):
    ...
    ValueError: no newline found when trying to read stringnl

    Embedded escapes are undone in the result.
    >>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'"))
    'a\n\\b\x00c\td'
    """

    data = f.readline()
    if not data.endswith(b'\n'):
        raise ValueError("no newline found when trying to read stringnl")
    data = data[:-1]    # lose the newline

    if stripquotes:
        for q in (b'"', b"'"):
            if data.startswith(q):
                # A lone quote character both starts and ends with q, so
                # the length check is needed to demand a real pair.
                if len(data) < 2 or not data.endswith(q):
                    raise ValueError("string quote %r not found at both "
                                     "ends of %r" % (q, data))
                data = data[1:-1]
                break
        else:
            raise ValueError("no string quotes around %r" % data)

    if decode:
        data = codecs.escape_decode(data)[0].decode("ascii")
    return data
361	n/a
# Variable-length argument running up to the next newline, read by
# read_stringnl with its defaults (quotes stripped, escapes decoded).
stringnl = ArgumentDescriptor(
    name='stringnl',
    n=UP_TO_NEWLINE,
    reader=read_stringnl,
    doc="""A newline-terminated string.

This is a repr-style string, with embedded escapes, and
bracketing quotes.
""")
371	n/a
def read_stringnl_noescape(f):
    """Read a newline-terminated line from f without requiring quotes.

    Thin wrapper around read_stringnl(f, stripquotes=False).
    """
    # NOTE(review): decode is left at read_stringnl's default (True), so
    # backslash escapes in the line are still decoded despite the name.
    return read_stringnl(f, stripquotes=False)
374	n/a
# Variable-length argument running up to the next newline, read by
# read_stringnl_noescape (no bracketing quotes expected).
stringnl_noescape = ArgumentDescriptor(
    name='stringnl_noescape',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape,
    doc="""A newline-terminated string.

This is a str-style string, without embedded escapes,
or bracketing quotes. It should consist solely of
printable ASCII characters.
""")
385	n/a
def read_stringnl_noescape_pair(f):
    r"""Read two consecutive unquoted lines from f, joined by one blank.

    >>> import io
    >>> read_stringnl_noescape_pair(io.BytesIO(b"Queue\nEmpty\njunk"))
    'Queue Empty'
    """
    first = read_stringnl_noescape(f)
    second = read_stringnl_noescape(f)
    return "%s %s" % (first, second)
394	n/a
# Two newline-terminated arguments in a row, read by
# read_stringnl_noescape_pair and reported as a single string.
stringnl_noescape_pair = ArgumentDescriptor(
    name='stringnl_noescape_pair',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape_pair,
    doc="""A pair of newline-terminated strings.

These are str-style strings, without embedded
escapes, or bracketing quotes. They should
consist solely of printable ASCII characters.
The pair is returned as a single string, with
a single blank separating the two strings.
""")
407	n/a
408	n/a
def read_string1(f):
    r"""Read a 1-byte length from f, then that many bytes as latin-1 text.

    Raises ValueError if fewer bytes remain than the length announces.

    >>> import io
    >>> read_string1(io.BytesIO(b"\x00"))
    ''
    >>> read_string1(io.BytesIO(b"\x03abcdef"))
    'abc'
    """
    size = read_uint1(f)
    assert size >= 0
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a string1, but only %d remain" %
                         (size, len(payload)))
    return payload.decode("latin-1")
425	n/a
# Counted argument whose length comes from a leading 1-byte count;
# read by read_string1.
string1 = ArgumentDescriptor(
    name="string1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_string1,
    doc="""A counted string.

The first argument is a 1-byte unsigned int giving the number
of bytes in the string, and the second argument is that many
bytes.
""")
436	n/a
437	n/a
def read_string4(f):
    r"""Read a 4-byte signed length from f, then that many latin-1 bytes.

    Raises ValueError if the length is negative or if fewer bytes remain
    than the length announces.

    >>> import io
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    ''
    >>> read_string4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    'abc'
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a string4, but only 6 remain
    """
    size = read_int4(f)
    if size < 0:
        raise ValueError("string4 byte count < 0: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a string4, but only %d remain" %
                         (size, len(payload)))
    return payload.decode("latin-1")
459	n/a
# Counted argument whose length comes from a leading 4-byte signed
# count; read by read_string4.
string4 = ArgumentDescriptor(
    name="string4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_string4,
    doc="""A counted string.

The first argument is a 4-byte little-endian signed int giving
the number of bytes in the string, and the second argument is
that many bytes.
""")
470	n/a
471	n/a
def read_bytes1(f):
    r"""Read a 1-byte length from f, then return that many raw bytes.

    Raises ValueError if fewer bytes remain than the length announces.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """

    n = read_uint1(f)
    assert n >= 0
    data = f.read(n)
    if len(data) == n:
        return data
    raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                     (n, len(data)))

# read_bytes1 and bytes1 were previously defined twice in a row with the
# same code; only one copy is kept.  The doc text is the one from the
# second definition, which is the one that took effect at import time.
bytes1 = ArgumentDescriptor(
    name="bytes1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_bytes1,
    doc="""A counted bytes string.

The first argument is a 1-byte unsigned int giving the number
of bytes, and the second argument is that many bytes.
""")
527	n/a
528	n/a
def read_bytes4(f):
    r"""Read a 4-byte unsigned length from f, then that many raw bytes.

    Raises ValueError if the length exceeds sys.maxsize or if fewer bytes
    remain than the length announces.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """
    size = read_uint4(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                         (size, len(payload)))
    return payload
551	n/a
# Counted argument whose length comes from a leading 4-byte unsigned
# count; read by read_bytes4.
bytes4 = ArgumentDescriptor(
    name="bytes4",
    n=TAKEN_FROM_ARGUMENT4U,
    reader=read_bytes4,
    doc="""A counted bytes string.

The first argument is a 4-byte little-endian unsigned int giving
the number of bytes, and the second argument is that many bytes.
""")
561	n/a
562	n/a
def read_bytes8(f):
    r"""Read an 8-byte unsigned length from f, then that many raw bytes.

    Raises ValueError if the length exceeds sys.maxsize or if fewer bytes
    remain than the length announces.

    >>> import io, struct, sys
    >>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef"))
    b'abc'
    >>> bigsize8 = struct.pack("<Q", sys.maxsize//3)
    >>> read_bytes8(io.BytesIO(bigsize8 + b"abcdef"))  #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: expected ... bytes in a bytes8, but only 6 remain
    """
    size = read_uint8(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("bytes8 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a bytes8, but only %d remain" %
                         (size, len(payload)))
    return payload
586	n/a
# Counted argument whose length comes from a leading 8-byte unsigned
# count; read by read_bytes8.
bytes8 = ArgumentDescriptor(
    name="bytes8",
    n=TAKEN_FROM_ARGUMENT8U,
    reader=read_bytes8,
    doc="""A counted bytes string.

The first argument is an 8-byte little-endian unsigned int giving
the number of bytes, and the second argument is that many bytes.
""")
596	n/a
def read_unicodestringnl(f):
    r"""Read a newline-terminated line from f, decoded as raw-unicode-escape.

    The trailing newline is dropped.  Raises ValueError if the line does
    not end with a newline.

    >>> import io
    >>> read_unicodestringnl(io.BytesIO(b"abc\\uabcd\njunk")) == 'abc\uabcd'
    True
    """
    line = f.readline()
    if line.endswith(b'\n'):
        # Drop the newline before decoding.
        return str(line[:-1], 'raw-unicode-escape')
    raise ValueError("no newline found when trying to read "
                     "unicodestringnl")
610	n/a
# Variable-length argument running up to the next newline, read by
# read_unicodestringnl.
unicodestringnl = ArgumentDescriptor(
    name='unicodestringnl',
    n=UP_TO_NEWLINE,
    reader=read_unicodestringnl,
    doc="""A newline-terminated Unicode string.

This is raw-unicode-escape encoded, so consists of
printable ASCII characters, and may contain embedded
escape sequences.
""")
621	n/a
622	n/a
def read_unicodestring1(f):
    r"""Read a 1-byte length from f, then that many bytes as UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.  Raises ValueError
    if fewer bytes remain than the length announces.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc)])  # little-endian 1-byte length
    >>> t = read_unicodestring1(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring1(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring1, but only 6 remain
    """
    size = read_uint1(f)
    assert size >= 0
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a unicodestring1, but only %d "
                         "remain" % (size, len(payload)))
    return str(payload, 'utf-8', 'surrogatepass')
648	n/a
# Counted argument whose length comes from a leading 1-byte count; read
# by read_unicodestring1.  The count is unsigned (it is read with
# read_uint1), so the doc text says "unsigned" rather than "signed".
unicodestring1 = ArgumentDescriptor(
    name="unicodestring1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_unicodestring1,
    doc="""A counted Unicode string.

The first argument is a 1-byte unsigned int
giving the number of bytes in the string, and the second
argument-- the UTF-8 encoding of the Unicode string --
contains that many bytes.
""")
660	n/a
661	n/a
def read_unicodestring4(f):
    r"""Read a 4-byte unsigned length from f, then that many UTF-8 bytes.

    Decoding uses the 'surrogatepass' error handler.  Raises ValueError
    if the length exceeds sys.maxsize or if fewer bytes remain than the
    length announces.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc), 0, 0, 0])  # little-endian 4-byte length
    >>> t = read_unicodestring4(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring4(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
    """
    size = read_uint4(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                         "remain" % (size, len(payload)))
    return str(payload, 'utf-8', 'surrogatepass')
689	n/a
# Counted argument whose length comes from a leading 4-byte count; read
# by read_unicodestring4.  The count is unsigned (read_uint4,
# TAKEN_FROM_ARGUMENT4U), so the doc text says "unsigned".
unicodestring4 = ArgumentDescriptor(
    name="unicodestring4",
    n=TAKEN_FROM_ARGUMENT4U,
    reader=read_unicodestring4,
    doc="""A counted Unicode string.

The first argument is a 4-byte little-endian unsigned int
giving the number of bytes in the string, and the second
argument-- the UTF-8 encoding of the Unicode string --
contains that many bytes.
""")
701	n/a
702	n/a
def read_unicodestring8(f):
    r"""Read an 8-byte unsigned length from f, then that many UTF-8 bytes.

    Decoding uses the 'surrogatepass' error handler.  Raises ValueError
    if the length exceeds sys.maxsize or if fewer bytes remain than the
    length announces.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc)]) + b'\0' * 7  # little-endian 8-byte length
    >>> t = read_unicodestring8(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring8(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring8, but only 6 remain
    """
    size = read_uint8(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("unicodestring8 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a unicodestring8, but only %d "
                         "remain" % (size, len(payload)))
    return str(payload, 'utf-8', 'surrogatepass')
730	n/a
# Counted argument whose length comes from a leading 8-byte count; read
# by read_unicodestring8.  The count is unsigned (read_uint8,
# TAKEN_FROM_ARGUMENT8U), so the doc text says "unsigned".
unicodestring8 = ArgumentDescriptor(
    name="unicodestring8",
    n=TAKEN_FROM_ARGUMENT8U,
    reader=read_unicodestring8,
    doc="""A counted Unicode string.

The first argument is an 8-byte little-endian unsigned int
giving the number of bytes in the string, and the second
argument-- the UTF-8 encoding of the Unicode string --
contains that many bytes.
""")
742	n/a
743	n/a
def read_decimalnl_short(f):
    r"""Read a newline-terminated decimal literal from f and return it.

    The exact texts b"00" and b"01" are returned as False and True;
    anything else is handed to int(), which raises ValueError on a
    trailing 'L'.

    >>> import io
    >>> read_decimalnl_short(io.BytesIO(b"1234\n56"))
    1234

    >>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
    Traceback (most recent call last):
    ...
    ValueError: invalid literal for int() with base 10: b'1234L'
    """
    text = read_stringnl(f, decode=False, stripquotes=False)

    # There's a hack for True and False here.
    if text in (b"00", b"01"):
        return text == b"01"

    return int(text)
765	n/a
def read_decimalnl_long(f):
    r"""Read a newline-terminated decimal literal from f, allowing an 'L'.

    A single trailing b'L' is removed before the text is passed to int().

    >>> import io

    >>> read_decimalnl_long(io.BytesIO(b"1234L\n56"))
    1234

    >>> read_decimalnl_long(io.BytesIO(b"123456789012345678901234L\n6"))
    123456789012345678901234
    """
    text = read_stringnl(f, decode=False, stripquotes=False)
    digits = text[:-1] if text.endswith(b'L') else text
    return int(digits)
781	n/a
782	n/a
# Variable-length argument running up to the next newline, read by
# read_decimalnl_short.
decimalnl_short = ArgumentDescriptor(
    name='decimalnl_short',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_short,
    doc="""A newline-terminated decimal integer literal.

This never has a trailing 'L', and the integer fit
in a short Python int on the box where the pickle
was written -- but there's no guarantee it will fit
in a short Python int on the box where the pickle
is read.
""")
795	n/a
# Variable-length argument running up to the next newline, read by
# read_decimalnl_long.
decimalnl_long = ArgumentDescriptor(
    name='decimalnl_long',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_long,
    doc="""A newline-terminated decimal integer literal.

This has a trailing 'L', and can represent integers
of any size.
""")
805	n/a
806	n/a
def read_floatnl(f):
    r"""Read a newline-terminated line from f and convert it with float().

    >>> import io
    >>> read_floatnl(io.BytesIO(b"-1.25\n6"))
    -1.25
    """
    return float(read_stringnl(f, decode=False, stripquotes=False))
815	n/a
# Variable-length argument running up to the next newline, read by
# read_floatnl.
floatnl = ArgumentDescriptor(
    name='floatnl',
    n=UP_TO_NEWLINE,
    reader=read_floatnl,
    doc="""A newline-terminated decimal floating literal.

In general this requires 17 significant digits for roundtrip
identity, and pickling then unpickling infinities, NaNs, and
minus zero doesn't work across boxes, or on some boxes even
on itself (e.g., Windows can't read the strings it produces
for infinities or NaNs).
""")
828	n/a
def read_float8(f):
    r"""Read eight bytes from f as a big-endian double (struct '>d').

    Raises ValueError if fewer than eight bytes can be read.

    >>> import io, struct
    >>> raw = struct.pack(">d", -1.25)
    >>> raw
    b'\xbf\xf4\x00\x00\x00\x00\x00\x00'
    >>> read_float8(io.BytesIO(raw + b"\n"))
    -1.25
    """
    raw = f.read(8)
    if len(raw) != 8:
        raise ValueError("not enough data in stream to read float8")
    (value,) = _unpack(">d", raw)
    return value
843	n/a
844	n/a
# Fixed-width argument: eight bytes, read by read_float8.
float8 = ArgumentDescriptor(
    name='float8',
    n=8,
    reader=read_float8,
    doc="""An 8-byte binary representation of a float, big-endian.

The format is unique to Python, and shared with the struct
module (format string '>d') "in theory" (the struct and pickle
implementations don't share the code -- they should). It's
strongly related to the IEEE-754 double format, and, in normal
cases, is in fact identical to the big-endian 754 double format.
On other boxes the dynamic range is limited to that of a 754
double, and "add a half and chop" rounding is used to reduce
the precision to 53 bits. However, even on a 754 box,
infinities, NaNs, and minus zero may not be handled correctly
(may not survive roundtrip pickling intact).
""")
862	n/a
863	n/a	# Protocol 2 formats
864	n/a
865	n/a	from pickle import decode_long
866	n/a
def read_long1(f):
    r"""Read a 1-byte size from f, then decode that many bytes as an int.

    The payload is passed to decode_long.  Raises ValueError if fewer
    bytes remain than the size announces.

    >>> import io
    >>> read_long1(io.BytesIO(b"\x00"))
    0
    >>> read_long1(io.BytesIO(b"\x02\xff\x00"))
    255
    >>> read_long1(io.BytesIO(b"\x02\xff\x7f"))
    32767
    >>> read_long1(io.BytesIO(b"\x02\x00\xff"))
    -256
    >>> read_long1(io.BytesIO(b"\x02\x00\x80"))
    -32768
    """
    size = read_uint1(f)
    payload = f.read(size)
    if len(payload) == size:
        return decode_long(payload)
    raise ValueError("not enough data in stream to read long1")
887	n/a
# Counted argument whose length comes from a leading 1-byte size; read
# by read_long1.
long1 = ArgumentDescriptor(
    name="long1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

This first reads one byte as an unsigned size, then reads that
many bytes and interprets them as a little-endian 2's-complement long.
If the size is 0, that's taken as a shortcut for the long 0L.
""")
898	n/a
def read_long4(f):
    r"""Read a long4 argument from the binary stream *f*; return an int.

    A 4-byte size is read first with read_int4 and must be non-negative.
    That many bytes are then read and handed to decode_long.

    Raises ValueError if the size is negative, or if the stream yields
    fewer bytes than the size announced.

    >>> import io
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x00"))
    255
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x7f"))
    32767
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\xff"))
    -256
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\x80"))
    -32768
    >>> read_long4(io.BytesIO(b"\x00\x00\x00\x00"))
    0
    """

    n = read_int4(f)
    # read_int4 yields a signed value, so a negative size must be rejected
    # explicitly before it is used as a read length.
    if n < 0:
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)
921	n/a
# Argument descriptor for a long preceded by a 4-byte size; the byte count
# is not fixed, so n is the TAKEN_FROM_ARGUMENT4 marker.  Decoded by
# read_long4.
long4 = ArgumentDescriptor(
    name="long4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_long4,
    doc="""A binary representation of a long, little-endian.

This first reads four bytes as a signed size (but requires the
size to be >= 0), then reads that many bytes and interprets them
as a little-endian 2's-complement long. If the size is 0, that's taken
as a shortcut for the int 0, although LONG1 should really be used
then instead (and in any case where # of bytes < 256).
""",
)
934	n/a
935	n/a
936	n/a	##############################################################################
937	n/a	# Object descriptors. The stack used by the pickle machine holds objects,
938	n/a	# and in the stack_before and stack_after attributes of OpcodeInfo
939	n/a	# descriptors we need names to describe the various types of objects that can
940	n/a	# appear on the stack.
941	n/a
class StackObject:
    """Descriptor naming one kind of object that can sit on the pickle stack.

    Instances are used in the stack_before / stack_after lists of
    OpcodeInfo records.  repr() of an instance is simply its name.
    """

    __slots__ = (
        'name',    # name of descriptor record, for info only
        'obtype',  # a type object, or a tuple of type objects (meaning the
                   # object can be of any type in the tuple)
        'doc',     # human-readable docs for this kind of stack object; a str
    )

    def __init__(self, name, obtype, doc):
        # Sanity-check every argument up front, then store them.
        assert isinstance(name, str)
        assert isinstance(obtype, (type, tuple))
        if isinstance(obtype, tuple):
            assert all(isinstance(member, type) for member in obtype)
        assert isinstance(doc, str)

        self.name = name
        self.obtype = obtype
        self.doc = doc

    def __repr__(self):
        return self.name
970	n/a
971	n/a
# Stack-object descriptors for the builtin value types that opcodes push.

pyint = StackObject(
    name="int",
    obtype=int,
    doc="A Python integer object.",
)
# pylong is an alias for the very same descriptor object.
pylong = pyint

pyinteger_or_bool = StackObject(
    name="int_or_bool",
    obtype=(int, bool),
    doc="A Python integer or boolean object.",
)

pybool = StackObject(
    name="bool",
    obtype=bool,
    doc="A Python boolean object.",
)

pyfloat = StackObject(
    name="float",
    obtype=float,
    doc="A Python float object.",
)

pybytes_or_str = StackObject(
    name="bytes_or_str",
    obtype=(bytes, str),
    doc="A Python bytes or (Unicode) string object.",
)
# pystring is an alias for the very same descriptor object.
pystring = pybytes_or_str

pybytes = StackObject(
    name="bytes",
    obtype=bytes,
    doc="A Python bytes object.",
)

pyunicode = StackObject(
    name="str",
    obtype=str,
    doc="A Python (Unicode) string object.",
)

pynone = StackObject(
    name="None",
    obtype=type(None),
    doc="The Python None object.",
)

pytuple = StackObject(
    name="tuple",
    obtype=tuple,
    doc="A Python tuple object.",
)

pylist = StackObject(
    name="list",
    obtype=list,
    doc="A Python list object.",
)

pydict = StackObject(
    name="dict",
    obtype=dict,
    doc="A Python dict object.",
)

pyset = StackObject(
    name="set",
    obtype=set,
    doc="A Python set object.",
)
1031	n/a
# Descriptor for frozenset objects on the stack.  obtype must be frozenset
# (not set): a frozenset instance is not an instance of set.
pyfrozenset = StackObject(
    name="frozenset",
    obtype=frozenset,
    doc="A Python frozenset object.")
1036	n/a
# Wildcard descriptor: matches any object at all.
anyobject = StackObject(
    name="any",
    obtype=object,
    doc="Any kind of object whatsoever.",
)

# Descriptor for the marker pushed by the MARK opcode.
markobject = StackObject(
    name="mark",
    obtype=StackObject,
    doc="""'The mark' is a unique object.

Opcodes that operate on a variable number of objects
generally don't embed the count of objects in the opcode,
or pull it off the stack. Instead the MARK opcode is used
to push a special marker object on the stack, and then
some other opcodes grab all the objects from the top of
the stack down to (but not including) the topmost marker
object.
""",
)
1055	n/a
# Descriptor standing for "everything on the stack above the topmost mark".
stackslice = StackObject(
    name="stackslice",
    obtype=StackObject,
    doc="""An object representing a contiguous slice of the stack.

This is used in conjunction with markobject, to represent all
of the stack following the topmost markobject. For example,
the POP_MARK opcode changes the stack from

[..., markobject, stackslice]
to
[...]

No matter how many objects are on the stack after the topmost
markobject, POP_MARK gets rid of all of them (including the
topmost markobject too).
""")
1073	n/a
1074	n/a	##############################################################################
1075	n/a	# Descriptors for pickle opcodes.
1076	n/a
class OpcodeInfo:
    """Descriptor record for a single pickle opcode.

    Every constructor argument is sanity-checked with an assertion and
    then stored unchanged in the attribute of the same name.
    """

    __slots__ = (
        # symbolic name of opcode; a string
        'name',

        # the code used in a bytestream to represent the opcode; a
        # one-character string
        'code',

        # If the opcode has an argument embedded in the byte string, an
        # instance of ArgumentDescriptor specifying its type.  Note that
        # arg.reader(s) can be used to read and decode the argument from
        # the bytestream s, and arg.doc documents the format of the raw
        # argument bytes.  If the opcode doesn't have an argument embedded
        # in the bytestream, arg should be None.
        'arg',

        # what the stack looks like before this opcode runs; a list
        'stack_before',

        # what the stack looks like after this opcode runs; a list
        'stack_after',

        # the protocol number in which this opcode was introduced; an int
        'proto',

        # human-readable docs for this opcode; a string
        'doc',
    )

    def __init__(self, name, code, arg,
                 stack_before, stack_after, proto, doc):
        # Validate in the same order as the parameters, then store.
        assert isinstance(name, str)

        assert isinstance(code, str)
        assert len(code) == 1

        assert arg is None or isinstance(arg, ArgumentDescriptor)

        assert isinstance(stack_before, list)
        assert all(isinstance(item, StackObject) for item in stack_before)

        assert isinstance(stack_after, list)
        assert all(isinstance(item, StackObject) for item in stack_after)

        assert isinstance(proto, int)
        assert 0 <= proto <= pickle.HIGHEST_PROTOCOL

        assert isinstance(doc, str)

        self.name = name
        self.code = code
        self.arg = arg
        self.stack_before = stack_before
        self.stack_after = stack_after
        self.proto = proto
        self.doc = doc
1135	n/a
1136	n/a	I = OpcodeInfo
1137	n/a	opcodes = [
1138	n/a
1139	n/a	# Ways to spell integers.
1140	n/a
1141	n/a	I(name='INT',
1142	n/a	code='I',
1143	n/a	arg=decimalnl_short,
1144	n/a	stack_before=[],
1145	n/a	stack_after=[pyinteger_or_bool],
1146	n/a	proto=0,
1147	n/a	doc="""Push an integer or bool.
1148	n/a
1149	n/a	The argument is a newline-terminated decimal literal string.
1150	n/a
1151	n/a	The intent may have been that this always fit in a short Python int,
1152	n/a	but INT can be generated in pickles written on a 64-bit box that
1153	n/a	require a Python long on a 32-bit box. The difference between this
1154	n/a	and LONG then is that INT skips a trailing 'L', and produces a short
1155	n/a	int whenever possible.
1156	n/a
1157	n/a	Another difference is due to that, when bool was introduced as a
1158	n/a	distinct type in 2.3, builtin names True and False were also added to
1159	n/a	2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
1160	n/a	True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
1161	n/a	Leading zeroes are never produced for a genuine integer. The 2.3
1162	n/a	(and later) unpicklers special-case these and return bool instead;
1163	n/a	earlier unpicklers ignore the leading "0" and return the int.
1164	n/a	"""),
1165	n/a
1166	n/a	I(name='BININT',
1167	n/a	code='J',
1168	n/a	arg=int4,
1169	n/a	stack_before=[],
1170	n/a	stack_after=[pyint],
1171	n/a	proto=1,
1172	n/a	doc="""Push a four-byte signed integer.
1173	n/a
1174	n/a	This handles the full range of Python (short) integers on a 32-bit
1175	n/a	box, directly as binary bytes (1 for the opcode and 4 for the integer).
1176	n/a	If the integer is non-negative and fits in 1 or 2 bytes, pickling via
1177	n/a	BININT1 or BININT2 saves space.
1178	n/a	"""),
1179	n/a
1180	n/a	I(name='BININT1',
1181	n/a	code='K',
1182	n/a	arg=uint1,
1183	n/a	stack_before=[],
1184	n/a	stack_after=[pyint],
1185	n/a	proto=1,
1186	n/a	doc="""Push a one-byte unsigned integer.
1187	n/a
1188	n/a	This is a space optimization for pickling very small non-negative ints,
1189	n/a	in range(256).
1190	n/a	"""),
1191	n/a
1192	n/a	I(name='BININT2',
1193	n/a	code='M',
1194	n/a	arg=uint2,
1195	n/a	stack_before=[],
1196	n/a	stack_after=[pyint],
1197	n/a	proto=1,
1198	n/a	doc="""Push a two-byte unsigned integer.
1199	n/a
1200	n/a	This is a space optimization for pickling small positive ints, in
1201	n/a	range(256, 2**16). Integers in range(256) can also be pickled via
1202	n/a	BININT2, but BININT1 instead saves a byte.
1203	n/a	"""),
1204	n/a
1205	n/a	I(name='LONG',
1206	n/a	code='L',
1207	n/a	arg=decimalnl_long,
1208	n/a	stack_before=[],
1209	n/a	stack_after=[pyint],
1210	n/a	proto=0,
1211	n/a	doc="""Push a long integer.
1212	n/a
1213	n/a	The same as INT, except that the literal ends with 'L', and always
1214	n/a	unpickles to a Python long. There doesn't seem a real purpose to the
1215	n/a	trailing 'L'.
1216	n/a
1217	n/a	Note that LONG takes time quadratic in the number of digits when
1218	n/a	unpickling (this is simply due to the nature of decimal->binary
1219	n/a	conversion). Proto 2 added linear-time (in C; still quadratic-time
1220	n/a	in Python) LONG1 and LONG4 opcodes.
1221	n/a	"""),
1222	n/a
1223	n/a	I(name="LONG1",
1224	n/a	code='\x8a',
1225	n/a	arg=long1,
1226	n/a	stack_before=[],
1227	n/a	stack_after=[pyint],
1228	n/a	proto=2,
1229	n/a	doc="""Long integer using one-byte length.
1230	n/a
1231	n/a	A more efficient encoding of a Python long; the long1 encoding
1232	n/a	says it all."""),
1233	n/a
I(name="LONG4",
  code='\x8b',
  arg=long4,
  stack_before=[],
  stack_after=[pyint],
  proto=2,
  doc="""Long integer using four-byte length.

A more efficient encoding of a Python long; the long4 encoding
says it all."""),
1244	n/a
1245	n/a	# Ways to spell strings (8-bit, not Unicode).
1246	n/a
I(name='STRING',
  code='S',
  arg=stringnl,
  stack_before=[],
  stack_after=[pybytes_or_str],
  proto=0,
  doc="""Push a Python string object.

The argument is a repr-style string, with bracketing quote characters,
and perhaps embedded escapes. The argument extends until the next
newline character. These are usually decoded into a str instance
using the encoding given to the Unpickler constructor, or the default,
'ASCII'. If the encoding given was 'bytes' however, they will be
decoded as bytes object instead.
"""),
1262	n/a
I(name='BINSTRING',
  code='T',
  arg=string4,
  stack_before=[],
  stack_after=[pybytes_or_str],
  proto=1,
  doc="""Push a Python string object.

There are two arguments: the first is a 4-byte little-endian
signed int giving the number of bytes in the string, and the
second is that many bytes, which are taken literally as the string
content. These are usually decoded into a str instance using the
encoding given to the Unpickler constructor, or the default,
'ASCII'. If the encoding given was 'bytes' however, they will be
decoded as bytes object instead.
"""),
1279	n/a
I(name='SHORT_BINSTRING',
  code='U',
  arg=string1,
  stack_before=[],
  stack_after=[pybytes_or_str],
  proto=1,
  doc="""Push a Python string object.

There are two arguments: the first is a 1-byte unsigned int giving
the number of bytes in the string, and the second is that many
bytes, which are taken literally as the string content. These are
usually decoded into a str instance using the encoding given to
the Unpickler constructor, or the default, 'ASCII'. If the
encoding given was 'bytes' however, they will be decoded as bytes
object instead.
"""),
1296	n/a
# Bytes (protocol 3 and higher; older protocols don't support bytes at all)
1298	n/a
1299	n/a	I(name='BINBYTES',
1300	n/a	code='B',
1301	n/a	arg=bytes4,
1302	n/a	stack_before=[],
1303	n/a	stack_after=[pybytes],
1304	n/a	proto=3,
1305	n/a	doc="""Push a Python bytes object.
1306	n/a
1307	n/a	There are two arguments: the first is a 4-byte little-endian unsigned int
1308	n/a	giving the number of bytes, and the second is that many bytes, which are
1309	n/a	taken literally as the bytes content.
1310	n/a	"""),
1311	n/a
1312	n/a	I(name='SHORT_BINBYTES',
1313	n/a	code='C',
1314	n/a	arg=bytes1,
1315	n/a	stack_before=[],
1316	n/a	stack_after=[pybytes],
1317	n/a	proto=3,
1318	n/a	doc="""Push a Python bytes object.
1319	n/a
1320	n/a	There are two arguments: the first is a 1-byte unsigned int giving
1321	n/a	the number of bytes, and the second is that many bytes, which are taken
1322	n/a	literally as the string content.
1323	n/a	"""),
1324	n/a
1325	n/a	I(name='BINBYTES8',
1326	n/a	code='\x8e',
1327	n/a	arg=bytes8,
1328	n/a	stack_before=[],
1329	n/a	stack_after=[pybytes],
1330	n/a	proto=4,
1331	n/a	doc="""Push a Python bytes object.
1332	n/a
1333	n/a	There are two arguments: the first is an 8-byte unsigned int giving
1334	n/a	the number of bytes in the string, and the second is that many bytes,
1335	n/a	which are taken literally as the string content.
1336	n/a	"""),
1337	n/a
1338	n/a	# Ways to spell None.
1339	n/a
1340	n/a	I(name='NONE',
1341	n/a	code='N',
1342	n/a	arg=None,
1343	n/a	stack_before=[],
1344	n/a	stack_after=[pynone],
1345	n/a	proto=0,
1346	n/a	doc="Push None on the stack."),
1347	n/a
1348	n/a	# Ways to spell bools, starting with proto 2. See INT for how this was
1349	n/a	# done before proto 2.
1350	n/a
1351	n/a	I(name='NEWTRUE',
1352	n/a	code='\x88',
1353	n/a	arg=None,
1354	n/a	stack_before=[],
1355	n/a	stack_after=[pybool],
1356	n/a	proto=2,
1357	n/a	doc="""True.
1358	n/a
1359	n/a	Push True onto the stack."""),
1360	n/a
I(name='NEWFALSE',
  code='\x89',
  arg=None,
  stack_before=[],
  stack_after=[pybool],
  proto=2,
  doc="""False.

Push False onto the stack."""),
1370	n/a
1371	n/a	# Ways to spell Unicode strings.
1372	n/a
1373	n/a	I(name='UNICODE',
1374	n/a	code='V',
1375	n/a	arg=unicodestringnl,
1376	n/a	stack_before=[],
1377	n/a	stack_after=[pyunicode],
1378	n/a	proto=0, # this may be pure-text, but it's a later addition
1379	n/a	doc="""Push a Python Unicode string object.
1380	n/a
1381	n/a	The argument is a raw-unicode-escape encoding of a Unicode string,
1382	n/a	and so may contain embedded escape sequences. The argument extends
1383	n/a	until the next newline character.
1384	n/a	"""),
1385	n/a
I(name='SHORT_BINUNICODE',
  code='\x8c',
  arg=unicodestring1,
  stack_before=[],
  stack_after=[pyunicode],
  proto=4,
  doc="""Push a Python Unicode string object.

There are two arguments: the first is a 1-byte unsigned int
giving the number of bytes in the string. The second is that many
bytes, and is the UTF-8 encoding of the Unicode string.
"""),
1398	n/a
1399	n/a	I(name='BINUNICODE',
1400	n/a	code='X',
1401	n/a	arg=unicodestring4,
1402	n/a	stack_before=[],
1403	n/a	stack_after=[pyunicode],
1404	n/a	proto=1,
1405	n/a	doc="""Push a Python Unicode string object.
1406	n/a
1407	n/a	There are two arguments: the first is a 4-byte little-endian unsigned int
1408	n/a	giving the number of bytes in the string. The second is that many
1409	n/a	bytes, and is the UTF-8 encoding of the Unicode string.
1410	n/a	"""),
1411	n/a
I(name='BINUNICODE8',
  code='\x8d',
  arg=unicodestring8,
  stack_before=[],
  stack_after=[pyunicode],
  proto=4,
  doc="""Push a Python Unicode string object.

There are two arguments: the first is an 8-byte little-endian unsigned int
giving the number of bytes in the string. The second is that many
bytes, and is the UTF-8 encoding of the Unicode string.
"""),
1424	n/a
1425	n/a	# Ways to spell floats.
1426	n/a
1427	n/a	I(name='FLOAT',
1428	n/a	code='F',
1429	n/a	arg=floatnl,
1430	n/a	stack_before=[],
1431	n/a	stack_after=[pyfloat],
1432	n/a	proto=0,
1433	n/a	doc="""Newline-terminated decimal float literal.
1434	n/a
1435	n/a	The argument is repr(a_float), and in general requires 17 significant
1436	n/a	digits for roundtrip conversion to be an identity (this is so for
1437	n/a	IEEE-754 double precision values, which is what Python float maps to
1438	n/a	on most boxes).
1439	n/a
1440	n/a	In general, FLOAT cannot be used to transport infinities, NaNs, or
1441	n/a	minus zero across boxes (or even on a single box, if the platform C
1442	n/a	library can't read the strings it produces for such things -- Windows
1443	n/a	is like that), but may do less damage than BINFLOAT on boxes with
1444	n/a	greater precision or dynamic range than IEEE-754 double.
1445	n/a	"""),
1446	n/a
1447	n/a	I(name='BINFLOAT',
1448	n/a	code='G',
1449	n/a	arg=float8,
1450	n/a	stack_before=[],
1451	n/a	stack_after=[pyfloat],
1452	n/a	proto=1,
1453	n/a	doc="""Float stored in binary form, with 8 bytes of data.
1454	n/a
1455	n/a	This generally requires less than half the space of FLOAT encoding.
1456	n/a	In general, BINFLOAT cannot be used to transport infinities, NaNs, or
1457	n/a	minus zero, raises an exception if the exponent exceeds the range of
1458	n/a	an IEEE-754 double, and retains no more than 53 bits of precision (if
1459	n/a	there are more than that, "add a half and chop" rounding is used to
1460	n/a	cut it back to 53 significant bits).
1461	n/a	"""),
1462	n/a
1463	n/a	# Ways to build lists.
1464	n/a
1465	n/a	I(name='EMPTY_LIST',
1466	n/a	code=']',
1467	n/a	arg=None,
1468	n/a	stack_before=[],
1469	n/a	stack_after=[pylist],
1470	n/a	proto=1,
1471	n/a	doc="Push an empty list."),
1472	n/a
1473	n/a	I(name='APPEND',
1474	n/a	code='a',
1475	n/a	arg=None,
1476	n/a	stack_before=[pylist, anyobject],
1477	n/a	stack_after=[pylist],
1478	n/a	proto=0,
1479	n/a	doc="""Append an object to a list.
1480	n/a
1481	n/a	Stack before: ... pylist anyobject
1482	n/a	Stack after: ... pylist+[anyobject]
1483	n/a
1484	n/a	although pylist is really extended in-place.
1485	n/a	"""),
1486	n/a
1487	n/a	I(name='APPENDS',
1488	n/a	code='e',
1489	n/a	arg=None,
1490	n/a	stack_before=[pylist, markobject, stackslice],
1491	n/a	stack_after=[pylist],
1492	n/a	proto=1,
1493	n/a	doc="""Extend a list by a slice of stack objects.
1494	n/a
1495	n/a	Stack before: ... pylist markobject stackslice
1496	n/a	Stack after: ... pylist+stackslice
1497	n/a
1498	n/a	although pylist is really extended in-place.
1499	n/a	"""),
1500	n/a
1501	n/a	I(name='LIST',
1502	n/a	code='l',
1503	n/a	arg=None,
1504	n/a	stack_before=[markobject, stackslice],
1505	n/a	stack_after=[pylist],
1506	n/a	proto=0,
1507	n/a	doc="""Build a list out of the topmost stack slice, after markobject.
1508	n/a
1509	n/a	All the stack entries following the topmost markobject are placed into
1510	n/a	a single Python list, which single list object replaces all of the
1511	n/a	stack from the topmost markobject onward. For example,
1512	n/a
1513	n/a	Stack before: ... markobject 1 2 3 'abc'
1514	n/a	Stack after: ... [1, 2, 3, 'abc']
1515	n/a	"""),
1516	n/a
1517	n/a	# Ways to build tuples.
1518	n/a
1519	n/a	I(name='EMPTY_TUPLE',
1520	n/a	code=')',
1521	n/a	arg=None,
1522	n/a	stack_before=[],
1523	n/a	stack_after=[pytuple],
1524	n/a	proto=1,
1525	n/a	doc="Push an empty tuple."),
1526	n/a
1527	n/a	I(name='TUPLE',
1528	n/a	code='t',
1529	n/a	arg=None,
1530	n/a	stack_before=[markobject, stackslice],
1531	n/a	stack_after=[pytuple],
1532	n/a	proto=0,
1533	n/a	doc="""Build a tuple out of the topmost stack slice, after markobject.
1534	n/a
1535	n/a	All the stack entries following the topmost markobject are placed into
1536	n/a	a single Python tuple, which single tuple object replaces all of the
1537	n/a	stack from the topmost markobject onward. For example,
1538	n/a
1539	n/a	Stack before: ... markobject 1 2 3 'abc'
1540	n/a	Stack after: ... (1, 2, 3, 'abc')
1541	n/a	"""),
1542	n/a
1543	n/a	I(name='TUPLE1',
1544	n/a	code='\x85',
1545	n/a	arg=None,
1546	n/a	stack_before=[anyobject],
1547	n/a	stack_after=[pytuple],
1548	n/a	proto=2,
1549	n/a	doc="""Build a one-tuple out of the topmost item on the stack.
1550	n/a
1551	n/a	This code pops one value off the stack and pushes a tuple of
1552	n/a	length 1 whose one item is that value back onto it. In other
1553	n/a	words:
1554	n/a
1555	n/a	stack[-1] = tuple(stack[-1:])
1556	n/a	"""),
1557	n/a
1558	n/a	I(name='TUPLE2',
1559	n/a	code='\x86',
1560	n/a	arg=None,
1561	n/a	stack_before=[anyobject, anyobject],
1562	n/a	stack_after=[pytuple],
1563	n/a	proto=2,
1564	n/a	doc="""Build a two-tuple out of the top two items on the stack.
1565	n/a
1566	n/a	This code pops two values off the stack and pushes a tuple of
1567	n/a	length 2 whose items are those values back onto it. In other
1568	n/a	words:
1569	n/a
1570	n/a	stack[-2:] = [tuple(stack[-2:])]
1571	n/a	"""),
1572	n/a
1573	n/a	I(name='TUPLE3',
1574	n/a	code='\x87',
1575	n/a	arg=None,
1576	n/a	stack_before=[anyobject, anyobject, anyobject],
1577	n/a	stack_after=[pytuple],
1578	n/a	proto=2,
1579	n/a	doc="""Build a three-tuple out of the top three items on the stack.
1580	n/a
1581	n/a	This code pops three values off the stack and pushes a tuple of
1582	n/a	length 3 whose items are those values back onto it. In other
1583	n/a	words:
1584	n/a
1585	n/a	stack[-3:] = [tuple(stack[-3:])]
1586	n/a	"""),
1587	n/a
1588	n/a	# Ways to build dicts.
1589	n/a
1590	n/a	I(name='EMPTY_DICT',
1591	n/a	code='}',
1592	n/a	arg=None,
1593	n/a	stack_before=[],
1594	n/a	stack_after=[pydict],
1595	n/a	proto=1,
1596	n/a	doc="Push an empty dict."),
1597	n/a
1598	n/a	I(name='DICT',
1599	n/a	code='d',
1600	n/a	arg=None,
1601	n/a	stack_before=[markobject, stackslice],
1602	n/a	stack_after=[pydict],
1603	n/a	proto=0,
1604	n/a	doc="""Build a dict out of the topmost stack slice, after markobject.
1605	n/a
1606	n/a	All the stack entries following the topmost markobject are placed into
1607	n/a	a single Python dict, which single dict object replaces all of the
1608	n/a	stack from the topmost markobject onward. The stack slice alternates
1609	n/a	key, value, key, value, .... For example,
1610	n/a
1611	n/a	Stack before: ... markobject 1 2 3 'abc'
1612	n/a	Stack after: ... {1: 2, 3: 'abc'}
1613	n/a	"""),
1614	n/a
1615	n/a	I(name='SETITEM',
1616	n/a	code='s',
1617	n/a	arg=None,
1618	n/a	stack_before=[pydict, anyobject, anyobject],
1619	n/a	stack_after=[pydict],
1620	n/a	proto=0,
1621	n/a	doc="""Add a key+value pair to an existing dict.
1622	n/a
1623	n/a	Stack before: ... pydict key value
1624	n/a	Stack after: ... pydict
1625	n/a
1626	n/a	where pydict has been modified via pydict[key] = value.
1627	n/a	"""),
1628	n/a
1629	n/a	I(name='SETITEMS',
1630	n/a	code='u',
1631	n/a	arg=None,
1632	n/a	stack_before=[pydict, markobject, stackslice],
1633	n/a	stack_after=[pydict],
1634	n/a	proto=1,
1635	n/a	doc="""Add an arbitrary number of key+value pairs to an existing dict.
1636	n/a
1637	n/a	The slice of the stack following the topmost markobject is taken as
1638	n/a	an alternating sequence of keys and values, added to the dict
1639	n/a	immediately under the topmost markobject. Everything at and after the
1640	n/a	topmost markobject is popped, leaving the mutated dict at the top
1641	n/a	of the stack.
1642	n/a
1643	n/a	Stack before: ... pydict markobject key_1 value_1 ... key_n value_n
1644	n/a	Stack after: ... pydict
1645	n/a
1646	n/a	where pydict has been modified via pydict[key_i] = value_i for i in
1647	n/a	1, 2, ..., n, and in that order.
1648	n/a	"""),
1649	n/a
1650	n/a	# Ways to build sets
1651	n/a
1652	n/a	I(name='EMPTY_SET',
1653	n/a	code='\x8f',
1654	n/a	arg=None,
1655	n/a	stack_before=[],
1656	n/a	stack_after=[pyset],
1657	n/a	proto=4,
1658	n/a	doc="Push an empty set."),
1659	n/a
I(name='ADDITEMS',
  code='\x90',
  arg=None,
  stack_before=[pyset, markobject, stackslice],
  stack_after=[pyset],
  proto=4,
  doc="""Add an arbitrary number of items to an existing set.

The slice of the stack following the topmost markobject is taken as
a sequence of items, added to the set immediately under the topmost
markobject. Everything at and after the topmost markobject is popped,
leaving the mutated set at the top of the stack.

Stack before: ... pyset markobject item_1 ... item_n
Stack after: ... pyset

where pyset has been modified via pyset.add(item_i) for i in
1, 2, ..., n, and in that order.
"""),
1679	n/a
1680	n/a	# Way to build frozensets
1681	n/a
1682	n/a	I(name='FROZENSET',
1683	n/a	code='\x91',
1684	n/a	arg=None,
1685	n/a	stack_before=[markobject, stackslice],
1686	n/a	stack_after=[pyfrozenset],
1687	n/a	proto=4,
1688	n/a	doc="""Build a frozenset out of the topmost slice, after markobject.
1689	n/a
1690	n/a	All the stack entries following the topmost markobject are placed into
1691	n/a	a single Python frozenset, which single frozenset object replaces all
1692	n/a	of the stack from the topmost markobject onward. For example,
1693	n/a
1694	n/a	Stack before: ... markobject 1 2 3
1695	n/a	Stack after: ... frozenset({1, 2, 3})
1696	n/a	"""),
1697	n/a
1698	n/a	# Stack manipulation.
1699	n/a
1700	n/a	I(name='POP',
1701	n/a	code='0',
1702	n/a	arg=None,
1703	n/a	stack_before=[anyobject],
1704	n/a	stack_after=[],
1705	n/a	proto=0,
1706	n/a	doc="Discard the top stack item, shrinking the stack by one item."),
1707	n/a
1708	n/a	I(name='DUP',
1709	n/a	code='2',
1710	n/a	arg=None,
1711	n/a	stack_before=[anyobject],
1712	n/a	stack_after=[anyobject, anyobject],
1713	n/a	proto=0,
1714	n/a	doc="Push the top stack item onto the stack again, duplicating it."),
1715	n/a
1716	n/a	I(name='MARK',
1717	n/a	code='(',
1718	n/a	arg=None,
1719	n/a	stack_before=[],
1720	n/a	stack_after=[markobject],
1721	n/a	proto=0,
1722	n/a	doc="""Push markobject onto the stack.
1723	n/a
1724	n/a	markobject is a unique object, used by other opcodes to identify a
1725	n/a	region of the stack containing a variable number of objects for them
1726	n/a	to work on. See markobject.doc for more detail.
1727	n/a	"""),
1728	n/a
1729	n/a	I(name='POP_MARK',
1730	n/a	code='1',
1731	n/a	arg=None,
1732	n/a	stack_before=[markobject, stackslice],
1733	n/a	stack_after=[],
1734	n/a	proto=1,
1735	n/a	doc="""Pop all the stack objects at and above the topmost markobject.
1736	n/a
1737	n/a	When an opcode using a variable number of stack objects is done,
1738	n/a	POP_MARK is used to remove those objects, and to remove the markobject
1739	n/a	that delimited their starting position on the stack.
1740	n/a	"""),
1741	n/a
1742	n/a	# Memo manipulation. There are really only two operations (get and put),
1743	n/a	# each in all-text, "short binary", and "long binary" flavors.
1744	n/a
1745	n/a	I(name='GET',
1746	n/a	code='g',
1747	n/a	arg=decimalnl_short,
1748	n/a	stack_before=[],
1749	n/a	stack_after=[anyobject],
1750	n/a	proto=0,
1751	n/a	doc="""Read an object from the memo and push it on the stack.
1752	n/a
1753	n/a	The index of the memo object to push is given by the newline-terminated
1754	n/a	decimal string following. BINGET and LONG_BINGET are space-optimized
1755	n/a	versions.
1756	n/a	"""),
1757	n/a
1758	n/a	I(name='BINGET',
1759	n/a	code='h',
1760	n/a	arg=uint1,
1761	n/a	stack_before=[],
1762	n/a	stack_after=[anyobject],
1763	n/a	proto=1,
1764	n/a	doc="""Read an object from the memo and push it on the stack.
1765	n/a
1766	n/a	The index of the memo object to push is given by the 1-byte unsigned
1767	n/a	integer following.
1768	n/a	"""),
1769	n/a
1770	n/a	I(name='LONG_BINGET',
1771	n/a	code='j',
1772	n/a	arg=uint4,
1773	n/a	stack_before=[],
1774	n/a	stack_after=[anyobject],
1775	n/a	proto=1,
1776	n/a	doc="""Read an object from the memo and push it on the stack.
1777	n/a
1778	n/a	The index of the memo object to push is given by the 4-byte unsigned
1779	n/a	little-endian integer following.
1780	n/a	"""),
1781	n/a
1782	n/a	I(name='PUT',
1783	n/a	code='p',
1784	n/a	arg=decimalnl_short,
1785	n/a	stack_before=[],
1786	n/a	stack_after=[],
1787	n/a	proto=0,
1788	n/a	doc="""Store the stack top into the memo. The stack is not popped.
1789	n/a
1790	n/a	The index of the memo location to write into is given by the newline-
1791	n/a	terminated decimal string following. BINPUT and LONG_BINPUT are
1792	n/a	space-optimized versions.
1793	n/a	"""),
1794	n/a
1795	n/a	I(name='BINPUT',
1796	n/a	code='q',
1797	n/a	arg=uint1,
1798	n/a	stack_before=[],
1799	n/a	stack_after=[],
1800	n/a	proto=1,
1801	n/a	doc="""Store the stack top into the memo. The stack is not popped.
1802	n/a
1803	n/a	The index of the memo location to write into is given by the 1-byte
1804	n/a	unsigned integer following.
1805	n/a	"""),
1806	n/a
1807	n/a	I(name='LONG_BINPUT',
1808	n/a	code='r',
1809	n/a	arg=uint4,
1810	n/a	stack_before=[],
1811	n/a	stack_after=[],
1812	n/a	proto=1,
1813	n/a	doc="""Store the stack top into the memo. The stack is not popped.
1814	n/a
1815	n/a	The index of the memo location to write into is given by the 4-byte
1816	n/a	unsigned little-endian integer following.
1817	n/a	"""),
1818	n/a
1819	n/a	I(name='MEMOIZE',
1820	n/a	code='\x94',
1821	n/a	arg=None,
1822	n/a	stack_before=[anyobject],
1823	n/a	stack_after=[anyobject],
1824	n/a	proto=4,
1825	n/a	doc="""Store the stack top into the memo. The stack is not popped.
1826	n/a
1827	n/a	The index of the memo location to write is the number of
1828	n/a	elements currently present in the memo.
1829	n/a	"""),
1830	n/a
1831	n/a	# Access the extension registry (predefined objects). Akin to the GET
1832	n/a	# family.
1833	n/a
1834	n/a	I(name='EXT1',
1835	n/a	code='\x82',
1836	n/a	arg=uint1,
1837	n/a	stack_before=[],
1838	n/a	stack_after=[anyobject],
1839	n/a	proto=2,
1840	n/a	doc="""Extension code.
1841	n/a
1842	n/a	This code and the similar EXT2 and EXT4 allow using a registry
1843	n/a	of popular objects that are pickled by name, typically classes.
1844	n/a	It is envisioned that through a global negotiation and
1845	n/a	registration process, third parties can set up a mapping between
1846	n/a	ints and object names.
1847	n/a
1848	n/a	In order to guarantee pickle interchangeability, the extension
1849	n/a	code registry ought to be global, although a range of codes may
1850	n/a	be reserved for private use.
1851	n/a
1852	n/a	EXT1 has a 1-byte integer argument. This is used to index into the
1853	n/a	extension registry, and the object at that index is pushed on the stack.
1854	n/a	"""),
1855	n/a
1856	n/a	I(name='EXT2',
1857	n/a	code='\x83',
1858	n/a	arg=uint2,
1859	n/a	stack_before=[],
1860	n/a	stack_after=[anyobject],
1861	n/a	proto=2,
1862	n/a	doc="""Extension code.
1863	n/a
1864	n/a	See EXT1. EXT2 has a two-byte integer argument.
1865	n/a	"""),
1866	n/a
1867	n/a	I(name='EXT4',
1868	n/a	code='\x84',
1869	n/a	arg=int4,
1870	n/a	stack_before=[],
1871	n/a	stack_after=[anyobject],
1872	n/a	proto=2,
1873	n/a	doc="""Extension code.
1874	n/a
1875	n/a	See EXT1. EXT4 has a four-byte integer argument.
1876	n/a	"""),
1877	n/a
1878	n/a	# Push a class object, or module function, on the stack, via its module
1879	n/a	# and name.
1880	n/a
1881	n/a	I(name='GLOBAL',
1882	n/a	code='c',
1883	n/a	arg=stringnl_noescape_pair,
1884	n/a	stack_before=[],
1885	n/a	stack_after=[anyobject],
1886	n/a	proto=0,
1887	n/a	doc="""Push a global object (module.attr) on the stack.
1888	n/a
1889	n/a	Two newline-terminated strings follow the GLOBAL opcode. The first is
1890	n/a	taken as a module name, and the second as a class name. The class
1891	n/a	object module.class is pushed on the stack. More accurately, the
1892	n/a	object returned by self.find_class(module, class) is pushed on the
1893	n/a	stack, so unpickling subclasses can override this form of lookup.
1894	n/a	"""),
1895	n/a
1896	n/a	I(name='STACK_GLOBAL',
1897	n/a	code='\x93',
1898	n/a	arg=None,
1899	n/a	stack_before=[pyunicode, pyunicode],
1900	n/a	stack_after=[anyobject],
1901	n/a	proto=4,
1902	n/a	doc="""Push a global object (module.attr) on the stack.
1903	n/a	"""),
1904	n/a
1905	n/a	# Ways to build objects of classes pickle doesn't know about directly
1906	n/a	# (user-defined classes). I despair of documenting this accurately
1907	n/a	# and comprehensibly -- you really have to read the pickle code to
1908	n/a	# find all the special cases.
1909	n/a
1910	n/a	I(name='REDUCE',
1911	n/a	code='R',
1912	n/a	arg=None,
1913	n/a	stack_before=[anyobject, anyobject],
1914	n/a	stack_after=[anyobject],
1915	n/a	proto=0,
1916	n/a	doc="""Push an object built from a callable and an argument tuple.
1917	n/a
1918	n/a	The opcode is named to remind of the __reduce__() method.
1919	n/a
1920	n/a	Stack before: ... callable pytuple
1921	n/a	Stack after: ... callable(*pytuple)
1922	n/a
1923	n/a	The callable and the argument tuple are the first two items returned
1924	n/a	by a __reduce__ method. Applying the callable to the argtuple is
1925	n/a	supposed to reproduce the original object, or at least get it started.
1926	n/a	If the __reduce__ method returns a 3-tuple, the last component is an
1927	n/a	argument to be passed to the object's __setstate__, and then the REDUCE
1928	n/a	opcode is followed by code to create setstate's argument, and then a
1929	n/a	BUILD opcode to apply __setstate__ to that argument.
1930	n/a
1931	n/a	If not isinstance(callable, type), REDUCE complains unless the
1932	n/a	callable has been registered with the copyreg module's
1933	n/a	safe_constructors dict, or the callable has a magic
1934	n/a	'__safe_for_unpickling__' attribute with a true value. I'm not sure
1935	n/a	why it does this, but I've sure seen this complaint often enough when
1936	n/a	I didn't want to <wink>.
1937	n/a	"""),
1938	n/a
1939	n/a	I(name='BUILD',
1940	n/a	code='b',
1941	n/a	arg=None,
1942	n/a	stack_before=[anyobject, anyobject],
1943	n/a	stack_after=[anyobject],
1944	n/a	proto=0,
1945	n/a	doc="""Finish building an object, via __setstate__ or dict update.
1946	n/a
1947	n/a	Stack before: ... anyobject argument
1948	n/a	Stack after: ... anyobject
1949	n/a
1950	n/a	where anyobject may have been mutated, as follows:
1951	n/a
1952	n/a	If the object has a __setstate__ method,
1953	n/a
1954	n/a	anyobject.__setstate__(argument)
1955	n/a
1956	n/a	is called.
1957	n/a
1958	n/a	Else the argument must be a dict, the object must have a __dict__, and
1959	n/a	the object is updated via
1960	n/a
1961	n/a	anyobject.__dict__.update(argument)
1962	n/a	"""),
1963	n/a
1964	n/a	I(name='INST',
1965	n/a	code='i',
1966	n/a	arg=stringnl_noescape_pair,
1967	n/a	stack_before=[markobject, stackslice],
1968	n/a	stack_after=[anyobject],
1969	n/a	proto=0,
1970	n/a	doc="""Build a class instance.
1971	n/a
1972	n/a	This is the protocol 0 version of protocol 1's OBJ opcode.
1973	n/a	INST is followed by two newline-terminated strings, giving a
1974	n/a	module and class name, just as for the GLOBAL opcode (and see
1975	n/a	GLOBAL for more details about that). self.find_class(module, name)
1976	n/a	is used to get a class object.
1977	n/a
1978	n/a	In addition, all the objects on the stack following the topmost
1979	n/a	markobject are gathered into a tuple and popped (along with the
1980	n/a	topmost markobject), just as for the TUPLE opcode.
1981	n/a
1982	n/a	Now it gets complicated. If all of these are true:
1983	n/a
1984	n/a	+ The argtuple is empty (markobject was at the top of the stack
1985	n/a	at the start).
1986	n/a
1987	n/a	+ The class object does not have a __getinitargs__ attribute.
1988	n/a
1989	n/a	then we want to create an old-style class instance without invoking
1990	n/a	its __init__() method (pickle has waffled on this over the years; not
1991	n/a	calling __init__() is current wisdom). In this case, an instance of
1992	n/a	an old-style dummy class is created, and then we try to rebind its
1993	n/a	__class__ attribute to the desired class object. If this succeeds,
1994	n/a	the new instance object is pushed on the stack, and we're done.
1995	n/a
1996	n/a	Else (the argtuple is not empty, it's not an old-style class object,
1997	n/a	or the class object does have a __getinitargs__ attribute), the code
1998	n/a	first insists that the class object have a __safe_for_unpickling__
1999	n/a	attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
2000	n/a	it doesn't matter whether this attribute has a true or false value, it
2001	n/a	only matters whether it exists (XXX this is a bug). If
2002	n/a	__safe_for_unpickling__ doesn't exist, UnpicklingError is raised.
2003	n/a
2004	n/a	Else (the class object does have a __safe_for_unpickling__ attr),
2005	n/a	the class object obtained from INST's arguments is applied to the
2006	n/a	argtuple obtained from the stack, and the resulting instance object
2007	n/a	is pushed on the stack.
2008	n/a
2009	n/a	NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
2010	n/a	NOTE: the distinction between old-style and new-style classes does
2011	n/a	not make sense in Python 3.
2012	n/a	"""),
2013	n/a
2014	n/a	I(name='OBJ',
2015	n/a	code='o',
2016	n/a	arg=None,
2017	n/a	stack_before=[markobject, anyobject, stackslice],
2018	n/a	stack_after=[anyobject],
2019	n/a	proto=1,
2020	n/a	doc="""Build a class instance.
2021	n/a
2022	n/a	This is the protocol 1 version of protocol 0's INST opcode, and is
2023	n/a	very much like it. The major difference is that the class object
2024	n/a	is taken off the stack, allowing it to be retrieved from the memo
2025	n/a	repeatedly if several instances of the same class are created. This
2026	n/a	can be much more efficient (in both time and space) than repeatedly
2027	n/a	embedding the module and class names in INST opcodes.
2028	n/a
2029	n/a	Unlike INST, OBJ takes no arguments from the opcode stream. Instead
2030	n/a	the class object is taken off the stack, immediately above the
2031	n/a	topmost markobject:
2032	n/a
2033	n/a	Stack before: ... markobject classobject stackslice
2034	n/a	Stack after: ... new_instance_object
2035	n/a
2036	n/a	As for INST, the remainder of the stack above the markobject is
2037	n/a	gathered into an argument tuple, and then the logic seems identical,
2038	n/a	except that no __safe_for_unpickling__ check is done (XXX this is
2039	n/a	a bug). See INST for the gory details.
2040	n/a
2041	n/a	NOTE: In Python 2.3, INST and OBJ are identical except for how they
2042	n/a	get the class object. That was always the intent; the implementations
2043	n/a	had diverged for accidental reasons.
2044	n/a	"""),
2045	n/a
2046	n/a	I(name='NEWOBJ',
2047	n/a	code='\x81',
2048	n/a	arg=None,
2049	n/a	stack_before=[anyobject, anyobject],
2050	n/a	stack_after=[anyobject],
2051	n/a	proto=2,
2052	n/a	doc="""Build an object instance.
2053	n/a
2054	n/a	The stack before should be thought of as containing a class
2055	n/a	object followed by an argument tuple (the tuple being the stack
2056	n/a	top). Call these cls and args. They are popped off the stack,
2057	n/a	and the value returned by cls.__new__(cls, *args) is pushed back
2058	n/a	onto the stack.
2059	n/a	"""),
2060	n/a
2061	n/a	I(name='NEWOBJ_EX',
2062	n/a	code='\x92',
2063	n/a	arg=None,
2064	n/a	stack_before=[anyobject, anyobject, anyobject],
2065	n/a	stack_after=[anyobject],
2066	n/a	proto=4,
2067	n/a	doc="""Build an object instance.
2068	n/a
2069	n/a	The stack before should be thought of as containing a class
2070	n/a	object followed by an argument tuple and by a keyword argument dict
2071	n/a	(the dict being the stack top). Call these cls and args. They are
2072	n/a	popped off the stack, and the value returned by
2073	n/a	cls.__new__(cls, args, kwargs) is pushed back onto the stack.
2074	n/a	"""),
2075	n/a
2076	n/a	# Machine control.
2077	n/a
2078	n/a	I(name='PROTO',
2079	n/a	code='\x80',
2080	n/a	arg=uint1,
2081	n/a	stack_before=[],
2082	n/a	stack_after=[],
2083	n/a	proto=2,
2084	n/a	doc="""Protocol version indicator.
2085	n/a
2086	n/a	For protocol 2 and above, a pickle must start with this opcode.
2087	n/a	The argument is the protocol version, an int in range(2, 256).
2088	n/a	"""),
2089	n/a
2090	n/a	I(name='STOP',
2091	n/a	code='.',
2092	n/a	arg=None,
2093	n/a	stack_before=[anyobject],
2094	n/a	stack_after=[],
2095	n/a	proto=0,
2096	n/a	doc="""Stop the unpickling machine.
2097	n/a
2098	n/a	Every pickle ends with this opcode. The object at the top of the stack
2099	n/a	is popped, and that's the result of unpickling. The stack should be
2100	n/a	empty then.
2101	n/a	"""),
2102	n/a
2103	n/a	# Framing support.
2104	n/a
2105	n/a	I(name='FRAME',
2106	n/a	code='\x95',
2107	n/a	arg=uint8,
2108	n/a	stack_before=[],
2109	n/a	stack_after=[],
2110	n/a	proto=4,
2111	n/a	doc="""Indicate the beginning of a new frame.
2112	n/a
2113	n/a	The unpickler may use this opcode to safely prefetch data from its
2114	n/a	underlying stream.
2115	n/a	"""),
2116	n/a
2117	n/a	# Ways to deal with persistent IDs.
2118	n/a
2119	n/a	I(name='PERSID',
2120	n/a	code='P',
2121	n/a	arg=stringnl_noescape,
2122	n/a	stack_before=[],
2123	n/a	stack_after=[anyobject],
2124	n/a	proto=0,
2125	n/a	doc="""Push an object identified by a persistent ID.
2126	n/a
2127	n/a	The pickle module doesn't define what a persistent ID means. PERSID's
2128	n/a	argument is a newline-terminated str-style (no embedded escapes, no
2129	n/a	bracketing quote characters) string, which is "the persistent ID".
2130	n/a	The unpickler passes this string to self.persistent_load(). Whatever
2131	n/a	object that returns is pushed on the stack. There is no implementation
2132	n/a	of persistent_load() in Python's unpickler: it must be supplied by an
2133	n/a	unpickler subclass.
2134	n/a	"""),
2135	n/a
2136	n/a	I(name='BINPERSID',
2137	n/a	code='Q',
2138	n/a	arg=None,
2139	n/a	stack_before=[anyobject],
2140	n/a	stack_after=[anyobject],
2141	n/a	proto=1,
2142	n/a	doc="""Push an object identified by a persistent ID.
2143	n/a
2144	n/a	Like PERSID, except the persistent ID is popped off the stack (instead
2145	n/a	of being a string embedded in the opcode bytestream). The persistent
2146	n/a	ID is passed to self.persistent_load(), and whatever object that
2147	n/a	returns is pushed on the stack. See PERSID for more detail.
2148	n/a	"""),
2149	n/a	]
2150	n/a	del I
2151	n/a
# Verify uniqueness of .name and .code members.
# Each dict maps a key that has already been seen to the index of the
# opcode record that introduced it, so a clash can report both positions.
name2i = {}
code2i = {}

for i, d in enumerate(opcodes):
    # setdefault() returns the index first recorded for the key; getting
    # back anything other than the current index means the key is repeated.
    if name2i.setdefault(d.name, i) != i:
        raise ValueError("repeated name %r at indices %d and %d" %
                         (d.name, name2i[d.name], i))
    if code2i.setdefault(d.code, i) != i:
        raise ValueError("repeated code %r at indices %d and %d" %
                         (d.code, code2i[d.code], i))

# Remove the scratch names so they do not linger in the module namespace.
del name2i, code2i, i, d
2168	n/a
2169	n/a	##############################################################################
2170	n/a	# Build a code2op dict, mapping opcode characters to OpcodeInfo records.
2171	n/a	# Also ensure we've got the same stuff as pickle.py, although the
2172	n/a	# introspection here is dicey.
2173	n/a
# Map each opcode character (a 1-character str) to its OpcodeInfo record.
# A comprehension keeps the loop variable out of the module namespace.
code2op = {record.code: record for record in opcodes}
2178	n/a
def assure_pickle_consistency(verbose=False):
    """Cross-check the code2op table against the names exported by pickle.

    Every name in pickle.__all__ that looks like an opcode constant (an
    upper-case identifier bound to a 1-byte bytes object) must appear in
    code2op under the same name, and every entry of code2op must be matched
    by such a constant.

    If 'verbose' is true, a line is printed for each name skipped or checked.

    Raises ValueError on the first mismatch found, or at the end if some
    entries of code2op have no counterpart in pickle.
    """
    # Work on a copy: entries are removed as they are matched, so whatever
    # remains at the end is unknown to pickle.
    unmatched = code2op.copy()
    for name in pickle.__all__:
        if not re.match("[A-Z][A-Z0-9_]+$", name):
            if verbose:
                print("skipping %r: it doesn't look like an opcode name" % name)
            continue

        value = getattr(pickle, name)
        if not (isinstance(value, bytes) and len(value) == 1):
            if verbose:
                print(("skipping %r: value %r doesn't look like a pickle "
                       "code" % (name, value)))
            continue

        # code2op is keyed by str, pickle's constants are bytes.
        char = value.decode("latin-1")
        if char not in unmatched:
            raise ValueError("pickle.py appears to have a pickle opcode with "
                             "name %r and code %r, but we don't" %
                             (name, char))

        if verbose:
            print("checking name %r w/ code %r for consistency" % (
                  name, char))
        # Forget this one.  Any left over at the end are a problem of a
        # different kind.
        ours = unmatched.pop(char)
        if ours.name != name:
            raise ValueError("for pickle code %r, pickle.py uses name %r "
                             "but we're using name %r" % (char,
                                                          name,
                                                          ours.name))

    if unmatched:
        msg = ["we appear to have pickle opcodes that pickle.py doesn't have:"]
        msg.extend(" name %r with code %r" % (info.name, char)
                   for char, info in unmatched.items())
        raise ValueError("\n".join(msg))
2216	n/a
# Run the cross-check once, at import time, so a mismatch with pickle.py
# surfaces as a ValueError when this module is imported.  The helper is then
# deleted: it is a one-shot sanity check and is not listed in __all__.
assure_pickle_consistency()
del assure_pickle_consistency
2219	n/a
2220	n/a	##############################################################################
2221	n/a	# A pickle opcode generator.
2222	n/a
def _genops(data, yield_end_pos=False):
    """Yield one tuple per opcode read from 'data', through the first STOP.

    'data' is either a bytes-like object (one of bytes_types), which is
    wrapped in an io.BytesIO, or an object with a read() method.

    Each item is (opcode, arg, pos), or (opcode, arg, pos, end_pos) when
    'yield_end_pos' is true.  opcode is the code2op record for the byte read,
    arg is the decoded argument (None if the opcode has none), and pos /
    end_pos are data.tell() before and after the opcode; both are None when
    'data' has no tell() method.

    Raises ValueError if the data runs out before a STOP opcode or if an
    unknown opcode byte is met.
    """
    if isinstance(data, bytes_types):
        data = io.BytesIO(data)

    # Streams that cannot report a position yield None for every position.
    getpos = data.tell if hasattr(data, "tell") else (lambda: None)

    while True:
        pos = getpos()
        code = data.read(1)
        opcode = code2op.get(code.decode("latin-1"))
        if opcode is None:
            # An empty read means end of data; anything else is a byte
            # that is not in the opcode table.
            if code != b"":
                raise ValueError("at position %s, opcode %r unknown" % (
                                 "<unknown>" if pos is None else pos,
                                 code))
            raise ValueError("pickle exhausted before seeing STOP")

        arg = None if opcode.arg is None else opcode.arg.reader(data)

        if yield_end_pos:
            yield opcode, arg, pos, getpos()
        else:
            yield opcode, arg, pos

        if code == b'.':
            assert opcode.name == 'STOP'
            return
2254	n/a
def genops(pickle):
    """Generate all the opcodes in a pickle.

    'pickle' is a file-like object, or a bytes object, containing the pickle.

    Opcodes are generated starting at the current pickle position, and
    generation stops after a STOP opcode has been delivered.  Each opcode
    produces a triple:

        opcode, arg, pos

    opcode is an OpcodeInfo record describing the current opcode.

    arg is the decoded value of the argument embedded in the pickle, as a
    Python object, or None if the opcode has no argument.

    pos is the value pickle.tell() had before the current opcode was read,
    when the pickle has a tell() method.  A bytes object is wrapped in a
    BytesIO object and that object's tell() result is used.  Otherwise (the
    pickle has no tell(), so its position cannot be queried) pos is None.
    """
    # The public generator never reports end positions.
    return _genops(pickle, yield_end_pos=False)
2279	n/a
2280	n/a	##############################################################################
2281	n/a	# A pickle optimizer.
2282	n/a
def optimize(p):
    """Optimize a pickle string by removing unused PUT opcodes.

    'p' holds the pickle; it is both scanned with _genops() and sliced by
    opcode position, so it has to support slicing.

    The result is a new bytes object in which:

    + memo stores (PUT family and MEMOIZE) that no GET opcode reads back
      are dropped;
    + the remaining memo ids are renumbered consecutively from 0;
    + FRAME opcodes are discarded and framing is regenerated by a
      pickle._Pickler when the detected protocol is 4 or higher.
    """
    put_tag = 'PUT'
    get_tag = 'GET'
    stored_ids = set()      # every memo id written by a PUT/MEMOIZE
    remap = {}              # memo id read by a GET -> renumbered id
    plan = []               # (tag, memo id) or (start, end) slice of p
    proto = 0
    protoheader = b''

    # Pass 1: classify every opcode and note which memo ids are read back.
    for opcode, arg, start, end in _genops(p, yield_end_pos=True):
        name = opcode.name
        if 'PUT' in name:
            stored_ids.add(arg)
            plan.append((put_tag, arg))
        elif name == 'MEMOIZE':
            # MEMOIZE carries no argument; its id is the count of ids so far.
            implicit_id = len(stored_ids)
            stored_ids.add(implicit_id)
            plan.append((put_tag, implicit_id))
        elif 'FRAME' in name:
            continue
        elif 'GET' in name:
            proto = max(proto, opcode.proto)
            remap[arg] = None
            plan.append((get_tag, arg))
        else:
            if name == 'PROTO':
                proto = max(proto, arg)
                if start == 0:
                    # A leading PROTO is emitted separately, ahead of any
                    # frame.
                    protoheader = p[start:end]
                    continue
            plan.append((start, end))

    # Pass 2: copy the opcodes except for PUTs without a corresponding GET.
    out = io.BytesIO()
    # Write the PROTO header before any framing
    out.write(protoheader)
    pickler = pickle._Pickler(out, proto)
    if proto >= 4:
        pickler.framer.start_framing()
    next_id = 0
    for first, second in plan:
        if first is put_tag:
            if second not in remap:
                continue
            remap[second] = next_id
            data = pickler.put(next_id)
            next_id += 1
        elif first is get_tag:
            data = pickler.get(remap[second])
        else:
            data = p[first:second]
        pickler.framer.commit_frame()
        pickler.write(data)
    pickler.framer.end_framing()
    return out.getvalue()
2341	n/a
2342	n/a	##############################################################################
2343	n/a	# A symbolic pickle disassembler.
2344	n/a
def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0):
    """Produce a symbolic disassembly of a pickle.

    'pickle' is a file-like object, or bytes object, containing a (at least
    one) pickle.  The pickle is disassembled from the current position,
    through the first STOP opcode encountered.

    Optional arg 'out' is a file-like object to which the disassembly is
    printed.  It defaults to sys.stdout.

    Optional arg 'memo' is a Python dict, used as the pickle's memo.  It
    may be mutated by dis(), if the pickle contains PUT, BINPUT, LONG_BINPUT
    or MEMOIZE opcodes.  Passing the same memo object to another dis() call
    then allows disassembly to proceed across multiple pickles that were all
    created by the same pickler with the same memo.  Ordinarily you don't
    need to worry about this.

    Optional arg 'indentlevel' is the number of blanks by which to indent
    a new MARK level.  It defaults to 4.

    Optional arg 'annotate' if nonzero instructs dis() to add short
    description of the opcode on each line of disassembled output.
    The value given to 'annotate' must be an integer and is used as a
    hint for the column where annotation should start.  The default
    value is 0, meaning no annotations.

    In addition to printing the disassembly, some sanity checks are made:

    + All embedded opcode arguments "make sense".

    + Explicit and implicit pop operations have enough items on the stack.

    + When an opcode implicitly refers to a markobject, a markobject is
      actually on the stack.

    + A memo entry isn't referenced before it's defined.

    + The markobject isn't stored in the memo.

    + A memo entry isn't redefined.

    A failed check raises ValueError, after the offending opcode's line has
    been printed.  ValueError is also raised if the emulated stack is not
    empty once STOP has been processed.
    """

    # Most of the hair here is for sanity checks, but most of it is needed
    # anyway to detect when a protocol 0 POP takes a MARK off the stack
    # (which in turn is needed to indent MARK blocks correctly).

    stack = []          # crude emulation of unpickler stack
    if memo is None:
        memo = {}       # crude emulation of unpickler memo
    maxproto = -1       # max protocol number seen
    markstack = []      # bytecode positions of MARK opcodes
    indentchunk = ' ' * indentlevel
    errormsg = None
    annocol = annotate  # column hint for annotations
    for opcode, arg, pos in genops(pickle):
        if pos is not None:
            print("%5d:" % pos, end=' ', file=out)

        line = "%-4s %s%s" % (repr(opcode.code)[1:-1],
                              indentchunk * len(markstack),
                              opcode.name)

        maxproto = max(maxproto, opcode.proto)
        before = opcode.stack_before    # don't mutate
        after = opcode.stack_after      # don't mutate
        numtopop = len(before)

        # See whether a MARK should be popped.
        markmsg = None
        if markobject in before or (opcode.name == "POP" and
                                    stack and
                                    stack[-1] is markobject):
            assert markobject not in after
            if __debug__:
                if markobject in before:
                    assert before[-1] is stackslice
            if markstack:
                markpos = markstack.pop()
                if markpos is None:
                    markmsg = "(MARK at unknown opcode offset)"
                else:
                    markmsg = "(MARK at %d)" % markpos
                # Pop everything at and after the topmost markobject.
                while stack[-1] is not markobject:
                    stack.pop()
                stack.pop()
                # Stop later code from popping too much.
                try:
                    numtopop = before.index(markobject)
                except ValueError:
                    assert opcode.name == "POP"
                    numtopop = 0
            else:
                errormsg = markmsg = "no MARK exists on stack"

        # Check for correct memo usage.
        if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"):
            if opcode.name == "MEMOIZE":
                # MEMOIZE has no argument: the slot is the current memo size.
                memo_idx = len(memo)
                markmsg = "(as %d)" % memo_idx
            else:
                assert arg is not None
                memo_idx = arg
            if memo_idx in memo:
                # Report the slot actually used; 'arg' is None for MEMOIZE.
                errormsg = "memo key %r already defined" % memo_idx
            elif not stack:
                errormsg = "stack is empty -- can't store into memo"
            elif stack[-1] is markobject:
                errormsg = "can't store markobject in the memo"
            else:
                memo[memo_idx] = stack[-1]
        elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
            if arg in memo:
                assert len(after) == 1
                after = [memo[arg]]     # for better stack emulation
            else:
                errormsg = "memo key %r has never been stored into" % arg

        if arg is not None or markmsg:
            # make a mild effort to align arguments
            line += ' ' * (10 - len(opcode.name))
            if arg is not None:
                line += ' ' + repr(arg)
            if markmsg:
                line += ' ' + markmsg
        if annotate:
            line += ' ' * (annocol - len(line))
            # make a mild effort to align annotations
            annocol = len(line)
            if annocol > 50:
                annocol = annotate
            # Only the first line of the opcode's doc is used as annotation.
            line += ' ' + opcode.doc.split('\n', 1)[0]
        print(line, file=out)

        if errormsg:
            # Note that we delayed complaining until the offending opcode
            # was printed.
            raise ValueError(errormsg)

        # Emulate the stack effects.
        if len(stack) < numtopop:
            raise ValueError("tries to pop %d items from stack with "
                             "only %d items" % (numtopop, len(stack)))
        if numtopop:
            del stack[-numtopop:]
        if markobject in after:
            assert markobject not in before
            markstack.append(pos)

        stack.extend(after)

    print("highest protocol among opcodes =", maxproto, file=out)
    if stack:
        raise ValueError("stack not empty after STOP: %r" % stack)
2498	n/a
# For use in the doctest, simply as an example of a class to pickle.
class _Example:
    """Trivial class whose instances carry a single 'value' attribute."""

    def __init__(self, value):
        # The only state: stored as a plain instance attribute.
        self.value = value
2503	n/a
2504	n/a	_dis_test = r"""
2505	n/a	>>> import pickle
2506	n/a	>>> x = [1, 2, (3, 4), {b'abc': "def"}]
2507	n/a	>>> pkl0 = pickle.dumps(x, 0)
2508	n/a	>>> dis(pkl0)
2509	n/a	0: ( MARK
2510	n/a	1: l LIST (MARK at 0)
2511	n/a	2: p PUT 0
2512	n/a	5: L LONG 1
2513	n/a	9: a APPEND
2514	n/a	10: L LONG 2
2515	n/a	14: a APPEND
2516	n/a	15: ( MARK
2517	n/a	16: L LONG 3
2518	n/a	20: L LONG 4
2519	n/a	24: t TUPLE (MARK at 15)
2520	n/a	25: p PUT 1
2521	n/a	28: a APPEND
2522	n/a	29: ( MARK
2523	n/a	30: d DICT (MARK at 29)
2524	n/a	31: p PUT 2
2525	n/a	34: c GLOBAL '_codecs encode'
2526	n/a	50: p PUT 3
2527	n/a	53: ( MARK
2528	n/a	54: V UNICODE 'abc'
2529	n/a	59: p PUT 4
2530	n/a	62: V UNICODE 'latin1'
2531	n/a	70: p PUT 5
2532	n/a	73: t TUPLE (MARK at 53)
2533	n/a	74: p PUT 6
2534	n/a	77: R REDUCE
2535	n/a	78: p PUT 7
2536	n/a	81: V UNICODE 'def'
2537	n/a	86: p PUT 8
2538	n/a	89: s SETITEM
2539	n/a	90: a APPEND
2540	n/a	91: . STOP
2541	n/a	highest protocol among opcodes = 0
2542	n/a
2543	n/a	Try again with a "binary" pickle.
2544	n/a
2545	n/a	>>> pkl1 = pickle.dumps(x, 1)
2546	n/a	>>> dis(pkl1)
2547	n/a	0: ] EMPTY_LIST
2548	n/a	1: q BINPUT 0
2549	n/a	3: ( MARK
2550	n/a	4: K BININT1 1
2551	n/a	6: K BININT1 2
2552	n/a	8: ( MARK
2553	n/a	9: K BININT1 3
2554	n/a	11: K BININT1 4
2555	n/a	13: t TUPLE (MARK at 8)
2556	n/a	14: q BINPUT 1
2557	n/a	16: } EMPTY_DICT
2558	n/a	17: q BINPUT 2
2559	n/a	19: c GLOBAL '_codecs encode'
2560	n/a	35: q BINPUT 3
2561	n/a	37: ( MARK
2562	n/a	38: X BINUNICODE 'abc'
2563	n/a	46: q BINPUT 4
2564	n/a	48: X BINUNICODE 'latin1'
2565	n/a	59: q BINPUT 5
2566	n/a	61: t TUPLE (MARK at 37)
2567	n/a	62: q BINPUT 6
2568	n/a	64: R REDUCE
2569	n/a	65: q BINPUT 7
2570	n/a	67: X BINUNICODE 'def'
2571	n/a	75: q BINPUT 8
2572	n/a	77: s SETITEM
2573	n/a	78: e APPENDS (MARK at 3)
2574	n/a	79: . STOP
2575	n/a	highest protocol among opcodes = 1
2576	n/a
2577	n/a	Exercise the INST/OBJ/BUILD family.
2578	n/a
2579	n/a	>>> import pickletools
2580	n/a	>>> dis(pickle.dumps(pickletools.dis, 0))
2581	n/a	0: c GLOBAL 'pickletools dis'
2582	n/a	17: p PUT 0
2583	n/a	20: . STOP
2584	n/a	highest protocol among opcodes = 0
2585	n/a
2586	n/a	>>> from pickletools import _Example
2587	n/a	>>> x = [_Example(42)] * 2
2588	n/a	>>> dis(pickle.dumps(x, 0))
2589	n/a	0: ( MARK
2590	n/a	1: l LIST (MARK at 0)
2591	n/a	2: p PUT 0
2592	n/a	5: c GLOBAL 'copy_reg _reconstructor'
2593	n/a	30: p PUT 1
2594	n/a	33: ( MARK
2595	n/a	34: c GLOBAL 'pickletools _Example'
2596	n/a	56: p PUT 2
2597	n/a	59: c GLOBAL '__builtin__ object'
2598	n/a	79: p PUT 3
2599	n/a	82: N NONE
2600	n/a	83: t TUPLE (MARK at 33)
2601	n/a	84: p PUT 4
2602	n/a	87: R REDUCE
2603	n/a	88: p PUT 5
2604	n/a	91: ( MARK
2605	n/a	92: d DICT (MARK at 91)
2606	n/a	93: p PUT 6
2607	n/a	96: V UNICODE 'value'
2608	n/a	103: p PUT 7
2609	n/a	106: L LONG 42
2610	n/a	111: s SETITEM
2611	n/a	112: b BUILD
2612	n/a	113: a APPEND
2613	n/a	114: g GET 5
2614	n/a	117: a APPEND
2615	n/a	118: . STOP
2616	n/a	highest protocol among opcodes = 0
2617	n/a
2618	n/a	>>> dis(pickle.dumps(x, 1))
2619	n/a	0: ] EMPTY_LIST
2620	n/a	1: q BINPUT 0
2621	n/a	3: ( MARK
2622	n/a	4: c GLOBAL 'copy_reg _reconstructor'
2623	n/a	29: q BINPUT 1
2624	n/a	31: ( MARK
2625	n/a	32: c GLOBAL 'pickletools _Example'
2626	n/a	54: q BINPUT 2
2627	n/a	56: c GLOBAL '__builtin__ object'
2628	n/a	76: q BINPUT 3
2629	n/a	78: N NONE
2630	n/a	79: t TUPLE (MARK at 31)
2631	n/a	80: q BINPUT 4
2632	n/a	82: R REDUCE
2633	n/a	83: q BINPUT 5
2634	n/a	85: } EMPTY_DICT
2635	n/a	86: q BINPUT 6
2636	n/a	88: X BINUNICODE 'value'
2637	n/a	98: q BINPUT 7
2638	n/a	100: K BININT1 42
2639	n/a	102: s SETITEM
2640	n/a	103: b BUILD
2641	n/a	104: h BINGET 5
2642	n/a	106: e APPENDS (MARK at 3)
2643	n/a	107: . STOP
2644	n/a	highest protocol among opcodes = 1
2645	n/a
2646	n/a	Try "the canonical" recursive-object test.
2647	n/a
2648	n/a	>>> L = []
2649	n/a	>>> T = L,
2650	n/a	>>> L.append(T)
2651	n/a	>>> L[0] is T
2652	n/a	True
2653	n/a	>>> T[0] is L
2654	n/a	True
2655	n/a	>>> L[0][0] is L
2656	n/a	True
2657	n/a	>>> T[0][0] is T
2658	n/a	True
2659	n/a	>>> dis(pickle.dumps(L, 0))
2660	n/a	0: ( MARK
2661	n/a	1: l LIST (MARK at 0)
2662	n/a	2: p PUT 0
2663	n/a	5: ( MARK
2664	n/a	6: g GET 0
2665	n/a	9: t TUPLE (MARK at 5)
2666	n/a	10: p PUT 1
2667	n/a	13: a APPEND
2668	n/a	14: . STOP
2669	n/a	highest protocol among opcodes = 0
2670	n/a
2671	n/a	>>> dis(pickle.dumps(L, 1))
2672	n/a	0: ] EMPTY_LIST
2673	n/a	1: q BINPUT 0
2674	n/a	3: ( MARK
2675	n/a	4: h BINGET 0
2676	n/a	6: t TUPLE (MARK at 3)
2677	n/a	7: q BINPUT 1
2678	n/a	9: a APPEND
2679	n/a	10: . STOP
2680	n/a	highest protocol among opcodes = 1
2681	n/a
2682	n/a	Note that, in the protocol 0 pickle of the recursive tuple, the disassembler
2683	n/a	has to emulate the stack in order to realize that the POP opcode at 16 gets
2684	n/a	rid of the MARK at 0.
2685	n/a
2686	n/a	>>> dis(pickle.dumps(T, 0))
2687	n/a	0: ( MARK
2688	n/a	1: ( MARK
2689	n/a	2: l LIST (MARK at 1)
2690	n/a	3: p PUT 0
2691	n/a	6: ( MARK
2692	n/a	7: g GET 0
2693	n/a	10: t TUPLE (MARK at 6)
2694	n/a	11: p PUT 1
2695	n/a	14: a APPEND
2696	n/a	15: 0 POP
2697	n/a	16: 0 POP (MARK at 0)
2698	n/a	17: g GET 1
2699	n/a	20: . STOP
2700	n/a	highest protocol among opcodes = 0
2701	n/a
2702	n/a	>>> dis(pickle.dumps(T, 1))
2703	n/a	0: ( MARK
2704	n/a	1: ] EMPTY_LIST
2705	n/a	2: q BINPUT 0
2706	n/a	4: ( MARK
2707	n/a	5: h BINGET 0
2708	n/a	7: t TUPLE (MARK at 4)
2709	n/a	8: q BINPUT 1
2710	n/a	10: a APPEND
2711	n/a	11: 1 POP_MARK (MARK at 0)
2712	n/a	12: h BINGET 1
2713	n/a	14: . STOP
2714	n/a	highest protocol among opcodes = 1
2715	n/a
2716	n/a	Try protocol 2.
2717	n/a
2718	n/a	>>> dis(pickle.dumps(L, 2))
2719	n/a	0: \x80 PROTO 2
2720	n/a	2: ] EMPTY_LIST
2721	n/a	3: q BINPUT 0
2722	n/a	5: h BINGET 0
2723	n/a	7: \x85 TUPLE1
2724	n/a	8: q BINPUT 1
2725	n/a	10: a APPEND
2726	n/a	11: . STOP
2727	n/a	highest protocol among opcodes = 2
2728	n/a
2729	n/a	>>> dis(pickle.dumps(T, 2))
2730	n/a	0: \x80 PROTO 2
2731	n/a	2: ] EMPTY_LIST
2732	n/a	3: q BINPUT 0
2733	n/a	5: h BINGET 0
2734	n/a	7: \x85 TUPLE1
2735	n/a	8: q BINPUT 1
2736	n/a	10: a APPEND
2737	n/a	11: 0 POP
2738	n/a	12: h BINGET 1
2739	n/a	14: . STOP
2740	n/a	highest protocol among opcodes = 2
2741	n/a
2742	n/a	Try protocol 3 with annotations:
2743	n/a
2744	n/a	>>> dis(pickle.dumps(T, 3), annotate=1)
2745	n/a	0: \x80 PROTO 3 Protocol version indicator.
2746	n/a	2: ] EMPTY_LIST Push an empty list.
2747	n/a	3: q BINPUT 0 Store the stack top into the memo. The stack is not popped.
2748	n/a	5: h BINGET 0 Read an object from the memo and push it on the stack.
2749	n/a	7: \x85 TUPLE1 Build a one-tuple out of the topmost item on the stack.
2750	n/a	8: q BINPUT 1 Store the stack top into the memo. The stack is not popped.
2751	n/a	10: a APPEND Append an object to a list.
2752	n/a	11: 0 POP Discard the top stack item, shrinking the stack by one item.
2753	n/a	12: h BINGET 1 Read an object from the memo and push it on the stack.
2754	n/a	14: . STOP Stop the unpickling machine.
2755	n/a	highest protocol among opcodes = 2
2756	n/a
2757	n/a	"""
2758	n/a
# Doctest for dis()'s ``memo`` argument.  One Pickler dumps the same list
# twice into a single stream, so the second pickle consists only of a
# BINGET of memo slot 0.  The same ``memo`` dict is handed to both dis()
# calls, and the second call starts at offset 14, where the first stopped.
# NOTE(review): presumably the shared dict is what lets the second call
# accept the BINGET without a preceding BINPUT -- confirm against dis().
_memo_test = r"""
>>> import pickle
>>> import io
>>> f = io.BytesIO()
>>> p = pickle.Pickler(f, 2)
>>> x = [1, 2, 3]
>>> p.dump(x)
>>> p.dump(x)
>>> f.seek(0)
0
>>> memo = {}
>>> dis(f, memo=memo)
0: \x80 PROTO 2
2: ] EMPTY_LIST
3: q BINPUT 0
5: ( MARK
6: K BININT1 1
8: K BININT1 2
10: K BININT1 3
12: e APPENDS (MARK at 5)
13: . STOP
highest protocol among opcodes = 2
>>> dis(f, memo=memo)
14: \x80 PROTO 2
16: h BINGET 0
18: . STOP
highest protocol among opcodes = 2
"""

# Named doctest strings defined at module level.  The doctest module looks
# for a module-level ``__test__`` dict and runs the string values found in it
# as additional tests when _test() calls doctest.testmod().
__test__ = {'disassembler_test': _dis_test,
            'disassembler_memo_test': _memo_test,
           }
2791	n/a
2792	n/a	def _test():
2793	n/a	import doctest
2794	n/a	return doctest.testmod()
2795	n/a
if __name__ == "__main__":
    # Command-line entry point: disassemble the named pickle files, or run
    # the module's self-test suite when -t/--test is given.
    import argparse
    parser = argparse.ArgumentParser(
        description='disassemble one or more pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-o', '--output', default=sys.stdout, type=argparse.FileType('w'),
        help='the file where the output should be written')
    parser.add_argument(
        '-m', '--memo', action='store_true',
        help='preserve memo between disassemblies')
    parser.add_argument(
        '-l', '--indentlevel', default=4, type=int,
        help='the number of blanks by which to indent a new MARK level')
    parser.add_argument(
        '-a', '--annotate', action='store_true',
        help='annotate each line with a short opcode description')
    parser.add_argument(
        '-p', '--preamble', default="==> {name} <==",
        help='if more than one pickle file is specified, print this before'
        ' each disassembly')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    try:
        if args.test:
            _test()
        else:
            # dis() takes the annotation column as an int; 0 disables it.
            annotate = 30 if args.annotate else 0
            if not args.pickle_file:
                parser.print_help()
            elif len(args.pickle_file) == 1:
                # A single file needs neither a preamble nor a shared memo.
                dis(args.pickle_file[0], args.output, None,
                    args.indentlevel, annotate)
            else:
                # With --memo, one dict is reused so later files can refer
                # to memo entries recorded while disassembling earlier ones.
                memo = {} if args.memo else None
                for f in args.pickle_file:
                    preamble = args.preamble.format(name=f.name)
                    args.output.write(preamble + '\n')
                    dis(f, args.output, memo, args.indentlevel, annotate)
    finally:
        # argparse.FileType opens every file during parse_args() and never
        # closes it.  Release them here, even if disassembly raised, so the
        # output is flushed and no handle leaks.  The interpreter's own
        # standard streams (the -o default, or what FileType returns for
        # '-') are left open.
        std_streams = (sys.stdin, getattr(sys.stdin, 'buffer', None),
                       sys.stdout, getattr(sys.stdout, 'buffer', None))
        for stream in args.pickle_file + [args.output]:
            if not any(stream is std for std in std_streams):
                stream.close()