1 | n/a | #! /usr/bin/env python3 |
---|
2 | n/a | |
---|
3 | n/a | """ |
---|
4 | n/a | combinerefs path |
---|
5 | n/a | |
---|
6 | n/a | A helper for analyzing PYTHONDUMPREFS output. |
---|
7 | n/a | |
---|
8 | n/a | When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown |
---|
9 | n/a | time Py_FinalizeEx() prints the list of all live objects twice: first it |
---|
10 | n/a | prints the repr() of each object while the interpreter is still fully intact. |
---|
11 | n/a | After cleaning up everything it can, it prints all remaining live objects |
---|
12 | n/a | again, but the second time just prints their addresses, refcounts, and type |
---|
13 | n/a | names (because the interpreter has been torn down, calling repr methods at |
---|
14 | n/a | this point can get into infinite loops or blow up). |
---|
15 | n/a | |
---|
16 | n/a | Save all this output into a file, then run this script passing the path to |
---|
17 | n/a | that file. The script finds both output chunks, combines them, then prints |
---|
18 | n/a | a line of output for each object still alive at the end: |
---|
19 | n/a | |
---|
20 | n/a | address refcnt typename repr |
---|
21 | n/a | |
---|
22 | n/a | address is the address of the object, in whatever format the platform C |
---|
23 | n/a | produces for a %p format code. |
---|
24 | n/a | |
---|
25 | n/a | refcnt is of the form |
---|
26 | n/a | |
---|
27 | n/a | "[" ref "]" |
---|
28 | n/a | |
---|
29 | n/a | when the object's refcount is the same in both PYTHONDUMPREFS output blocks, |
---|
30 | n/a | or |
---|
31 | n/a | |
---|
32 | n/a | "[" ref_before "->" ref_after "]" |
---|
33 | n/a | |
---|
34 | n/a | if the refcount changed. |
---|
35 | n/a | |
---|
36 | n/a | typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS |
---|
37 | n/a | output block. |
---|
38 | n/a | |
---|
39 | n/a | repr is repr(object), extracted from the first PYTHONDUMPREFS output block. |
---|
40 | n/a | CAUTION: If object is a container type, it may not actually contain all the |
---|
41 | n/a | objects shown in the repr: the repr was captured from the first output block, |
---|
42 | n/a | and some of the containees may have been released since then. For example, |
---|
43 | n/a | it's common for the line showing the dict of interned strings to display |
---|
44 | n/a | strings that no longer exist at the end of Py_FinalizeEx; this can be recognized |
---|
45 | n/a | (albeit painfully) because such containees don't have a line of their own. |
---|
46 | n/a | |
---|
47 | n/a | The objects are listed in allocation order, with most-recently allocated |
---|
48 | n/a | printed first, and the first object allocated printed last. |
---|
49 | n/a | |
---|
50 | n/a | |
---|
51 | n/a | Simple examples: |
---|
52 | n/a | |
---|
53 | n/a | 00857060 [14] str '__len__' |
---|
54 | n/a | |
---|
55 | n/a | The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS |
---|
56 | n/a | output blocks said there were 14 references to it. This is probably due to |
---|
57 | n/a | C modules that intern the string "__len__" and keep a reference to it in a |
---|
58 | n/a | file static. |
---|
59 | n/a | |
---|
60 | n/a | 00857038 [46->5] tuple () |
---|
61 | n/a | |
---|
62 | n/a | 46-5 = 41 references to the empty tuple were removed by the cleanup actions |
---|
63 | n/a | between the times PYTHONDUMPREFS produced output. |
---|
64 | n/a | |
---|
65 | n/a | 00858028 [1025->1456] str '<dummy key>' |
---|
66 | n/a | |
---|
67 | n/a | The string '<dummy key>', which is used in dictobject.c to overwrite a real |
---|
68 | n/a | key that gets deleted, grew several hundred references during cleanup. It |
---|
69 | n/a | suggests that stuff did get removed from dicts by cleanup, but that the dicts |
---|
70 | n/a | themselves are staying alive for some reason. """ |
---|
71 | n/a | |
---|
72 | n/a | import re |
---|
73 | n/a | import sys |
---|
74 | n/a | |
---|
75 | n/a | # Generate lines from fileiter. If whilematch is true, continue reading |
---|
76 | n/a | # while the regexp object pat matches line. If whilematch is false, lines |
---|
77 | n/a | # are read so long as pat doesn't match them. In any case, the first line |
---|
78 | n/a | # that doesn't match pat (when whilematch is true), or that does match pat |
---|
79 | n/a | # (when whilematch is false), is lost, and fileiter will resume at the line |
---|
80 | n/a | # following it. |
---|
def read(fileiter, pat, whilematch):
    """Yield lines from *fileiter* up to a boundary decided by *pat*.

    With a true *whilematch*, lines are yielded for as long as the compiled
    regexp *pat* matches them; with a false *whilematch*, lines are yielded
    for as long as *pat* does not match them.

    The first line that ends the run is consumed from *fileiter* but never
    yielded, so the underlying iterator resumes at the line after it.
    """
    for text in fileiter:
        matched = pat.match(text) is not None
        if matched != whilematch:
            # Boundary line: already pulled from fileiter, deliberately dropped.
            return
        yield text
---|
87 | n/a | |
---|
def combine(fname):
    """Merge the two PYTHONDUMPREFS output blocks in *fname* and print them.

    The file is scanned for the "Remaining objects:" block, whose lines
    carry an address, a refcount and a repr, followed by the
    "Remaining object addresses:" block, whose lines carry an address, a
    refcount and a type name.  For each line of the second block one line
    is printed, in one of the forms

        address [refcnt] typename repr
        address [refcnt_before->refcnt_after] typename repr

    depending on whether the refcount differs between the two blocks.

    Lines of the first block that cannot be parsed, and objects that appear
    only in the second block, are reported on lines starting with "???".
    A final summary line gives the number of objects read from each block.
    """
    # address, "[" refcount "]", then the rest of the line: the repr in the
    # first block, the type name in the second.
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')
    addr2rc = {}
    addr2guts = {}
    before = 0
    after = 0

    # The context manager guarantees the file is closed even when parsing
    # raises part-way through.
    with open(fname) as f:
        fi = iter(f)

        # Discard everything up to and including the first block's header.
        for line in read(fi, re.compile(r'^Remaining objects:$'), False):
            pass

        # First block: remember refcount and repr for every address, up to
        # (and consuming) the second block's header.
        for line in read(fi, re.compile(r'^Remaining object addresses:$'), False):
            m = crack.match(line)
            if m:
                addr, rc, guts = m.groups()
                addr2rc[addr] = rc
                addr2guts[addr] = guts
                before += 1
            else:
                # Strip the line terminator so the diagnostic is not followed
                # by a stray blank line.
                print('??? skipped:', line.rstrip())

        # Second block: runs until the first line that crack does not match.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            assert m  # read() only yields lines that crack matches
            addr, rc, guts = m.groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down:', line.rstrip())
                continue
            print(addr, end=' ')
            if rc == addr2rc[addr]:
                print('[%s]' % rc, end=' ')
            else:
                print('[%s->%s]' % (addr2rc[addr], rc), end=' ')
            print(guts, addr2guts[addr])

    print("%d objects before, %d after" % (before, after))
---|
126 | n/a | |
---|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the path of the
    # file holding the saved PYTHONDUMPREFS output.
    dump_path = sys.argv[1]
    combine(dump_path)
---|