Core Development > Code coverage > Lib/profile.py

Python code coverage for Lib/profile.py

# | count | content
1n/a#! /usr/bin/env python3
2n/a#
3n/a# Class for profiling python code. rev 1.0 6/2/94
4n/a#
5n/a# Written by James Roskind
6n/a# Based on prior profile module by Sjoerd Mullender...
7n/a# which was hacked somewhat by: Guido van Rossum
8n/a
9n/a"""Class for profiling Python code."""
10n/a
11n/a# Copyright Disney Enterprises, Inc. All Rights Reserved.
12n/a# Licensed to PSF under a Contributor Agreement
13n/a#
14n/a# Licensed under the Apache License, Version 2.0 (the "License");
15n/a# you may not use this file except in compliance with the License.
16n/a# You may obtain a copy of the License at
17n/a#
18n/a# http://www.apache.org/licenses/LICENSE-2.0
19n/a#
20n/a# Unless required by applicable law or agreed to in writing, software
21n/a# distributed under the License is distributed on an "AS IS" BASIS,
22n/a# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23n/a# either express or implied. See the License for the specific language
24n/a# governing permissions and limitations under the License.
25n/a
26n/a
27n/aimport sys
28n/aimport os
29n/aimport time
30n/aimport marshal
31n/afrom optparse import OptionParser
32n/a
33n/a__all__ = ["run", "runctx", "Profile"]
34n/a
35n/a# Sample timer for use with
36n/a#i_count = 0
37n/a#def integer_timer():
38n/a# global i_count
39n/a# i_count = i_count + 1
40n/a# return i_count
41n/a#itimes = integer_timer # replace with C coded timer returning integers
42n/a
43n/aclass _Utils:
44n/a """Support class for utility functions which are shared by
45n/a profile.py and cProfile.py modules.
46n/a Not supposed to be used directly.
47n/a """
48n/a
49n/a def __init__(self, profiler):
50n/a self.profiler = profiler
51n/a
52n/a def run(self, statement, filename, sort):
53n/a prof = self.profiler()
54n/a try:
55n/a prof.run(statement)
56n/a except SystemExit:
57n/a pass
58n/a finally:
59n/a self._show(prof, filename, sort)
60n/a
61n/a def runctx(self, statement, globals, locals, filename, sort):
62n/a prof = self.profiler()
63n/a try:
64n/a prof.runctx(statement, globals, locals)
65n/a except SystemExit:
66n/a pass
67n/a finally:
68n/a self._show(prof, filename, sort)
69n/a
70n/a def _show(self, prof, filename, sort):
71n/a if filename is not None:
72n/a prof.dump_stats(filename)
73n/a else:
74n/a prof.print_stats(sort)
75n/a
76n/a
77n/a#**************************************************************************
78n/a# The following are the static member functions for the profiler class
79n/a# Note that an instance of Profile() is *not* needed to call them.
80n/a#**************************************************************************
81n/a
def run(statement, filename=None, sort=-1):
    """Run statement under profiler optionally saving results in filename

    This function takes a single argument that can be passed to the
    "exec" statement, and an optional file name.  In all cases this
    routine attempts to "exec" its first argument and gather profiling
    statistics from the execution. If no file name is present, then this
    function automatically prints a simple profiling report, sorted by the
    standard name string (file/line/function-name) that is presented in
    each line.
    """
    runner = _Utils(Profile)
    return runner.run(statement, filename, sort)
94n/a
def runctx(statement, globals, locals, filename=None, sort=-1):
    """Run statement under profiler, supplying your own globals and locals,
    optionally saving results in filename.

    statement and filename have the same semantics as profile.run
    """
    runner = _Utils(Profile)
    return runner.runctx(statement, globals, locals, filename, sort)
102n/a
103n/a
class Profile:
    """Profiler class.

    self.cur is always a tuple.  Each such tuple corresponds to a stack
    frame that is currently active (self.cur[-2]).  The following are the
    definitions of its members.  We use this external "parallel stack" to
    avoid contaminating the program that we are profiling. (old profiler
    used to write into the frames local dictionary!!) Derived classes
    can change the definition of some entries, as long as they leave
    [-2:] intact (frame and previous tuple).  In case an internal error is
    detected, the -3 element is used as the function name.

    [ 0] = Time that needs to be charged to the parent frame's function.
           It is used so that a function call will not have to access the
           timing data for the parent frame.
    [ 1] = Total time spent in this frame's function, excluding time in
           subfunctions (this latter is tallied in cur[2]).
    [ 2] = Total time spent in subfunctions, excluding time executing the
           frame's function (this latter is tallied in cur[1]).
    [-3] = Name of the function that corresponds to this frame.
    [-2] = Actual frame that we correspond to (used to sync exception handling).
    [-1] = Our parent 6-tuple (corresponds to frame.f_back).

    Timing data for each function is stored as a 5-tuple in the dictionary
    self.timings[].  The index is always the name stored in self.cur[-3].
    The following are the definitions of the members:

    [0] = The number of times this function was called, not counting direct
          or indirect recursion,
    [1] = Number of times this function appears on the stack, minus one
    [2] = Total time spent internal to this function
    [3] = Cumulative time that this function was present on the stack.  In
          non-recursive functions, this is the total execution time from start
          to finish of each invocation of a function, including time spent in
          all subfunctions.
    [4] = A dictionary indicating for each function name, the number of times
          it was called by us.
    """

    bias = 0  # calibration constant; subclasses / instances may override

    def __init__(self, timer=None, bias=None):
        # timer: optional timing function.  Whether its return value is a
        # scalar or a sequence (and of what length) selects which of the
        # trace_dispatch* routines is installed below.
        # bias: per-event overhead subtracted from each measurement; see
        # calibrate() for a way to compute a sensible value.
        self.timings = {}        # function id -> (cc, ns, tt, ct, callers)
        self.cur = None          # top of the parallel frame stack
        self.cmd = ""
        self.c_func_name = ""

        if bias is None:
            bias = self.bias
        self.bias = bias  # Materialize in local dict for lookup speed.

        if not timer:
            self.timer = self.get_time = time.process_time
            self.dispatcher = self.trace_dispatch_i
        else:
            self.timer = timer
            t = self.timer()  # test out timer function
            try:
                length = len(t)
            except TypeError:
                # Scalar-returning timer: use the fast scalar dispatcher.
                self.get_time = timer
                self.dispatcher = self.trace_dispatch_i
            else:
                if length == 2:
                    self.dispatcher = self.trace_dispatch
                else:
                    self.dispatcher = self.trace_dispatch_l
                # This get_time() implementation needs to be defined
                # here to capture the passed-in timer in the parameter
                # list (for performance).  Note that we can't assume
                # the timer() result contains two values in all
                # cases.
                def get_time_timer(timer=timer, sum=sum):
                    return sum(timer())
                self.get_time = get_time_timer
        self.t = self.get_time()
        # Seed the parallel stack with a synthetic root frame.
        self.simulate_call('profiler')

    # Heavily optimized dispatch routine for os.times() timer

    def trace_dispatch(self, frame, event, arg):
        # Timer returns a 2-element sequence (e.g. user+system times);
        # charge their sum.
        timer = self.timer
        t = timer()
        t = t[0] + t[1] - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame,t):
            t = timer()
            self.t = t[0] + t[1]
        else:
            r = timer()
            self.t = r[0] + r[1] - t # put back unrecorded delta

    # Dispatch routine for best timer program (return = scalar, fastest if
    # an integer but float works too -- and time.clock() relies on that).

    def trace_dispatch_i(self, frame, event, arg):
        timer = self.timer
        t = timer() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()
        else:
            self.t = timer() - t  # put back unrecorded delta

    # Dispatch routine for macintosh (timer returns time in ticks of
    # 1/60th second)

    def trace_dispatch_mac(self, frame, event, arg):
        timer = self.timer
        t = timer()/60.0 - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()/60.0
        else:
            self.t = timer()/60.0 - t  # put back unrecorded delta

    # SLOW generic dispatch routine for timer returning lists of numbers

    def trace_dispatch_l(self, frame, event, arg):
        get_time = self.get_time
        t = get_time() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = get_time()
        else:
            self.t = get_time() - t # put back unrecorded delta

    # In the event handlers, the first 3 elements of self.cur are unpacked
    # into vrbls w/ 3-letter names.  The last two characters are meant to be
    # mnemonic:
    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
    #     _it  self.cur[1] "internal time" time spent directly in the function
    #     _et  self.cur[2] "external time" time spent in subfunctions

    def trace_dispatch_exception(self, frame, t):
        rpt, rit, ret, rfn, rframe, rcur = self.cur
        if (rframe is not frame) and rcur:
            return self.trace_dispatch_return(rframe, t)
        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
        return 1


    def trace_dispatch_call(self, frame, t):
        if self.cur and frame.f_back is not self.cur[-2]:
            # Our parallel stack is out of sync with the interpreter's;
            # pop the stale frame before pushing the new one.
            rpt, rit, ret, rfn, rframe, rcur = self.cur
            if not isinstance(rframe, Profile.fake_frame):
                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
                                                       rframe, rframe.f_back,
                                                       frame, frame.f_back)
                self.trace_dispatch_return(rframe, 0)
                assert (self.cur is None or \
                        frame.f_back is self.cur[-2]), ("Bad call",
                                                        self.cur[-3])
        fcode = frame.f_code
        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_c_call (self, frame, t):
        # C functions have no code object; key them by name alone.
        fn = ("", 0, self.c_func_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns+1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        if frame is not self.cur[-2]:
            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
            self.trace_dispatch_return(self.cur[-2], 0)

        # Prefix "r" means part of the Returning or exiting frame.
        # Prefix "p" means part of the Previous or Parent or older frame.

        rpt, rit, ret, rfn, frame, rcur = self.cur
        rit = rit + t
        frame_total = rit + ret

        ppt, pit, pet, pfn, pframe, pcur = rcur
        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur

        timings = self.timings
        cc, ns, tt, ct, callers = timings[rfn]
        if not ns:
            # This is the only occurrence of the function on the stack.
            # Else this is a (directly or indirectly) recursive call, and
            # its cumulative time will get updated when the topmost call to
            # it returns.
            ct = ct + frame_total
            cc = cc + 1

        if pfn in callers:
            callers[pfn] = callers[pfn] + 1  # hack: gather more
            # stats such as the amount of time added to ct courtesy
            # of this specific call, and the contribution to cc
            # courtesy of this call.
        else:
            callers[pfn] = 1

        timings[rfn] = cc, ns - 1, tt + rit, ct, callers

        return 1


    # Maps sys.setprofile event names to the handler methods above.
    dispatch = {
        "call": trace_dispatch_call,
        "exception": trace_dispatch_exception,
        "return": trace_dispatch_return,
        "c_call": trace_dispatch_c_call,
        "c_exception": trace_dispatch_return,  # the C function returned
        "c_return": trace_dispatch_return,
        }


    # The next few functions play with self.cmd. By carefully preloading
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
        # Record *cmd* as the synthetic root function name, once.
        if self.cur[-1]: return   # already set
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        # Minimal stand-in for a code object, enough for the fn-key tuple.
        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_firstlineno = 0

        def __repr__(self):
            return repr((self.co_filename, self.co_line, self.co_name))

    class fake_frame:
        # Minimal stand-in for a frame object (only f_code/f_back are read).
        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        # Push a synthetic frame named *name* onto the parallel stack.
        code = self.fake_code('profile', 0, name)
        if self.cur:
            pframe = self.cur[-2]
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
        self.dispatch['call'](self, frame, 0)

    # collect stats from pending stack, including getting final
    # timings for self.cmd frame.

    def simulate_cmd_complete(self):
        get_time = self.get_time
        t = get_time() - self.t
        while self.cur[-1]:
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
            self.dispatch['return'](self, self.cur[-2], t)
            t = 0
        self.t = get_time() - t


    def print_stats(self, sort=-1):
        # Render the collected stats to stdout via pstats.
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(sort). \
              print_stats()

    def dump_stats(self, file):
        # Persist self.stats to *file* with marshal (pstats can load it).
        with open(file, 'wb') as f:
            self.create_stats()
            marshal.dump(self.stats, f)

    def create_stats(self):
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        # Convert self.timings into the pstats-compatible self.stats dict:
        # func -> (call count, total calls incl. recursion, tt, ct, callers).
        self.stats = {}
        for func, (cc, ns, tt, ct, callers) in self.timings.items():
            callers = callers.copy()
            nc = 0
            for callcnt in callers.values():
                nc += callcnt
            self.stats[func] = cc, nc, tt, ct, callers


    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        """Profile *cmd* (a string) in __main__'s namespace."""
        import __main__
        dict = __main__.__dict__
        return self.runctx(cmd, dict, dict)

    def runctx(self, cmd, globals, locals):
        """Profile exec(cmd, globals, locals); always restore the profile hook."""
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
            exec(cmd, globals, locals)
        finally:
            sys.setprofile(None)
        return self

    # This method is more useful to profile a single function call.
    def runcall(self, func, *args, **kw):
        self.set_cmd(repr(func))
        sys.setprofile(self.dispatcher)
        try:
            return func(*args, **kw)
        finally:
            sys.setprofile(None)


    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
    # a per-event basis.
    #
    # Note that this difference is only significant if there are a lot of
    # events, and relatively little user code per event.  For example,
    # code with small functions will typically benefit from having the
    # profiler calibrated for the current platform.  This *could* be
    # done on the fly during init() time, but it is not worth the
    # effort.  Also note that if too large a value specified, then
    # execution time on some functions will actually appear as a
    # negative number.  It is *normal* for some functions (with very
    # low call counts) to have such negative stats, even if the
    # calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each functions.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures. I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, for a very
    # low "value added" feature.)
    #**************************************************************

    def calibrate(self, m, verbose=0):
        """Return the mean stopwatch overhead per profile event,
        measured over m calls of a trivial function.  The result is a
        candidate value for self.bias."""
        if self.__class__ is not Profile:
            raise TypeError("Subclasses must override .calibrate().")

        saved_bias = self.bias
        self.bias = 0
        try:
            return self._calibrate_inner(m, verbose)
        finally:
            self.bias = saved_bias

    def _calibrate_inner(self, m, verbose):
        get_time = self.get_time

        # Set up a test case to be run with and without profiling.  Include
        # lots of calls, because we're trying to quantify stopwatch overhead.
        # Do not raise any exceptions, though, because we want to know
        # exactly how many profile events are generated (one call event, +
        # one return event, per Python-level call).

        def f1(n):
            for i in range(n):
                x = 1

        def f(m, f1=f1):
            for i in range(m):
                f1(100)

        f(m)    # warm up the cache

        # elapsed_noprofile <- time f(m) takes without profiling.
        t0 = get_time()
        f(m)
        t1 = get_time()
        elapsed_noprofile = t1 - t0
        if verbose:
            print("elapsed time without profiling =", elapsed_noprofile)

        # elapsed_profile <- time f(m) takes with profiling.  The difference
        # is profiling overhead, only some of which the profiler subtracts
        # out on its own.
        p = Profile()
        t0 = get_time()
        p.runctx('f(m)', globals(), locals())
        t1 = get_time()
        elapsed_profile = t1 - t0
        if verbose:
            print("elapsed time with profiling =", elapsed_profile)

        # reported_time <- "CPU seconds" the profiler charged to f and f1.
        total_calls = 0.0
        reported_time = 0.0
        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
                p.timings.items():
            if funcname in ("f", "f1"):
                total_calls += cc
                reported_time += tt

        if verbose:
            print("'CPU seconds' profiler reported =", reported_time)
            print("total # calls =", total_calls)
        if total_calls != m + 1:
            raise ValueError("internal error: total calls = %d" % total_calls)

        # reported_time - elapsed_noprofile = overhead the profiler wasn't
        # able to measure.  Divide by twice the number of calls (since there
        # are two profiler events per call in this test) to get the hidden
        # overhead per event.
        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
        if verbose:
            print("mean stopwatch overhead per profile event =", mean)
        return mean
551n/a
552n/a#****************************************************************************
553n/a
def main():
    """Command-line entry point: profile a script file and report stats.

    Parses -o/--outfile and -s/--sort, then runs the given script under
    the profiler via runctx().  Returns the OptionParser instance.
    """
    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    parser = OptionParser(usage=usage)
    parser.allow_interspersed_args = False
    parser.add_option('-o', '--outfile', dest="outfile",
        help="Save stats to <outfile>", default=None)
    parser.add_option('-s', '--sort', dest="sort",
        help="Sort order when printing to stdout, based on pstats.Stats class",
        default=-1)

    # No arguments at all: show usage and exit with an error status.
    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(2)

    options, remaining = parser.parse_args()
    # Make the profiled script see only its own arguments in sys.argv.
    sys.argv[:] = remaining

    if not remaining:
        parser.print_usage()
        return parser

    progname = remaining[0]
    # Let the script import modules from its own directory.
    sys.path.insert(0, os.path.dirname(progname))
    with open(progname, 'rb') as fp:
        code = compile(fp.read(), progname, 'exec')
    # Emulate the module environment a directly-executed script would get.
    globs = {
        '__file__': progname,
        '__name__': '__main__',
        '__package__': None,
        '__cached__': None,
    }
    runctx(code, globs, None, options.outfile, options.sort)
    return parser

# When invoked as main program, invoke the profiler on a script
if __name__ == '__main__':
    main()