ยปCore Development>Code coverage>Lib/lib2to3/btm_matcher.py

Python code coverage for Lib/lib2to3/btm_matcher.py

#countcontent
1n/a"""A bottom-up tree matching algorithm implementation meant to speed
2n/aup 2to3's matching process. After the tree patterns are reduced to
3n/atheir rarest linear path, a linear Aho-Corasick automaton is
4n/acreated. The linear automaton traverses the linear paths from the
5n/aleaves to the root of the AST and returns a set of nodes for further
6n/amatching. This reduces significantly the number of candidate nodes."""
7n/a
8n/a__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
9n/a
10n/aimport logging
11n/aimport itertools
12n/afrom collections import defaultdict
13n/a
14n/afrom . import pytree
15n/afrom .btm_utils import reduce_tree
16n/a
17n/aclass BMNode(object):
18n/a """Class for a node of the Aho-Corasick automaton used in matching"""
19n/a count = itertools.count()
20n/a def __init__(self):
21n/a self.transition_table = {}
22n/a self.fixers = []
23n/a self.id = next(BMNode.count)
24n/a self.content = ''
25n/a
26n/aclass BottomMatcher(object):
27n/a """The main matcher class. After instantiating the patterns should
28n/a be added using the add_fixer method"""
29n/a
30n/a def __init__(self):
31n/a self.match = set()
32n/a self.root = BMNode()
33n/a self.nodes = [self.root]
34n/a self.fixers = []
35n/a self.logger = logging.getLogger("RefactoringTool")
36n/a
37n/a def add_fixer(self, fixer):
38n/a """Reduces a fixer's pattern tree to a linear path and adds it
39n/a to the matcher(a common Aho-Corasick automaton). The fixer is
40n/a appended on the matching states and called when they are
41n/a reached"""
42n/a self.fixers.append(fixer)
43n/a tree = reduce_tree(fixer.pattern_tree)
44n/a linear = tree.get_linear_subpattern()
45n/a match_nodes = self.add(linear, start=self.root)
46n/a for match_node in match_nodes:
47n/a match_node.fixers.append(fixer)
48n/a
49n/a def add(self, pattern, start):
50n/a "Recursively adds a linear pattern to the AC automaton"
51n/a #print("adding pattern", pattern, "to", start)
52n/a if not pattern:
53n/a #print("empty pattern")
54n/a return [start]
55n/a if isinstance(pattern[0], tuple):
56n/a #alternatives
57n/a #print("alternatives")
58n/a match_nodes = []
59n/a for alternative in pattern[0]:
60n/a #add all alternatives, and add the rest of the pattern
61n/a #to each end node
62n/a end_nodes = self.add(alternative, start=start)
63n/a for end in end_nodes:
64n/a match_nodes.extend(self.add(pattern[1:], end))
65n/a return match_nodes
66n/a else:
67n/a #single token
68n/a #not last
69n/a if pattern[0] not in start.transition_table:
70n/a #transition did not exist, create new
71n/a next_node = BMNode()
72n/a start.transition_table[pattern[0]] = next_node
73n/a else:
74n/a #transition exists already, follow
75n/a next_node = start.transition_table[pattern[0]]
76n/a
77n/a if pattern[1:]:
78n/a end_nodes = self.add(pattern[1:], start=next_node)
79n/a else:
80n/a end_nodes = [next_node]
81n/a return end_nodes
82n/a
83n/a def run(self, leaves):
84n/a """The main interface with the bottom matcher. The tree is
85n/a traversed from the bottom using the constructed
86n/a automaton. Nodes are only checked once as the tree is
87n/a retraversed. When the automaton fails, we give it one more
88n/a shot(in case the above tree matches as a whole with the
89n/a rejected leaf), then we break for the next leaf. There is the
90n/a special case of multiple arguments(see code comments) where we
91n/a recheck the nodes
92n/a
93n/a Args:
94n/a The leaves of the AST tree to be matched
95n/a
96n/a Returns:
97n/a A dictionary of node matches with fixers as the keys
98n/a """
99n/a current_ac_node = self.root
100n/a results = defaultdict(list)
101n/a for leaf in leaves:
102n/a current_ast_node = leaf
103n/a while current_ast_node:
104n/a current_ast_node.was_checked = True
105n/a for child in current_ast_node.children:
106n/a # multiple statements, recheck
107n/a if isinstance(child, pytree.Leaf) and child.value == ";":
108n/a current_ast_node.was_checked = False
109n/a break
110n/a if current_ast_node.type == 1:
111n/a #name
112n/a node_token = current_ast_node.value
113n/a else:
114n/a node_token = current_ast_node.type
115n/a
116n/a if node_token in current_ac_node.transition_table:
117n/a #token matches
118n/a current_ac_node = current_ac_node.transition_table[node_token]
119n/a for fixer in current_ac_node.fixers:
120n/a if not fixer in results:
121n/a results[fixer] = []
122n/a results[fixer].append(current_ast_node)
123n/a
124n/a else:
125n/a #matching failed, reset automaton
126n/a current_ac_node = self.root
127n/a if (current_ast_node.parent is not None
128n/a and current_ast_node.parent.was_checked):
129n/a #the rest of the tree upwards has been checked, next leaf
130n/a break
131n/a
132n/a #recheck the rejected node once from the root
133n/a if node_token in current_ac_node.transition_table:
134n/a #token matches
135n/a current_ac_node = current_ac_node.transition_table[node_token]
136n/a for fixer in current_ac_node.fixers:
137n/a if not fixer in results.keys():
138n/a results[fixer] = []
139n/a results[fixer].append(current_ast_node)
140n/a
141n/a current_ast_node = current_ast_node.parent
142n/a return results
143n/a
144n/a def print_ac(self):
145n/a "Prints a graphviz diagram of the BM automaton(for debugging)"
146n/a print("digraph g{")
147n/a def print_node(node):
148n/a for subnode_key in node.transition_table.keys():
149n/a subnode = node.transition_table[subnode_key]
150n/a print("%d -> %d [label=%s] //%s" %
151n/a (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
152n/a if subnode_key == 1:
153n/a print(subnode.content)
154n/a print_node(subnode)
155n/a print_node(self.root)
156n/a print("}")
157n/a
158n/a# taken from pytree.py for debugging; only used by print_ac
159n/a_type_reprs = {}
160n/adef type_repr(type_num):
161n/a global _type_reprs
162n/a if not _type_reprs:
163n/a from .pygram import python_symbols
164n/a # printing tokens is possible but not as useful
165n/a # from .pgen2 import token // token.__dict__.items():
166n/a for name, val in python_symbols.__dict__.items():
167n/a if type(val) == int: _type_reprs[val] = name
168n/a return _type_reprs.setdefault(type_num, type_num)