1 | n/a | """A bottom-up tree matching algorithm implementation meant to speed |
---|
2 | n/a | up 2to3's matching process. After the tree patterns are reduced to |
---|
3 | n/a | their rarest linear path, a linear Aho-Corasick automaton is |
---|
4 | n/a | created. The linear automaton traverses the linear paths from the |
---|
5 | n/a | leaves to the root of the AST and returns a set of nodes for further |
---|
6 | n/a | matching. This reduces significantly the number of candidate nodes.""" |
---|
7 | n/a | |
---|
8 | n/a | __author__ = "George Boutsioukis <gboutsioukis@gmail.com>" |
---|
9 | n/a | |
---|
10 | n/a | import logging |
---|
11 | n/a | import itertools |
---|
12 | n/a | from collections import defaultdict |
---|
13 | n/a | |
---|
14 | n/a | from . import pytree |
---|
15 | n/a | from .btm_utils import reduce_tree |
---|
16 | n/a | |
---|
17 | n/a | class BMNode(object): |
---|
18 | n/a | """Class for a node of the Aho-Corasick automaton used in matching""" |
---|
19 | n/a | count = itertools.count() |
---|
20 | n/a | def __init__(self): |
---|
21 | n/a | self.transition_table = {} |
---|
22 | n/a | self.fixers = [] |
---|
23 | n/a | self.id = next(BMNode.count) |
---|
24 | n/a | self.content = '' |
---|
25 | n/a | |
---|
26 | n/a | class BottomMatcher(object): |
---|
27 | n/a | """The main matcher class. After instantiating the patterns should |
---|
28 | n/a | be added using the add_fixer method""" |
---|
29 | n/a | |
---|
30 | n/a | def __init__(self): |
---|
31 | n/a | self.match = set() |
---|
32 | n/a | self.root = BMNode() |
---|
33 | n/a | self.nodes = [self.root] |
---|
34 | n/a | self.fixers = [] |
---|
35 | n/a | self.logger = logging.getLogger("RefactoringTool") |
---|
36 | n/a | |
---|
37 | n/a | def add_fixer(self, fixer): |
---|
38 | n/a | """Reduces a fixer's pattern tree to a linear path and adds it |
---|
39 | n/a | to the matcher(a common Aho-Corasick automaton). The fixer is |
---|
40 | n/a | appended on the matching states and called when they are |
---|
41 | n/a | reached""" |
---|
42 | n/a | self.fixers.append(fixer) |
---|
43 | n/a | tree = reduce_tree(fixer.pattern_tree) |
---|
44 | n/a | linear = tree.get_linear_subpattern() |
---|
45 | n/a | match_nodes = self.add(linear, start=self.root) |
---|
46 | n/a | for match_node in match_nodes: |
---|
47 | n/a | match_node.fixers.append(fixer) |
---|
48 | n/a | |
---|
49 | n/a | def add(self, pattern, start): |
---|
50 | n/a | "Recursively adds a linear pattern to the AC automaton" |
---|
51 | n/a | #print("adding pattern", pattern, "to", start) |
---|
52 | n/a | if not pattern: |
---|
53 | n/a | #print("empty pattern") |
---|
54 | n/a | return [start] |
---|
55 | n/a | if isinstance(pattern[0], tuple): |
---|
56 | n/a | #alternatives |
---|
57 | n/a | #print("alternatives") |
---|
58 | n/a | match_nodes = [] |
---|
59 | n/a | for alternative in pattern[0]: |
---|
60 | n/a | #add all alternatives, and add the rest of the pattern |
---|
61 | n/a | #to each end node |
---|
62 | n/a | end_nodes = self.add(alternative, start=start) |
---|
63 | n/a | for end in end_nodes: |
---|
64 | n/a | match_nodes.extend(self.add(pattern[1:], end)) |
---|
65 | n/a | return match_nodes |
---|
66 | n/a | else: |
---|
67 | n/a | #single token |
---|
68 | n/a | #not last |
---|
69 | n/a | if pattern[0] not in start.transition_table: |
---|
70 | n/a | #transition did not exist, create new |
---|
71 | n/a | next_node = BMNode() |
---|
72 | n/a | start.transition_table[pattern[0]] = next_node |
---|
73 | n/a | else: |
---|
74 | n/a | #transition exists already, follow |
---|
75 | n/a | next_node = start.transition_table[pattern[0]] |
---|
76 | n/a | |
---|
77 | n/a | if pattern[1:]: |
---|
78 | n/a | end_nodes = self.add(pattern[1:], start=next_node) |
---|
79 | n/a | else: |
---|
80 | n/a | end_nodes = [next_node] |
---|
81 | n/a | return end_nodes |
---|
82 | n/a | |
---|
83 | n/a | def run(self, leaves): |
---|
84 | n/a | """The main interface with the bottom matcher. The tree is |
---|
85 | n/a | traversed from the bottom using the constructed |
---|
86 | n/a | automaton. Nodes are only checked once as the tree is |
---|
87 | n/a | retraversed. When the automaton fails, we give it one more |
---|
88 | n/a | shot(in case the above tree matches as a whole with the |
---|
89 | n/a | rejected leaf), then we break for the next leaf. There is the |
---|
90 | n/a | special case of multiple arguments(see code comments) where we |
---|
91 | n/a | recheck the nodes |
---|
92 | n/a | |
---|
93 | n/a | Args: |
---|
94 | n/a | The leaves of the AST tree to be matched |
---|
95 | n/a | |
---|
96 | n/a | Returns: |
---|
97 | n/a | A dictionary of node matches with fixers as the keys |
---|
98 | n/a | """ |
---|
99 | n/a | current_ac_node = self.root |
---|
100 | n/a | results = defaultdict(list) |
---|
101 | n/a | for leaf in leaves: |
---|
102 | n/a | current_ast_node = leaf |
---|
103 | n/a | while current_ast_node: |
---|
104 | n/a | current_ast_node.was_checked = True |
---|
105 | n/a | for child in current_ast_node.children: |
---|
106 | n/a | # multiple statements, recheck |
---|
107 | n/a | if isinstance(child, pytree.Leaf) and child.value == ";": |
---|
108 | n/a | current_ast_node.was_checked = False |
---|
109 | n/a | break |
---|
110 | n/a | if current_ast_node.type == 1: |
---|
111 | n/a | #name |
---|
112 | n/a | node_token = current_ast_node.value |
---|
113 | n/a | else: |
---|
114 | n/a | node_token = current_ast_node.type |
---|
115 | n/a | |
---|
116 | n/a | if node_token in current_ac_node.transition_table: |
---|
117 | n/a | #token matches |
---|
118 | n/a | current_ac_node = current_ac_node.transition_table[node_token] |
---|
119 | n/a | for fixer in current_ac_node.fixers: |
---|
120 | n/a | if not fixer in results: |
---|
121 | n/a | results[fixer] = [] |
---|
122 | n/a | results[fixer].append(current_ast_node) |
---|
123 | n/a | |
---|
124 | n/a | else: |
---|
125 | n/a | #matching failed, reset automaton |
---|
126 | n/a | current_ac_node = self.root |
---|
127 | n/a | if (current_ast_node.parent is not None |
---|
128 | n/a | and current_ast_node.parent.was_checked): |
---|
129 | n/a | #the rest of the tree upwards has been checked, next leaf |
---|
130 | n/a | break |
---|
131 | n/a | |
---|
132 | n/a | #recheck the rejected node once from the root |
---|
133 | n/a | if node_token in current_ac_node.transition_table: |
---|
134 | n/a | #token matches |
---|
135 | n/a | current_ac_node = current_ac_node.transition_table[node_token] |
---|
136 | n/a | for fixer in current_ac_node.fixers: |
---|
137 | n/a | if not fixer in results.keys(): |
---|
138 | n/a | results[fixer] = [] |
---|
139 | n/a | results[fixer].append(current_ast_node) |
---|
140 | n/a | |
---|
141 | n/a | current_ast_node = current_ast_node.parent |
---|
142 | n/a | return results |
---|
143 | n/a | |
---|
144 | n/a | def print_ac(self): |
---|
145 | n/a | "Prints a graphviz diagram of the BM automaton(for debugging)" |
---|
146 | n/a | print("digraph g{") |
---|
147 | n/a | def print_node(node): |
---|
148 | n/a | for subnode_key in node.transition_table.keys(): |
---|
149 | n/a | subnode = node.transition_table[subnode_key] |
---|
150 | n/a | print("%d -> %d [label=%s] //%s" % |
---|
151 | n/a | (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) |
---|
152 | n/a | if subnode_key == 1: |
---|
153 | n/a | print(subnode.content) |
---|
154 | n/a | print_node(subnode) |
---|
155 | n/a | print_node(self.root) |
---|
156 | n/a | print("}") |
---|
157 | n/a | |
---|
158 | n/a | # taken from pytree.py for debugging; only used by print_ac |
---|
159 | n/a | _type_reprs = {} |
---|
160 | n/a | def type_repr(type_num): |
---|
161 | n/a | global _type_reprs |
---|
162 | n/a | if not _type_reprs: |
---|
163 | n/a | from .pygram import python_symbols |
---|
164 | n/a | # printing tokens is possible but not as useful |
---|
165 | n/a | # from .pgen2 import token // token.__dict__.items(): |
---|
166 | n/a | for name, val in python_symbols.__dict__.items(): |
---|
167 | n/a | if type(val) == int: _type_reprs[val] = name |
---|
168 | n/a | return _type_reprs.setdefault(type_num, type_num) |
---|