ยปCore Development>Code coverage>Lib/lib2to3/btm_utils.py

Python code coverage for Lib/lib2to3/btm_utils.py

#countcontent
1n/a"Utility functions used by the btm_matcher module"
2n/a
3n/afrom . import pytree
4n/afrom .pgen2 import grammar, token
5n/afrom .pygram import pattern_symbols, python_symbols
6n/a
7n/asyms = pattern_symbols
8n/apysyms = python_symbols
9n/atokens = grammar.opmap
10n/atoken_labels = token
11n/a
12n/aTYPE_ANY = -1
13n/aTYPE_ALTERNATIVES = -2
14n/aTYPE_GROUP = -3
15n/a
16n/aclass MinNode(object):
17n/a """This class serves as an intermediate representation of the
18n/a pattern tree during the conversion to sets of leaf-to-root
19n/a subpatterns"""
20n/a
21n/a def __init__(self, type=None, name=None):
22n/a self.type = type
23n/a self.name = name
24n/a self.children = []
25n/a self.leaf = False
26n/a self.parent = None
27n/a self.alternatives = []
28n/a self.group = []
29n/a
30n/a def __repr__(self):
31n/a return str(self.type) + ' ' + str(self.name)
32n/a
33n/a def leaf_to_root(self):
34n/a """Internal method. Returns a characteristic path of the
35n/a pattern tree. This method must be run for all leaves until the
36n/a linear subpatterns are merged into a single"""
37n/a node = self
38n/a subp = []
39n/a while node:
40n/a if node.type == TYPE_ALTERNATIVES:
41n/a node.alternatives.append(subp)
42n/a if len(node.alternatives) == len(node.children):
43n/a #last alternative
44n/a subp = [tuple(node.alternatives)]
45n/a node.alternatives = []
46n/a node = node.parent
47n/a continue
48n/a else:
49n/a node = node.parent
50n/a subp = None
51n/a break
52n/a
53n/a if node.type == TYPE_GROUP:
54n/a node.group.append(subp)
55n/a #probably should check the number of leaves
56n/a if len(node.group) == len(node.children):
57n/a subp = get_characteristic_subpattern(node.group)
58n/a node.group = []
59n/a node = node.parent
60n/a continue
61n/a else:
62n/a node = node.parent
63n/a subp = None
64n/a break
65n/a
66n/a if node.type == token_labels.NAME and node.name:
67n/a #in case of type=name, use the name instead
68n/a subp.append(node.name)
69n/a else:
70n/a subp.append(node.type)
71n/a
72n/a node = node.parent
73n/a return subp
74n/a
75n/a def get_linear_subpattern(self):
76n/a """Drives the leaf_to_root method. The reason that
77n/a leaf_to_root must be run multiple times is because we need to
78n/a reject 'group' matches; for example the alternative form
79n/a (a | b c) creates a group [b c] that needs to be matched. Since
80n/a matching multiple linear patterns overcomes the automaton's
81n/a capabilities, leaf_to_root merges each group into a single
82n/a choice based on 'characteristic'ity,
83n/a
84n/a i.e. (a|b c) -> (a|b) if b more characteristic than c
85n/a
86n/a Returns: The most 'characteristic'(as defined by
87n/a get_characteristic_subpattern) path for the compiled pattern
88n/a tree.
89n/a """
90n/a
91n/a for l in self.leaves():
92n/a subp = l.leaf_to_root()
93n/a if subp:
94n/a return subp
95n/a
96n/a def leaves(self):
97n/a "Generator that returns the leaves of the tree"
98n/a for child in self.children:
99n/a yield from child.leaves()
100n/a if not self.children:
101n/a yield self
102n/a
103n/adef reduce_tree(node, parent=None):
104n/a """
105n/a Internal function. Reduces a compiled pattern tree to an
106n/a intermediate representation suitable for feeding the
107n/a automaton. This also trims off any optional pattern elements(like
108n/a [a], a*).
109n/a """
110n/a
111n/a new_node = None
112n/a #switch on the node type
113n/a if node.type == syms.Matcher:
114n/a #skip
115n/a node = node.children[0]
116n/a
117n/a if node.type == syms.Alternatives :
118n/a #2 cases
119n/a if len(node.children) <= 2:
120n/a #just a single 'Alternative', skip this node
121n/a new_node = reduce_tree(node.children[0], parent)
122n/a else:
123n/a #real alternatives
124n/a new_node = MinNode(type=TYPE_ALTERNATIVES)
125n/a #skip odd children('|' tokens)
126n/a for child in node.children:
127n/a if node.children.index(child)%2:
128n/a continue
129n/a reduced = reduce_tree(child, new_node)
130n/a if reduced is not None:
131n/a new_node.children.append(reduced)
132n/a elif node.type == syms.Alternative:
133n/a if len(node.children) > 1:
134n/a
135n/a new_node = MinNode(type=TYPE_GROUP)
136n/a for child in node.children:
137n/a reduced = reduce_tree(child, new_node)
138n/a if reduced:
139n/a new_node.children.append(reduced)
140n/a if not new_node.children:
141n/a # delete the group if all of the children were reduced to None
142n/a new_node = None
143n/a
144n/a else:
145n/a new_node = reduce_tree(node.children[0], parent)
146n/a
147n/a elif node.type == syms.Unit:
148n/a if (isinstance(node.children[0], pytree.Leaf) and
149n/a node.children[0].value == '('):
150n/a #skip parentheses
151n/a return reduce_tree(node.children[1], parent)
152n/a if ((isinstance(node.children[0], pytree.Leaf) and
153n/a node.children[0].value == '[')
154n/a or
155n/a (len(node.children)>1 and
156n/a hasattr(node.children[1], "value") and
157n/a node.children[1].value == '[')):
158n/a #skip whole unit if its optional
159n/a return None
160n/a
161n/a leaf = True
162n/a details_node = None
163n/a alternatives_node = None
164n/a has_repeater = False
165n/a repeater_node = None
166n/a has_variable_name = False
167n/a
168n/a for child in node.children:
169n/a if child.type == syms.Details:
170n/a leaf = False
171n/a details_node = child
172n/a elif child.type == syms.Repeater:
173n/a has_repeater = True
174n/a repeater_node = child
175n/a elif child.type == syms.Alternatives:
176n/a alternatives_node = child
177n/a if hasattr(child, 'value') and child.value == '=': # variable name
178n/a has_variable_name = True
179n/a
180n/a #skip variable name
181n/a if has_variable_name:
182n/a #skip variable name, '='
183n/a name_leaf = node.children[2]
184n/a if hasattr(name_leaf, 'value') and name_leaf.value == '(':
185n/a # skip parenthesis
186n/a name_leaf = node.children[3]
187n/a else:
188n/a name_leaf = node.children[0]
189n/a
190n/a #set node type
191n/a if name_leaf.type == token_labels.NAME:
192n/a #(python) non-name or wildcard
193n/a if name_leaf.value == 'any':
194n/a new_node = MinNode(type=TYPE_ANY)
195n/a else:
196n/a if hasattr(token_labels, name_leaf.value):
197n/a new_node = MinNode(type=getattr(token_labels, name_leaf.value))
198n/a else:
199n/a new_node = MinNode(type=getattr(pysyms, name_leaf.value))
200n/a
201n/a elif name_leaf.type == token_labels.STRING:
202n/a #(python) name or character; remove the apostrophes from
203n/a #the string value
204n/a name = name_leaf.value.strip("'")
205n/a if name in tokens:
206n/a new_node = MinNode(type=tokens[name])
207n/a else:
208n/a new_node = MinNode(type=token_labels.NAME, name=name)
209n/a elif name_leaf.type == syms.Alternatives:
210n/a new_node = reduce_tree(alternatives_node, parent)
211n/a
212n/a #handle repeaters
213n/a if has_repeater:
214n/a if repeater_node.children[0].value == '*':
215n/a #reduce to None
216n/a new_node = None
217n/a elif repeater_node.children[0].value == '+':
218n/a #reduce to a single occurrence i.e. do nothing
219n/a pass
220n/a else:
221n/a #TODO: handle {min, max} repeaters
222n/a raise NotImplementedError
223n/a pass
224n/a
225n/a #add children
226n/a if details_node and new_node is not None:
227n/a for child in details_node.children[1:-1]:
228n/a #skip '<', '>' markers
229n/a reduced = reduce_tree(child, new_node)
230n/a if reduced is not None:
231n/a new_node.children.append(reduced)
232n/a if new_node:
233n/a new_node.parent = parent
234n/a return new_node
235n/a
236n/a
237n/adef get_characteristic_subpattern(subpatterns):
238n/a """Picks the most characteristic from a list of linear patterns
239n/a Current order used is:
240n/a names > common_names > common_chars
241n/a """
242n/a if not isinstance(subpatterns, list):
243n/a return subpatterns
244n/a if len(subpatterns)==1:
245n/a return subpatterns[0]
246n/a
247n/a # first pick out the ones containing variable names
248n/a subpatterns_with_names = []
249n/a subpatterns_with_common_names = []
250n/a common_names = ['in', 'for', 'if' , 'not', 'None']
251n/a subpatterns_with_common_chars = []
252n/a common_chars = "[]().,:"
253n/a for subpattern in subpatterns:
254n/a if any(rec_test(subpattern, lambda x: type(x) is str)):
255n/a if any(rec_test(subpattern,
256n/a lambda x: isinstance(x, str) and x in common_chars)):
257n/a subpatterns_with_common_chars.append(subpattern)
258n/a elif any(rec_test(subpattern,
259n/a lambda x: isinstance(x, str) and x in common_names)):
260n/a subpatterns_with_common_names.append(subpattern)
261n/a
262n/a else:
263n/a subpatterns_with_names.append(subpattern)
264n/a
265n/a if subpatterns_with_names:
266n/a subpatterns = subpatterns_with_names
267n/a elif subpatterns_with_common_names:
268n/a subpatterns = subpatterns_with_common_names
269n/a elif subpatterns_with_common_chars:
270n/a subpatterns = subpatterns_with_common_chars
271n/a # of the remaining subpatterns pick out the longest one
272n/a return max(subpatterns, key=len)
273n/a
274n/adef rec_test(sequence, test_func):
275n/a """Tests test_func on all items of sequence and items of included
276n/a sub-iterables"""
277n/a for x in sequence:
278n/a if isinstance(x, (list, tuple)):
279n/a yield from rec_test(x, test_func)
280n/a else:
281n/a yield test_func(x)