1 | n/a | /* Parser generator */ |
---|
2 | n/a | |
---|
3 | n/a | /* For a description, see the comments at end of this file */ |
---|
4 | n/a | |
---|
5 | n/a | #include "Python.h" |
---|
6 | n/a | #include "pgenheaders.h" |
---|
7 | n/a | #include "token.h" |
---|
8 | n/a | #include "node.h" |
---|
9 | n/a | #include "grammar.h" |
---|
10 | n/a | #include "metagrammar.h" |
---|
11 | n/a | #include "pgen.h" |
---|
12 | n/a | |
---|
13 | n/a | extern int Py_DebugFlag; |
---|
14 | n/a | extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ |
---|
15 | n/a | |
---|
16 | n/a | |
---|
17 | n/a | /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ |
---|
18 | n/a | |
---|
19 | n/a | typedef struct _nfaarc { |
---|
20 | n/a | int ar_label; |
---|
21 | n/a | int ar_arrow; |
---|
22 | n/a | } nfaarc; |
---|
23 | n/a | |
---|
24 | n/a | typedef struct _nfastate { |
---|
25 | n/a | int st_narcs; |
---|
26 | n/a | nfaarc *st_arc; |
---|
27 | n/a | } nfastate; |
---|
28 | n/a | |
---|
29 | n/a | typedef struct _nfa { |
---|
30 | n/a | int nf_type; |
---|
31 | n/a | char *nf_name; |
---|
32 | n/a | int nf_nstates; |
---|
33 | n/a | nfastate *nf_state; |
---|
34 | n/a | int nf_start, nf_finish; |
---|
35 | n/a | } nfa; |
---|
36 | n/a | |
---|
37 | n/a | /* Forward */ |
---|
38 | n/a | static void compile_rhs(labellist *ll, |
---|
39 | n/a | nfa *nf, node *n, int *pa, int *pb); |
---|
40 | n/a | static void compile_alt(labellist *ll, |
---|
41 | n/a | nfa *nf, node *n, int *pa, int *pb); |
---|
42 | n/a | static void compile_item(labellist *ll, |
---|
43 | n/a | nfa *nf, node *n, int *pa, int *pb); |
---|
44 | n/a | static void compile_atom(labellist *ll, |
---|
45 | n/a | nfa *nf, node *n, int *pa, int *pb); |
---|
46 | n/a | |
---|
47 | n/a | static int |
---|
48 | n/a | addnfastate(nfa *nf) |
---|
49 | n/a | { |
---|
50 | n/a | nfastate *st; |
---|
51 | n/a | |
---|
52 | n/a | nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, |
---|
53 | n/a | sizeof(nfastate) * (nf->nf_nstates + 1)); |
---|
54 | n/a | if (nf->nf_state == NULL) |
---|
55 | n/a | Py_FatalError("out of mem"); |
---|
56 | n/a | st = &nf->nf_state[nf->nf_nstates++]; |
---|
57 | n/a | st->st_narcs = 0; |
---|
58 | n/a | st->st_arc = NULL; |
---|
59 | n/a | return st - nf->nf_state; |
---|
60 | n/a | } |
---|
61 | n/a | |
---|
62 | n/a | static void |
---|
63 | n/a | addnfaarc(nfa *nf, int from, int to, int lbl) |
---|
64 | n/a | { |
---|
65 | n/a | nfastate *st; |
---|
66 | n/a | nfaarc *ar; |
---|
67 | n/a | |
---|
68 | n/a | st = &nf->nf_state[from]; |
---|
69 | n/a | st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, |
---|
70 | n/a | sizeof(nfaarc) * (st->st_narcs + 1)); |
---|
71 | n/a | if (st->st_arc == NULL) |
---|
72 | n/a | Py_FatalError("out of mem"); |
---|
73 | n/a | ar = &st->st_arc[st->st_narcs++]; |
---|
74 | n/a | ar->ar_label = lbl; |
---|
75 | n/a | ar->ar_arrow = to; |
---|
76 | n/a | } |
---|
77 | n/a | |
---|
78 | n/a | static nfa * |
---|
79 | n/a | newnfa(char *name) |
---|
80 | n/a | { |
---|
81 | n/a | nfa *nf; |
---|
82 | n/a | static int type = NT_OFFSET; /* All types will be disjunct */ |
---|
83 | n/a | |
---|
84 | n/a | nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); |
---|
85 | n/a | if (nf == NULL) |
---|
86 | n/a | Py_FatalError("no mem for new nfa"); |
---|
87 | n/a | nf->nf_type = type++; |
---|
88 | n/a | nf->nf_name = name; /* XXX strdup(name) ??? */ |
---|
89 | n/a | nf->nf_nstates = 0; |
---|
90 | n/a | nf->nf_state = NULL; |
---|
91 | n/a | nf->nf_start = nf->nf_finish = -1; |
---|
92 | n/a | return nf; |
---|
93 | n/a | } |
---|
94 | n/a | |
---|
95 | n/a | typedef struct _nfagrammar { |
---|
96 | n/a | int gr_nnfas; |
---|
97 | n/a | nfa **gr_nfa; |
---|
98 | n/a | labellist gr_ll; |
---|
99 | n/a | } nfagrammar; |
---|
100 | n/a | |
---|
101 | n/a | /* Forward */ |
---|
102 | n/a | static void compile_rule(nfagrammar *gr, node *n); |
---|
103 | n/a | |
---|
104 | n/a | static nfagrammar * |
---|
105 | n/a | newnfagrammar(void) |
---|
106 | n/a | { |
---|
107 | n/a | nfagrammar *gr; |
---|
108 | n/a | |
---|
109 | n/a | gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); |
---|
110 | n/a | if (gr == NULL) |
---|
111 | n/a | Py_FatalError("no mem for new nfa grammar"); |
---|
112 | n/a | gr->gr_nnfas = 0; |
---|
113 | n/a | gr->gr_nfa = NULL; |
---|
114 | n/a | gr->gr_ll.ll_nlabels = 0; |
---|
115 | n/a | gr->gr_ll.ll_label = NULL; |
---|
116 | n/a | addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); |
---|
117 | n/a | return gr; |
---|
118 | n/a | } |
---|
119 | n/a | |
---|
120 | n/a | static void |
---|
121 | n/a | freenfagrammar(nfagrammar *gr) |
---|
122 | n/a | { |
---|
123 | n/a | for (int i = 0; i < gr->gr_nnfas; i++) { |
---|
124 | n/a | PyObject_FREE(gr->gr_nfa[i]->nf_state); |
---|
125 | n/a | } |
---|
126 | n/a | PyObject_FREE(gr->gr_nfa); |
---|
127 | n/a | PyObject_FREE(gr); |
---|
128 | n/a | } |
---|
129 | n/a | |
---|
130 | n/a | static nfa * |
---|
131 | n/a | addnfa(nfagrammar *gr, char *name) |
---|
132 | n/a | { |
---|
133 | n/a | nfa *nf; |
---|
134 | n/a | |
---|
135 | n/a | nf = newnfa(name); |
---|
136 | n/a | gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, |
---|
137 | n/a | sizeof(nfa*) * (gr->gr_nnfas + 1)); |
---|
138 | n/a | if (gr->gr_nfa == NULL) |
---|
139 | n/a | Py_FatalError("out of mem"); |
---|
140 | n/a | gr->gr_nfa[gr->gr_nnfas++] = nf; |
---|
141 | n/a | addlabel(&gr->gr_ll, NAME, nf->nf_name); |
---|
142 | n/a | return nf; |
---|
143 | n/a | } |
---|
144 | n/a | |
---|
145 | n/a | #ifdef Py_DEBUG |
---|
146 | n/a | |
---|
147 | n/a | static const char REQNFMT[] = "metacompile: less than %d children\n"; |
---|
148 | n/a | |
---|
149 | n/a | #define REQN(i, count) do { \ |
---|
150 | n/a | if (i < count) { \ |
---|
151 | n/a | fprintf(stderr, REQNFMT, count); \ |
---|
152 | n/a | Py_FatalError("REQN"); \ |
---|
153 | n/a | } \ |
---|
154 | n/a | } while (0) |
---|
155 | n/a | |
---|
156 | n/a | #else |
---|
157 | n/a | #define REQN(i, count) /* empty */ |
---|
158 | n/a | #endif |
---|
159 | n/a | |
---|
160 | n/a | static nfagrammar * |
---|
161 | n/a | metacompile(node *n) |
---|
162 | n/a | { |
---|
163 | n/a | nfagrammar *gr; |
---|
164 | n/a | int i; |
---|
165 | n/a | |
---|
166 | n/a | if (Py_DebugFlag) |
---|
167 | n/a | printf("Compiling (meta-) parse tree into NFA grammar\n"); |
---|
168 | n/a | gr = newnfagrammar(); |
---|
169 | n/a | REQ(n, MSTART); |
---|
170 | n/a | i = n->n_nchildren - 1; /* Last child is ENDMARKER */ |
---|
171 | n/a | n = n->n_child; |
---|
172 | n/a | for (; --i >= 0; n++) { |
---|
173 | n/a | if (n->n_type != NEWLINE) |
---|
174 | n/a | compile_rule(gr, n); |
---|
175 | n/a | } |
---|
176 | n/a | return gr; |
---|
177 | n/a | } |
---|
178 | n/a | |
---|
179 | n/a | static void |
---|
180 | n/a | compile_rule(nfagrammar *gr, node *n) |
---|
181 | n/a | { |
---|
182 | n/a | nfa *nf; |
---|
183 | n/a | |
---|
184 | n/a | REQ(n, RULE); |
---|
185 | n/a | REQN(n->n_nchildren, 4); |
---|
186 | n/a | n = n->n_child; |
---|
187 | n/a | REQ(n, NAME); |
---|
188 | n/a | nf = addnfa(gr, n->n_str); |
---|
189 | n/a | n++; |
---|
190 | n/a | REQ(n, COLON); |
---|
191 | n/a | n++; |
---|
192 | n/a | REQ(n, RHS); |
---|
193 | n/a | compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); |
---|
194 | n/a | n++; |
---|
195 | n/a | REQ(n, NEWLINE); |
---|
196 | n/a | } |
---|
197 | n/a | |
---|
198 | n/a | static void |
---|
199 | n/a | compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) |
---|
200 | n/a | { |
---|
201 | n/a | int i; |
---|
202 | n/a | int a, b; |
---|
203 | n/a | |
---|
204 | n/a | REQ(n, RHS); |
---|
205 | n/a | i = n->n_nchildren; |
---|
206 | n/a | REQN(i, 1); |
---|
207 | n/a | n = n->n_child; |
---|
208 | n/a | REQ(n, ALT); |
---|
209 | n/a | compile_alt(ll, nf, n, pa, pb); |
---|
210 | n/a | if (--i <= 0) |
---|
211 | n/a | return; |
---|
212 | n/a | n++; |
---|
213 | n/a | a = *pa; |
---|
214 | n/a | b = *pb; |
---|
215 | n/a | *pa = addnfastate(nf); |
---|
216 | n/a | *pb = addnfastate(nf); |
---|
217 | n/a | addnfaarc(nf, *pa, a, EMPTY); |
---|
218 | n/a | addnfaarc(nf, b, *pb, EMPTY); |
---|
219 | n/a | for (; --i >= 0; n++) { |
---|
220 | n/a | REQ(n, VBAR); |
---|
221 | n/a | REQN(i, 1); |
---|
222 | n/a | --i; |
---|
223 | n/a | n++; |
---|
224 | n/a | REQ(n, ALT); |
---|
225 | n/a | compile_alt(ll, nf, n, &a, &b); |
---|
226 | n/a | addnfaarc(nf, *pa, a, EMPTY); |
---|
227 | n/a | addnfaarc(nf, b, *pb, EMPTY); |
---|
228 | n/a | } |
---|
229 | n/a | } |
---|
230 | n/a | |
---|
231 | n/a | static void |
---|
232 | n/a | compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) |
---|
233 | n/a | { |
---|
234 | n/a | int i; |
---|
235 | n/a | int a, b; |
---|
236 | n/a | |
---|
237 | n/a | REQ(n, ALT); |
---|
238 | n/a | i = n->n_nchildren; |
---|
239 | n/a | REQN(i, 1); |
---|
240 | n/a | n = n->n_child; |
---|
241 | n/a | REQ(n, ITEM); |
---|
242 | n/a | compile_item(ll, nf, n, pa, pb); |
---|
243 | n/a | --i; |
---|
244 | n/a | n++; |
---|
245 | n/a | for (; --i >= 0; n++) { |
---|
246 | n/a | REQ(n, ITEM); |
---|
247 | n/a | compile_item(ll, nf, n, &a, &b); |
---|
248 | n/a | addnfaarc(nf, *pb, a, EMPTY); |
---|
249 | n/a | *pb = b; |
---|
250 | n/a | } |
---|
251 | n/a | } |
---|
252 | n/a | |
---|
253 | n/a | static void |
---|
254 | n/a | compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) |
---|
255 | n/a | { |
---|
256 | n/a | int i; |
---|
257 | n/a | int a, b; |
---|
258 | n/a | |
---|
259 | n/a | REQ(n, ITEM); |
---|
260 | n/a | i = n->n_nchildren; |
---|
261 | n/a | REQN(i, 1); |
---|
262 | n/a | n = n->n_child; |
---|
263 | n/a | if (n->n_type == LSQB) { |
---|
264 | n/a | REQN(i, 3); |
---|
265 | n/a | n++; |
---|
266 | n/a | REQ(n, RHS); |
---|
267 | n/a | *pa = addnfastate(nf); |
---|
268 | n/a | *pb = addnfastate(nf); |
---|
269 | n/a | addnfaarc(nf, *pa, *pb, EMPTY); |
---|
270 | n/a | compile_rhs(ll, nf, n, &a, &b); |
---|
271 | n/a | addnfaarc(nf, *pa, a, EMPTY); |
---|
272 | n/a | addnfaarc(nf, b, *pb, EMPTY); |
---|
273 | n/a | REQN(i, 1); |
---|
274 | n/a | n++; |
---|
275 | n/a | REQ(n, RSQB); |
---|
276 | n/a | } |
---|
277 | n/a | else { |
---|
278 | n/a | compile_atom(ll, nf, n, pa, pb); |
---|
279 | n/a | if (--i <= 0) |
---|
280 | n/a | return; |
---|
281 | n/a | n++; |
---|
282 | n/a | addnfaarc(nf, *pb, *pa, EMPTY); |
---|
283 | n/a | if (n->n_type == STAR) |
---|
284 | n/a | *pb = *pa; |
---|
285 | n/a | else |
---|
286 | n/a | REQ(n, PLUS); |
---|
287 | n/a | } |
---|
288 | n/a | } |
---|
289 | n/a | |
---|
290 | n/a | static void |
---|
291 | n/a | compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) |
---|
292 | n/a | { |
---|
293 | n/a | int i; |
---|
294 | n/a | |
---|
295 | n/a | REQ(n, ATOM); |
---|
296 | n/a | i = n->n_nchildren; |
---|
297 | n/a | (void)i; /* Don't warn about set but unused */ |
---|
298 | n/a | REQN(i, 1); |
---|
299 | n/a | n = n->n_child; |
---|
300 | n/a | if (n->n_type == LPAR) { |
---|
301 | n/a | REQN(i, 3); |
---|
302 | n/a | n++; |
---|
303 | n/a | REQ(n, RHS); |
---|
304 | n/a | compile_rhs(ll, nf, n, pa, pb); |
---|
305 | n/a | n++; |
---|
306 | n/a | REQ(n, RPAR); |
---|
307 | n/a | } |
---|
308 | n/a | else if (n->n_type == NAME || n->n_type == STRING) { |
---|
309 | n/a | *pa = addnfastate(nf); |
---|
310 | n/a | *pb = addnfastate(nf); |
---|
311 | n/a | addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); |
---|
312 | n/a | } |
---|
313 | n/a | else |
---|
314 | n/a | REQ(n, NAME); |
---|
315 | n/a | } |
---|
316 | n/a | |
---|
317 | n/a | static void |
---|
318 | n/a | dumpstate(labellist *ll, nfa *nf, int istate) |
---|
319 | n/a | { |
---|
320 | n/a | nfastate *st; |
---|
321 | n/a | int i; |
---|
322 | n/a | nfaarc *ar; |
---|
323 | n/a | |
---|
324 | n/a | printf("%c%2d%c", |
---|
325 | n/a | istate == nf->nf_start ? '*' : ' ', |
---|
326 | n/a | istate, |
---|
327 | n/a | istate == nf->nf_finish ? '.' : ' '); |
---|
328 | n/a | st = &nf->nf_state[istate]; |
---|
329 | n/a | ar = st->st_arc; |
---|
330 | n/a | for (i = 0; i < st->st_narcs; i++) { |
---|
331 | n/a | if (i > 0) |
---|
332 | n/a | printf("\n "); |
---|
333 | n/a | printf("-> %2d %s", ar->ar_arrow, |
---|
334 | n/a | PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); |
---|
335 | n/a | ar++; |
---|
336 | n/a | } |
---|
337 | n/a | printf("\n"); |
---|
338 | n/a | } |
---|
339 | n/a | |
---|
340 | n/a | static void |
---|
341 | n/a | dumpnfa(labellist *ll, nfa *nf) |
---|
342 | n/a | { |
---|
343 | n/a | int i; |
---|
344 | n/a | |
---|
345 | n/a | printf("NFA '%s' has %d states; start %d, finish %d\n", |
---|
346 | n/a | nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); |
---|
347 | n/a | for (i = 0; i < nf->nf_nstates; i++) |
---|
348 | n/a | dumpstate(ll, nf, i); |
---|
349 | n/a | } |
---|
350 | n/a | |
---|
351 | n/a | |
---|
352 | n/a | /* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ |
---|
353 | n/a | |
---|
354 | n/a | static void |
---|
355 | n/a | addclosure(bitset ss, nfa *nf, int istate) |
---|
356 | n/a | { |
---|
357 | n/a | if (addbit(ss, istate)) { |
---|
358 | n/a | nfastate *st = &nf->nf_state[istate]; |
---|
359 | n/a | nfaarc *ar = st->st_arc; |
---|
360 | n/a | int i; |
---|
361 | n/a | |
---|
362 | n/a | for (i = st->st_narcs; --i >= 0; ) { |
---|
363 | n/a | if (ar->ar_label == EMPTY) |
---|
364 | n/a | addclosure(ss, nf, ar->ar_arrow); |
---|
365 | n/a | ar++; |
---|
366 | n/a | } |
---|
367 | n/a | } |
---|
368 | n/a | } |
---|
369 | n/a | |
---|
370 | n/a | typedef struct _ss_arc { |
---|
371 | n/a | bitset sa_bitset; |
---|
372 | n/a | int sa_arrow; |
---|
373 | n/a | int sa_label; |
---|
374 | n/a | } ss_arc; |
---|
375 | n/a | |
---|
376 | n/a | typedef struct _ss_state { |
---|
377 | n/a | bitset ss_ss; |
---|
378 | n/a | int ss_narcs; |
---|
379 | n/a | struct _ss_arc *ss_arc; |
---|
380 | n/a | int ss_deleted; |
---|
381 | n/a | int ss_finish; |
---|
382 | n/a | int ss_rename; |
---|
383 | n/a | } ss_state; |
---|
384 | n/a | |
---|
385 | n/a | typedef struct _ss_dfa { |
---|
386 | n/a | int sd_nstates; |
---|
387 | n/a | ss_state *sd_state; |
---|
388 | n/a | } ss_dfa; |
---|
389 | n/a | |
---|
390 | n/a | /* Forward */ |
---|
391 | n/a | static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, |
---|
392 | n/a | labellist *ll, const char *msg); |
---|
393 | n/a | static void simplify(int xx_nstates, ss_state *xx_state); |
---|
394 | n/a | static void convert(dfa *d, int xx_nstates, ss_state *xx_state); |
---|
395 | n/a | |
---|
396 | n/a | static void |
---|
397 | n/a | makedfa(nfagrammar *gr, nfa *nf, dfa *d) |
---|
398 | n/a | { |
---|
399 | n/a | int nbits = nf->nf_nstates; |
---|
400 | n/a | bitset ss; |
---|
401 | n/a | int xx_nstates; |
---|
402 | n/a | ss_state *xx_state, *yy; |
---|
403 | n/a | ss_arc *zz; |
---|
404 | n/a | int istate, jstate, iarc, jarc, ibit; |
---|
405 | n/a | nfastate *st; |
---|
406 | n/a | nfaarc *ar; |
---|
407 | n/a | |
---|
408 | n/a | ss = newbitset(nbits); |
---|
409 | n/a | addclosure(ss, nf, nf->nf_start); |
---|
410 | n/a | xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); |
---|
411 | n/a | if (xx_state == NULL) |
---|
412 | n/a | Py_FatalError("no mem for xx_state in makedfa"); |
---|
413 | n/a | xx_nstates = 1; |
---|
414 | n/a | yy = &xx_state[0]; |
---|
415 | n/a | yy->ss_ss = ss; |
---|
416 | n/a | yy->ss_narcs = 0; |
---|
417 | n/a | yy->ss_arc = NULL; |
---|
418 | n/a | yy->ss_deleted = 0; |
---|
419 | n/a | yy->ss_finish = testbit(ss, nf->nf_finish); |
---|
420 | n/a | if (yy->ss_finish) |
---|
421 | n/a | printf("Error: nonterminal '%s' may produce empty.\n", |
---|
422 | n/a | nf->nf_name); |
---|
423 | n/a | |
---|
424 | n/a | /* This algorithm is from a book written before |
---|
425 | n/a | the invention of structured programming... */ |
---|
426 | n/a | |
---|
427 | n/a | /* For each unmarked state... */ |
---|
428 | n/a | for (istate = 0; istate < xx_nstates; ++istate) { |
---|
429 | n/a | size_t size; |
---|
430 | n/a | yy = &xx_state[istate]; |
---|
431 | n/a | ss = yy->ss_ss; |
---|
432 | n/a | /* For all its states... */ |
---|
433 | n/a | for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { |
---|
434 | n/a | if (!testbit(ss, ibit)) |
---|
435 | n/a | continue; |
---|
436 | n/a | st = &nf->nf_state[ibit]; |
---|
437 | n/a | /* For all non-empty arcs from this state... */ |
---|
438 | n/a | for (iarc = 0; iarc < st->st_narcs; iarc++) { |
---|
439 | n/a | ar = &st->st_arc[iarc]; |
---|
440 | n/a | if (ar->ar_label == EMPTY) |
---|
441 | n/a | continue; |
---|
442 | n/a | /* Look up in list of arcs from this state */ |
---|
443 | n/a | for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { |
---|
444 | n/a | zz = &yy->ss_arc[jarc]; |
---|
445 | n/a | if (ar->ar_label == zz->sa_label) |
---|
446 | n/a | goto found; |
---|
447 | n/a | } |
---|
448 | n/a | /* Add new arc for this state */ |
---|
449 | n/a | size = sizeof(ss_arc) * (yy->ss_narcs + 1); |
---|
450 | n/a | yy->ss_arc = (ss_arc *)PyObject_REALLOC( |
---|
451 | n/a | yy->ss_arc, size); |
---|
452 | n/a | if (yy->ss_arc == NULL) |
---|
453 | n/a | Py_FatalError("out of mem"); |
---|
454 | n/a | zz = &yy->ss_arc[yy->ss_narcs++]; |
---|
455 | n/a | zz->sa_label = ar->ar_label; |
---|
456 | n/a | zz->sa_bitset = newbitset(nbits); |
---|
457 | n/a | zz->sa_arrow = -1; |
---|
458 | n/a | found: ; |
---|
459 | n/a | /* Add destination */ |
---|
460 | n/a | addclosure(zz->sa_bitset, nf, ar->ar_arrow); |
---|
461 | n/a | } |
---|
462 | n/a | } |
---|
463 | n/a | /* Now look up all the arrow states */ |
---|
464 | n/a | for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { |
---|
465 | n/a | zz = &xx_state[istate].ss_arc[jarc]; |
---|
466 | n/a | for (jstate = 0; jstate < xx_nstates; jstate++) { |
---|
467 | n/a | if (samebitset(zz->sa_bitset, |
---|
468 | n/a | xx_state[jstate].ss_ss, nbits)) { |
---|
469 | n/a | zz->sa_arrow = jstate; |
---|
470 | n/a | goto done; |
---|
471 | n/a | } |
---|
472 | n/a | } |
---|
473 | n/a | size = sizeof(ss_state) * (xx_nstates + 1); |
---|
474 | n/a | xx_state = (ss_state *)PyObject_REALLOC(xx_state, |
---|
475 | n/a | size); |
---|
476 | n/a | if (xx_state == NULL) |
---|
477 | n/a | Py_FatalError("out of mem"); |
---|
478 | n/a | zz->sa_arrow = xx_nstates; |
---|
479 | n/a | yy = &xx_state[xx_nstates++]; |
---|
480 | n/a | yy->ss_ss = zz->sa_bitset; |
---|
481 | n/a | yy->ss_narcs = 0; |
---|
482 | n/a | yy->ss_arc = NULL; |
---|
483 | n/a | yy->ss_deleted = 0; |
---|
484 | n/a | yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); |
---|
485 | n/a | done: ; |
---|
486 | n/a | } |
---|
487 | n/a | } |
---|
488 | n/a | |
---|
489 | n/a | if (Py_DebugFlag) |
---|
490 | n/a | printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, |
---|
491 | n/a | "before minimizing"); |
---|
492 | n/a | |
---|
493 | n/a | simplify(xx_nstates, xx_state); |
---|
494 | n/a | |
---|
495 | n/a | if (Py_DebugFlag) |
---|
496 | n/a | printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, |
---|
497 | n/a | "after minimizing"); |
---|
498 | n/a | |
---|
499 | n/a | convert(d, xx_nstates, xx_state); |
---|
500 | n/a | |
---|
501 | n/a | for (int i = 0; i < xx_nstates; i++) { |
---|
502 | n/a | for (int j = 0; j < xx_state[i].ss_narcs; j++) |
---|
503 | n/a | delbitset(xx_state[i].ss_arc[j].sa_bitset); |
---|
504 | n/a | PyObject_FREE(xx_state[i].ss_arc); |
---|
505 | n/a | } |
---|
506 | n/a | PyObject_FREE(xx_state); |
---|
507 | n/a | } |
---|
508 | n/a | |
---|
509 | n/a | static void |
---|
510 | n/a | printssdfa(int xx_nstates, ss_state *xx_state, int nbits, |
---|
511 | n/a | labellist *ll, const char *msg) |
---|
512 | n/a | { |
---|
513 | n/a | int i, ibit, iarc; |
---|
514 | n/a | ss_state *yy; |
---|
515 | n/a | ss_arc *zz; |
---|
516 | n/a | |
---|
517 | n/a | printf("Subset DFA %s\n", msg); |
---|
518 | n/a | for (i = 0; i < xx_nstates; i++) { |
---|
519 | n/a | yy = &xx_state[i]; |
---|
520 | n/a | if (yy->ss_deleted) |
---|
521 | n/a | continue; |
---|
522 | n/a | printf(" Subset %d", i); |
---|
523 | n/a | if (yy->ss_finish) |
---|
524 | n/a | printf(" (finish)"); |
---|
525 | n/a | printf(" { "); |
---|
526 | n/a | for (ibit = 0; ibit < nbits; ibit++) { |
---|
527 | n/a | if (testbit(yy->ss_ss, ibit)) |
---|
528 | n/a | printf("%d ", ibit); |
---|
529 | n/a | } |
---|
530 | n/a | printf("}\n"); |
---|
531 | n/a | for (iarc = 0; iarc < yy->ss_narcs; iarc++) { |
---|
532 | n/a | zz = &yy->ss_arc[iarc]; |
---|
533 | n/a | printf(" Arc to state %d, label %s\n", |
---|
534 | n/a | zz->sa_arrow, |
---|
535 | n/a | PyGrammar_LabelRepr( |
---|
536 | n/a | &ll->ll_label[zz->sa_label])); |
---|
537 | n/a | } |
---|
538 | n/a | } |
---|
539 | n/a | } |
---|
540 | n/a | |
---|
541 | n/a | |
---|
542 | n/a | /* PART THREE -- SIMPLIFY DFA */ |
---|
543 | n/a | |
---|
544 | n/a | /* Simplify the DFA by repeatedly eliminating states that are |
---|
545 | n/a | equivalent to another oner. This is NOT Algorithm 3.3 from |
---|
546 | n/a | [Aho&Ullman 77]. It does not always finds the minimal DFA, |
---|
547 | n/a | but it does usually make a much smaller one... (For an example |
---|
548 | n/a | of sub-optimal behavior, try S: x a b+ | y a b+.) |
---|
549 | n/a | */ |
---|
550 | n/a | |
---|
551 | n/a | static int |
---|
552 | n/a | samestate(ss_state *s1, ss_state *s2) |
---|
553 | n/a | { |
---|
554 | n/a | int i; |
---|
555 | n/a | |
---|
556 | n/a | if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) |
---|
557 | n/a | return 0; |
---|
558 | n/a | for (i = 0; i < s1->ss_narcs; i++) { |
---|
559 | n/a | if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || |
---|
560 | n/a | s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) |
---|
561 | n/a | return 0; |
---|
562 | n/a | } |
---|
563 | n/a | return 1; |
---|
564 | n/a | } |
---|
565 | n/a | |
---|
566 | n/a | static void |
---|
567 | n/a | renamestates(int xx_nstates, ss_state *xx_state, int from, int to) |
---|
568 | n/a | { |
---|
569 | n/a | int i, j; |
---|
570 | n/a | |
---|
571 | n/a | if (Py_DebugFlag) |
---|
572 | n/a | printf("Rename state %d to %d.\n", from, to); |
---|
573 | n/a | for (i = 0; i < xx_nstates; i++) { |
---|
574 | n/a | if (xx_state[i].ss_deleted) |
---|
575 | n/a | continue; |
---|
576 | n/a | for (j = 0; j < xx_state[i].ss_narcs; j++) { |
---|
577 | n/a | if (xx_state[i].ss_arc[j].sa_arrow == from) |
---|
578 | n/a | xx_state[i].ss_arc[j].sa_arrow = to; |
---|
579 | n/a | } |
---|
580 | n/a | } |
---|
581 | n/a | } |
---|
582 | n/a | |
---|
583 | n/a | static void |
---|
584 | n/a | simplify(int xx_nstates, ss_state *xx_state) |
---|
585 | n/a | { |
---|
586 | n/a | int changes; |
---|
587 | n/a | int i, j; |
---|
588 | n/a | |
---|
589 | n/a | do { |
---|
590 | n/a | changes = 0; |
---|
591 | n/a | for (i = 1; i < xx_nstates; i++) { |
---|
592 | n/a | if (xx_state[i].ss_deleted) |
---|
593 | n/a | continue; |
---|
594 | n/a | for (j = 0; j < i; j++) { |
---|
595 | n/a | if (xx_state[j].ss_deleted) |
---|
596 | n/a | continue; |
---|
597 | n/a | if (samestate(&xx_state[i], &xx_state[j])) { |
---|
598 | n/a | xx_state[i].ss_deleted++; |
---|
599 | n/a | renamestates(xx_nstates, xx_state, |
---|
600 | n/a | i, j); |
---|
601 | n/a | changes++; |
---|
602 | n/a | break; |
---|
603 | n/a | } |
---|
604 | n/a | } |
---|
605 | n/a | } |
---|
606 | n/a | } while (changes); |
---|
607 | n/a | } |
---|
608 | n/a | |
---|
609 | n/a | |
---|
610 | n/a | /* PART FOUR -- GENERATE PARSING TABLES */ |
---|
611 | n/a | |
---|
612 | n/a | /* Convert the DFA into a grammar that can be used by our parser */ |
---|
613 | n/a | |
---|
614 | n/a | static void |
---|
615 | n/a | convert(dfa *d, int xx_nstates, ss_state *xx_state) |
---|
616 | n/a | { |
---|
617 | n/a | int i, j; |
---|
618 | n/a | ss_state *yy; |
---|
619 | n/a | ss_arc *zz; |
---|
620 | n/a | |
---|
621 | n/a | for (i = 0; i < xx_nstates; i++) { |
---|
622 | n/a | yy = &xx_state[i]; |
---|
623 | n/a | if (yy->ss_deleted) |
---|
624 | n/a | continue; |
---|
625 | n/a | yy->ss_rename = addstate(d); |
---|
626 | n/a | } |
---|
627 | n/a | |
---|
628 | n/a | for (i = 0; i < xx_nstates; i++) { |
---|
629 | n/a | yy = &xx_state[i]; |
---|
630 | n/a | if (yy->ss_deleted) |
---|
631 | n/a | continue; |
---|
632 | n/a | for (j = 0; j < yy->ss_narcs; j++) { |
---|
633 | n/a | zz = &yy->ss_arc[j]; |
---|
634 | n/a | addarc(d, yy->ss_rename, |
---|
635 | n/a | xx_state[zz->sa_arrow].ss_rename, |
---|
636 | n/a | zz->sa_label); |
---|
637 | n/a | } |
---|
638 | n/a | if (yy->ss_finish) |
---|
639 | n/a | addarc(d, yy->ss_rename, yy->ss_rename, 0); |
---|
640 | n/a | } |
---|
641 | n/a | |
---|
642 | n/a | d->d_initial = 0; |
---|
643 | n/a | } |
---|
644 | n/a | |
---|
645 | n/a | |
---|
646 | n/a | /* PART FIVE -- GLUE IT ALL TOGETHER */ |
---|
647 | n/a | |
---|
648 | n/a | static grammar * |
---|
649 | n/a | maketables(nfagrammar *gr) |
---|
650 | n/a | { |
---|
651 | n/a | int i; |
---|
652 | n/a | nfa *nf; |
---|
653 | n/a | dfa *d; |
---|
654 | n/a | grammar *g; |
---|
655 | n/a | |
---|
656 | n/a | if (gr->gr_nnfas == 0) |
---|
657 | n/a | return NULL; |
---|
658 | n/a | g = newgrammar(gr->gr_nfa[0]->nf_type); |
---|
659 | n/a | /* XXX first rule must be start rule */ |
---|
660 | n/a | g->g_ll = gr->gr_ll; |
---|
661 | n/a | |
---|
662 | n/a | for (i = 0; i < gr->gr_nnfas; i++) { |
---|
663 | n/a | nf = gr->gr_nfa[i]; |
---|
664 | n/a | if (Py_DebugFlag) { |
---|
665 | n/a | printf("Dump of NFA for '%s' ...\n", nf->nf_name); |
---|
666 | n/a | dumpnfa(&gr->gr_ll, nf); |
---|
667 | n/a | printf("Making DFA for '%s' ...\n", nf->nf_name); |
---|
668 | n/a | } |
---|
669 | n/a | d = adddfa(g, nf->nf_type, nf->nf_name); |
---|
670 | n/a | makedfa(gr, gr->gr_nfa[i], d); |
---|
671 | n/a | } |
---|
672 | n/a | |
---|
673 | n/a | return g; |
---|
674 | n/a | } |
---|
675 | n/a | |
---|
676 | n/a | grammar * |
---|
677 | n/a | pgen(node *n) |
---|
678 | n/a | { |
---|
679 | n/a | nfagrammar *gr; |
---|
680 | n/a | grammar *g; |
---|
681 | n/a | |
---|
682 | n/a | gr = metacompile(n); |
---|
683 | n/a | g = maketables(gr); |
---|
684 | n/a | translatelabels(g); |
---|
685 | n/a | addfirstsets(g); |
---|
686 | n/a | freenfagrammar(gr); |
---|
687 | n/a | return g; |
---|
688 | n/a | } |
---|
689 | n/a | |
---|
690 | n/a | grammar * |
---|
691 | n/a | Py_pgen(node *n) |
---|
692 | n/a | { |
---|
693 | n/a | return pgen(n); |
---|
694 | n/a | } |
---|
695 | n/a | |
---|
696 | n/a | /* |
---|
697 | n/a | |
---|
698 | n/a | Description |
---|
699 | n/a | ----------- |
---|
700 | n/a | |
---|
701 | n/a | Input is a grammar in extended BNF (using * for repetition, + for |
---|
702 | n/a | at-least-once repetition, [] for optional parts, | for alternatives and |
---|
703 | n/a | () for grouping). This has already been parsed and turned into a parse |
---|
704 | n/a | tree. |
---|
705 | n/a | |
---|
706 | n/a | Each rule is considered as a regular expression in its own right. |
---|
707 | n/a | It is turned into a Non-deterministic Finite Automaton (NFA), which |
---|
708 | n/a | is then turned into a Deterministic Finite Automaton (DFA), which is then |
---|
709 | n/a | optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3, |
---|
710 | n/a | or similar compiler books (this technique is more often used for lexical |
---|
711 | n/a | analyzers). |
---|
712 | n/a | |
---|
713 | n/a | The DFA's are used by the parser as parsing tables in a special way |
---|
714 | n/a | that's probably unique. Before they are usable, the FIRST sets of all |
---|
715 | n/a | non-terminals are computed. |
---|
716 | n/a | |
---|
717 | n/a | Reference |
---|
718 | n/a | --------- |
---|
719 | n/a | |
---|
720 | n/a | [Aho&Ullman 77] |
---|
721 | n/a | Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 |
---|
722 | n/a | (first edition) |
---|
723 | n/a | |
---|
724 | n/a | */ |
---|