1 """generic.py: John Aycock's little languages (SPARK) framework
2
3 $Id: generic.py 1463 2011-06-24 22:58:30Z stsci_embray $
4 """
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 from __future__ import division
39
40 __version__ = 'SPARK-0.6.1rlw'
41
42 import re
43 import sys
44 import string
45 import cltoken
46 token = cltoken
47
49 namelist, namedict, classlist = [], {}, [instance.__class__]
50 for c in classlist:
51 for b in c.__bases__:
52 classlist.append(b)
53 for name in c.__dict__.keys():
54 if not namedict.has_key(name):
55 namelist.append(name)
56 namedict[name] = 1
57 return namelist
58
61 pattern = self.reflect()
62 self.re = re.compile(pattern, re.VERBOSE)
63
64 self.index2func = {}
65 self.indexlist = []
66 for name, number in self.re.groupindex.items():
67
68 if hasattr(self, 't_' + name):
69 self.index2func[number-1] = getattr(self, 't_' + name)
70 self.indexlist.append(number-1)
71
73 doc = getattr(self, name).__doc__
74 rv = '(?P<%s>%s)' % (name[2:], doc)
75 return rv
76
85
87 print "Lexical error at position %s" % pos
88 raise SystemExit
89
91 pos = 0
92 n = len(s)
93 match = self.re.match
94 indexlist = self.indexlist
95 index2func = self.index2func
96 while pos < n:
97 m = match(s, pos)
98 if m is None:
99 self.error(s, pos)
100
101 groups = m.groups()
102 for i in indexlist:
103 if groups[i] is not None:
104 index2func[i](groups[i])
105
106 break
107 pos = m.end()
108
110 r'( . | \n )+'
111 pass
112
115 self.rules = {}
116 self.rule2func = {}
117 self.rule2name = {}
118 self.collectRules()
119 self.startRule = self.augment(start)
120 self.ruleschanged = 1
121
122 _START = 'START'
123 _EOF = 'EOF'
124
125
126
127
128 - def preprocess(self, rule, func): return rule, func
129
131 rules = doc.split()
132
133 index = []
134 for i in range(len(rules)):
135 if rules[i] == '::=':
136 index.append(i-1)
137 index.append(len(rules))
138
139 for i in range(len(index)-1):
140 lhs = rules[index[i]]
141 rhs = rules[index[i]+2:index[i+1]]
142 rule = (lhs, tuple(rhs))
143
144 rule, fn = self.preprocess(rule, func)
145
146 if self.rules.has_key(lhs):
147 self.rules[lhs].append(rule)
148 else:
149 self.rules[lhs] = [ rule ]
150 self.rule2func[rule] = fn
151 self.rule2name[rule] = func.__name__[2:]
152 self.ruleschanged = 1
153
155 for name in _namelist(self):
156 if name[:2] == 'p_':
157 func = getattr(self, name)
158 doc = func.__doc__
159 self.addRule(doc, func)
160
162
163
164
165
166
167 startRule = (self._START, ( start, self._EOF ))
168 self.rule2func[startRule] = lambda args: args[0]
169 self.rules[self._START] = [ startRule ]
170 self.rule2name[startRule] = ''
171 return startRule
172
174
175 first = {}
176 for key in self.rules.keys():
177 first[key] = {}
178 changed = 1
179 npass = 0
180 while (changed > 0):
181 npass = npass+1
182 changed = 0
183 for key, this in first.items():
184 for lhs, rhs in self.rules[key]:
185 for token in rhs:
186
187
188
189 derivesEpsilon = 0
190 if not self.rules.has_key(token):
191
192 if not this.has_key(token):
193 this[token] = 1
194 changed = changed+1
195 else:
196
197 for ntkey in first[token].keys():
198 if ntkey == "":
199 derivesEpsilon = 1
200 elif ntkey != key and not this.has_key(ntkey):
201 this[ntkey] = 1
202 changed = changed+1
203 if not derivesEpsilon: break
204 else:
205
206 if not this.has_key(""):
207 this[""] = 1
208 changed = changed+1
209
210 self.makeTokenRules(first)
211
213
214
215 tokenRules = {}
216
217 allTokens = {}
218 for key, rule in self.rules.items():
219 for lhs, rhs in rule:
220 for token in rhs:
221 if not self.rules.has_key(token):
222
223 allTokens[token] = 1
224 for nextSymbol, flist in first.items():
225 for nextToken in flist.keys():
226 tokenRules[(nextSymbol, nextToken)] = []
227 if flist.has_key(""):
228 for nextToken in allTokens.keys():
229 tokenRules[(nextSymbol, nextToken)] = []
230 for prule in self.rules[nextSymbol]:
231 prhs = prule[1]
232 done = {}
233 for element in prhs:
234 pflist = first.get(element)
235 if pflist is not None:
236 if not done.has_key(element):
237 done[element] = 1
238
239 for nextToken in pflist.keys():
240 if nextToken and not done.has_key(nextToken):
241 done[nextToken] = 1
242 tokenRules[(nextSymbol, nextToken)].append(prule)
243 if not pflist.has_key(""): break
244 else:
245
246 if not done.has_key(element):
247 done[element] = 1
248 tokenRules[(nextSymbol, element)].append(prule)
249 break
250 else:
251
252
253 tokenRules[(nextSymbol, "")].append(prule)
254 for nextToken in allTokens.keys():
255 if not done.has_key(nextToken):
256 done[nextToken] = 1
257 tokenRules[(nextSymbol, nextToken)].append(prule)
258 self.tokenRules = tokenRules
259
260
261
262
263
264
265
266
269
270 - def error(self, token, value=None):
271 print "Syntax error at or near `%s' token" % token
272 if value is not None: print str(value)
273 raise SystemExit
274
275 - def parse(self, tokens):
276 tree = {}
277
278
279 tokens.append(token.Token(self._EOF))
280 states = (len(tokens)+1)*[None]
281 states[0] = [ (self.startRule, 0, 0) ]
282
283 if self.ruleschanged:
284 self.makeFIRST()
285 self.ruleschanged = 0
286
287 for i in xrange(len(tokens)):
288 states[i+1] = []
289
290 if states[i] == []:
291 break
292 self.buildState(tokens[i], states, i, tree)
293
294 if i < len(tokens)-1 or states[i+1] != [ (self.startRule, 2, 0) ]:
295 del tokens[-1]
296 self.error(tokens[i-1])
297 rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1))
298 del tokens[-1]
299 return rv
300
302 state = states[i]
303 predicted = {}
304 completed = {}
305
306
307 state_append = state.append
308 tokenRules_get = self.tokenRules.get
309 for item in state:
310 rule, pos, parent = item
311 lhs, rhs = rule
312
313
314
315
316 if pos == len(rhs):
317
318 if parent == i:
319 if lhs in completed:
320 completed[lhs].append((item,i))
321 else:
322 completed[lhs] = [(item,i)]
323
324 lhstuple = (lhs,)
325 for prule, ppos, pparent in states[parent]:
326 plhs, prhs = prule
327 if prhs[ppos:ppos+1] == lhstuple:
328 new = (prule, ppos+1, pparent)
329 key = (new, i)
330 if key in tree:
331 tree[key].append((item, i))
332 else:
333 state_append(new)
334 tree[key] = [(item, i)]
335 continue
336
337 nextSym = rhs[pos]
338
339
340
341
342 if nextSym in self.rules:
343 if nextSym in predicted:
344
345
346
347
348
349 if nextSym in completed:
350 new = (rule, pos+1, parent)
351 key = (new, i)
352 if key in tree:
353 tree[key].extend(completed[nextSym])
354 else:
355 state_append(new)
356 tree[key] = completed[nextSym]
357 else:
358 predicted[nextSym] = 1
359
360
361
362
363 for prule in tokenRules_get((nextSym, token.type),[]):
364 state_append((prule, 0, i))
365
366
367
368
369 elif token.type == nextSym:
370
371 states[i+1].append((rule, pos+1, parent))
372
374 stack = []
375 self.buildTree_r(stack, tokens, -1, tree, root)
376 return stack[0]
377
378 - def buildTree_r(self, stack, tokens, tokpos, tree, root):
379 (rule, pos, parent), state = root
380
381 while pos > 0:
382 want = ((rule, pos, parent), state)
383 if not tree.has_key(want):
384
385
386
387
388
389
390 pos = pos - 1
391 state = state - 1
392 stack.insert(0, tokens[tokpos])
393 tokpos = tokpos - 1
394 else:
395
396
397
398
399
400
401
402
403
404 children = tree[want]
405 if len(children) > 1:
406
407
408
409
410 try:
411 child = self.ambiguity(children)
412 except AssertionError:
413 del tokens[-1]
414 print stack[0]
415
416 self.error(stack[0], 'Parsing ambiguity'+str(children[:]))
417 else:
418 child = children[0]
419
420 tokpos = self.buildTree_r(stack,
421 tokens, tokpos,
422 tree, child)
423 pos = pos - 1
424 (crule, cpos, cparent), cstate = child
425 state = cparent
426
427 lhs, rhs = rule
428 result = self.rule2func[rule](stack[:len(rhs)])
429 stack[:len(rhs)] = [result]
430 return tokpos
431
433
434
435
436
437
438
439
440
441
442 sortlist = []
443 name2index = {}
444 for i in range(len(children)):
445 ((rule, pos, parent), index) = children[i]
446 lhs, rhs = rule
447
448 sortlist.append((len(rhs), rule))
449 name2index[rule] = i
450 sortlist.sort()
451 list = map(lambda (a,b): b, sortlist)
452 return children[name2index[self.resolve(list)]]
453
455
456
457
458
459
460
461
462
463 return list[0]
464
465
466
467
468
469
470
471
472
477
479 rebind = lambda lhs, self=self: \
480 lambda args, lhs=lhs, self=self: \
481 self.buildASTNode(args, lhs)
482 lhs, rhs = rule
483 return rule, rebind(lhs)
484
486 children = []
487 for arg in args:
488 if isinstance(arg, self.AST):
489 children.append(arg)
490 else:
491 children.append(self.terminal(arg))
492 return self.nonterminal(lhs, children)
493
494 - def terminal(self, token): return token
495
500
501
502
503
504
505
506
507
508
509
510
513
518
520 self.rules = {}
521 self.exitrules = {}
522 for name in _namelist(self):
523 if name[:2] == 'n_':
524 self.rules[name[2:]] = getattr(self, name)
525 if name[-5:] == '_exit':
526 self.exitrules[name[2:-5]] = getattr(self, name)
527
530
532 if node is None:
533 node = self.ast
534
535 try:
536 name = node.type
537 func = self.rules.get(name)
538 if func is None:
539
540 func = self.default
541 self.rules[name] = func
542 func(node)
543 except GenericASTTraversalPruningException:
544 return
545
546 for kid in node:
547
548
549
550
551
552 self.preorder(kid)
553
554 func = self.exitrules.get(name)
555 if func is not None:
556 func(node)
557
558 - def postorder(self, node=None):
559 if node is None:
560 node = self.ast
561
562 for kid in node:
563 self.postorder(kid)
564
565 name = node.type
566 func = self.rules.get(name)
567 if func is None:
568
569 func = self.default
570 self.rules[name] = func
571 func(node)
572
575
576
577
578
579
580
581
582
587
589 rebind = lambda func, self=self: \
590 lambda args, func=func, self=self: \
591 self.foundMatch(args, func)
592 lhs, rhs = rule
593 rhslist = list(rhs)
594 rhslist.reverse()
595
596 return (lhs, tuple(rhslist)), rebind(func)
597
599 func(args[-1])
600 return args[-1]
601
603 self.input.insert(0, node)
604 children = 0
605
606 for child in node:
607 if children == 0:
608 self.input.insert(0, '(')
609 children = children + 1
610 self.match_r(child)
611
612 if children > 0:
613 self.input.insert(0, ')')
614
615 - def match(self, ast=None):
616 if ast is None:
617 ast = self.ast
618 self.input = []
619
620 self.match_r(ast)
621 self.parse(self.input)
622
624
625
626
627 return list[-1]
628