1 from __future__ import division
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 __version__ = 'SPARK-0.6.1'
24
25 import re
26 import sys
27 import string
28
30 namelist, namedict, classlist = [], {}, [instance.__class__]
31 for c in classlist:
32 for b in c.__bases__:
33 classlist.append(b)
34 for name in dir(c):
35 if not namedict.has_key(name):
36 namelist.append(name)
37 namedict[name] = 1
38 return namelist
39
42 pattern = self.reflect()
43 self.re = re.compile(pattern, re.VERBOSE)
44
45 self.index2func = {}
46 for name, number in self.re.groupindex.items():
47 self.index2func[number-1] = getattr(self, 't_' + name)
48
50 doc = getattr(self, name).__doc__
51 rv = '(?P<%s>%s)' % (name[2:], doc)
52 return rv
53
55 rv = []
56 for name in _namelist(self):
57 if name[:2] == 't_' and name != 't_default':
58 rv.append(self.makeRE(name))
59
60 rv.append(self.makeRE('t_default'))
61 return string.join(rv, '|')
62
64 print "Lexical error at position %s" % pos
65 raise SystemExit
66
68 pos = 0
69 n = len(s)
70 while pos < n:
71 m = self.re.match(s, pos)
72 if m is None:
73 self.error(s, pos)
74
75 groups = m.groups()
76 for i in range(len(groups)):
77 if groups[i] and self.index2func.has_key(i):
78 self.index2func[i](groups[i])
79 pos = m.end()
80
82 r'( . | \n )+'
83 pass
84
87 self.rules = {}
88 self.rule2func = {}
89 self.rule2name = {}
90 self.collectRules()
91 self.startRule = self.augment(start)
92 self.ruleschanged = 1
93
94 _START = 'START'
95 _EOF = 'EOF'
96
97
98
99
100 - def preprocess(self, rule, func): return rule, func
101
103 rules = string.split(doc)
104
105 index = []
106 for i in range(len(rules)):
107 if rules[i] == '::=':
108 index.append(i-1)
109 index.append(len(rules))
110
111 for i in range(len(index)-1):
112 lhs = rules[index[i]]
113 rhs = rules[index[i]+2:index[i+1]]
114 rule = (lhs, tuple(rhs))
115
116 rule, fn = self.preprocess(rule, func)
117
118 if self.rules.has_key(lhs):
119 self.rules[lhs].append(rule)
120 else:
121 self.rules[lhs] = [ rule ]
122 self.rule2func[rule] = fn
123 self.rule2name[rule] = func.__name__[2:]
124 self.ruleschanged = 1
125
132
134
135
136
137
138
139 startRule = (self._START, ( start, self._EOF ))
140 self.rule2func[startRule] = lambda args: args[0]
141 self.rules[self._START] = [ startRule ]
142 self.rule2name[startRule] = ''
143 return startRule
144
146 union = {}
147 self.first = {}
148
149 for rulelist in self.rules.values():
150 for lhs, rhs in rulelist:
151 if not self.first.has_key(lhs):
152 self.first[lhs] = {}
153
154 if len(rhs) == 0:
155 self.first[lhs][None] = 1
156 continue
157
158 sym = rhs[0]
159 if not self.rules.has_key(sym):
160 self.first[lhs][sym] = 1
161 else:
162 union[(sym, lhs)] = 1
163 changes = 1
164 while changes:
165 changes = 0
166 for src, dest in union.keys():
167 destlen = len(self.first[dest])
168 self.first[dest].update(self.first[src])
169 if len(self.first[dest]) != destlen:
170 changes = 1
171
172
173
174
175
176
177
178
181
183 print "Syntax error at or near `%s' token" % token
184 raise ValueError("problem in the parsing phase")
185
186 - def parse(self, tokens):
187 tree = {}
188 tokens.append(self._EOF)
189 states = { 0: [ (self.startRule, 0, 0) ] }
190
191 if self.ruleschanged:
192 self.makeFIRST()
193
194 for i in xrange(len(tokens)):
195 states[i+1] = []
196
197 if states[i] == []:
198 break
199 self.buildState(tokens[i], states, i, tree)
200
201
202
203 if i < len(tokens)-1 or states[i+1] != [(self.startRule, 2, 0)]:
204 del tokens[-1]
205 self.error(tokens[i-1])
206 rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1))
207 del tokens[-1]
208 return rv
209
211 needsCompletion = {}
212 state = states[i]
213 predicted = {}
214
215 for item in state:
216 rule, pos, parent = item
217 lhs, rhs = rule
218
219
220
221
222 if pos == len(rhs):
223 if len(rhs) == 0:
224 needsCompletion[lhs] = (item, i)
225
226 for pitem in states[parent]:
227 if pitem is item:
228 break
229
230 prule, ppos, pparent = pitem
231 plhs, prhs = prule
232
233 if prhs[ppos:ppos+1] == (lhs,):
234 new = (prule,
235 ppos+1,
236 pparent)
237 if new not in state:
238 state.append(new)
239 tree[(new, i)] = [(item, i)]
240 else:
241 tree[(new, i)].append((item, i))
242 continue
243
244 nextSym = rhs[pos]
245
246
247
248
249 if self.rules.has_key(nextSym):
250
251
252
253
254
255
256
257 if needsCompletion.has_key(nextSym):
258 new = (rule, pos+1, parent)
259 olditem_i = needsCompletion[nextSym]
260 if new not in state:
261 state.append(new)
262 tree[(new, i)] = [olditem_i]
263 else:
264 tree[(new, i)].append(olditem_i)
265
266
267
268
269 if predicted.has_key(nextSym):
270 continue
271 predicted[nextSym] = 1
272
273 ttype = token is not self._EOF and \
274 self.typestring(token) or \
275 None
276 if ttype is not None:
277
278
279
280
281
282
283
284
285
286 for prule in self.rules[nextSym]:
287 new = (prule, 0, i)
288 prhs = prule[1]
289 if len(prhs) == 0:
290 state.append(new)
291 continue
292 prhs0 = prhs[0]
293 if not self.rules.has_key(prhs0):
294 if prhs0 != ttype:
295 continue
296 else:
297 state.append(new)
298 continue
299 first = self.first[prhs0]
300 if not first.has_key(None) and \
301 not first.has_key(ttype):
302 continue
303 state.append(new)
304 continue
305
306 for prule in self.rules[nextSym]:
307
308
309
310
311
312 prhs = prule[1]
313 if len(prhs) > 0 and \
314 not self.rules.has_key(prhs[0]) and \
315 token != prhs[0]:
316 continue
317 state.append((prule, 0, i))
318
319
320
321
322 elif token == nextSym:
323
324 states[i+1].append((rule, pos+1, parent))
325
327 stack = []
328 self.buildTree_r(stack, tokens, -1, tree, root)
329 return stack[0]
330
331 - def buildTree_r(self, stack, tokens, tokpos, tree, root):
332 (rule, pos, parent), state = root
333
334 while pos > 0:
335 want = ((rule, pos, parent), state)
336 if not tree.has_key(want):
337
338
339
340
341
342
343 pos = pos - 1
344 state = state - 1
345 stack.insert(0, tokens[tokpos])
346 tokpos = tokpos - 1
347 else:
348
349
350
351
352
353
354
355
356
357 children = tree[want]
358 if len(children) > 1:
359 child = self.ambiguity(children)
360 else:
361 child = children[0]
362
363 tokpos = self.buildTree_r(stack,
364 tokens, tokpos,
365 tree, child)
366 pos = pos - 1
367 (crule, cpos, cparent), cstate = child
368 state = cparent
369
370 lhs, rhs = rule
371 result = self.rule2func[rule](stack[:len(rhs)])
372 stack[:len(rhs)] = [result]
373 return tokpos
374
376
377
378
379
380
381
382
383 sortlist = []
384 name2index = {}
385 for i in range(len(children)):
386 ((rule, pos, parent), index) = children[i]
387 lhs, rhs = rule
388 name = self.rule2name[rule]
389 sortlist.append((len(rhs), name))
390 name2index[name] = i
391 sortlist.sort()
392 list = map(lambda (a,b): b, sortlist)
393 return children[name2index[self.resolve(list)]]
394
396
397
398
399
400
401 return list[0]
402
403
404
405
406
407
408
409
410
415
417 rebind = lambda lhs, self=self: \
418 lambda args, lhs=lhs, self=self: \
419 self.buildASTNode(args, lhs)
420 lhs, rhs = rule
421 return rule, rebind(lhs)
422
424 children = []
425 for arg in args:
426 if isinstance(arg, self.AST):
427 children.append(arg)
428 else:
429 children.append(self.terminal(arg))
430 return self.nonterminal(lhs, children)
431
432 - def terminal(self, token): return token
433
435 rv = self.AST(type)
436 rv[:len(args)] = args
437 return rv
438
439
440
441
442
443
444
445
446
447
448
451
455
458
461
483
484 - def postorder(self, node=None):
485 if node is None:
486 node = self.ast
487
488 for kid in node:
489 self.postorder(kid)
490
491 name = 'n_' + self.typestring(node)
492 if hasattr(self, name):
493 func = getattr(self, name)
494 func(node)
495 else:
496 self.default(node)
497
498
501
502
503
504
505
506
507
508
513
515 rebind = lambda func, self=self: \
516 lambda args, func=func, self=self: \
517 self.foundMatch(args, func)
518 lhs, rhs = rule
519 rhslist = list(rhs)
520 rhslist.reverse()
521
522 return (lhs, tuple(rhslist)), rebind(func)
523
525 func(args[-1])
526 return args[-1]
527
529 self.input.insert(0, node)
530 children = 0
531
532 for child in node:
533 if children == 0:
534 self.input.insert(0, '(')
535 children = children + 1
536 self.match_r(child)
537
538 if children > 0:
539 self.input.insert(0, ')')
540
541 - def match(self, ast=None):
542 if ast is None:
543 ast = self.ast
544 self.input = []
545
546 self.match_r(ast)
547 self.parse(self.input)
548
550
551
552
553 return list[-1]
554
555 -def _dump(tokens, states):
556 for i in range(len(states)):
557 print 'state', i
558 for (lhs, rhs), pos, parent in states[i]:
559 print '\t', lhs, '::=',
560 print string.join(rhs[:pos]),
561 print '.',
562 print string.join(rhs[pos:]),
563 print ',', parent
564 if i < len(tokens):
565 print
566 print 'token', str(tokens[i])
567 print
568