Package pyraf :: Module generic
[hide private]
[frames] | no frames]

Source Code for Module pyraf.generic

  1  """generic.py: John Aycock's little languages (SPARK) framework 
  2   
  3  $Id: generic.py 1463 2011-06-24 22:58:30Z stsci_embray $ 
  4  """ 
  5   
  6  #  Copyright (c) 1998-2000 John Aycock 
  7  # 
  8  #  Permission is hereby granted, free of charge, to any person obtaining 
  9  #  a copy of this software and associated documentation files (the 
 10  #  "Software"), to deal in the Software without restriction, including 
 11  #  without limitation the rights to use, copy, modify, merge, publish, 
 12  #  distribute, sublicense, and/or sell copies of the Software, and to 
 13  #  permit persons to whom the Software is furnished to do so, subject to 
 14  #  the following conditions: 
 15  # 
 16  #  The above copyright notice and this permission notice shall be 
 17  #  included in all copies or substantial portions of the Software. 
 18  # 
 19  #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 20  #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 21  #  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 22  #  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
 23  #  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 24  #  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 25  #  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 26   
 27  # Modifications by R. White: 
 28  # - Allow named groups in scanner patterns (other than the ones 
 29  #   inserted by the class constructor.) 
 30  # - Speed up GenericScanner.tokenize(). 
 31  #   + Keep a list (indexlist) of the group numbers to check. 
 32  #   + Break out of loop after finding a match, since more than 
 33  #     one of the primary patterns cannot match (by construction of 
 34  #     the pattern.) 
 35  # - Add optional value parameter to GenericParser.error. 
 36  # - Add check for assertion error in ambiguity resolution. 
 37   
 38  from __future__ import division # confidence high 
 39   
 40  __version__ = 'SPARK-0.6.1rlw' 
 41   
 42  import re 
 43  import sys 
 44  import string 
 45  import cltoken 
 46  token = cltoken 
 47   
48 -def _namelist(instance):
49 namelist, namedict, classlist = [], {}, [instance.__class__] 50 for c in classlist: 51 for b in c.__bases__: 52 classlist.append(b) 53 for name in c.__dict__.keys(): 54 if not namedict.has_key(name): 55 namelist.append(name) 56 namedict[name] = 1 57 return namelist
58
59 -class GenericScanner:
60 - def __init__(self):
61 pattern = self.reflect() 62 self.re = re.compile(pattern, re.VERBOSE) 63 64 self.index2func = {} 65 self.indexlist = [] 66 for name, number in self.re.groupindex.items(): 67 # allow other named groups 68 if hasattr(self, 't_' + name): 69 self.index2func[number-1] = getattr(self, 't_' + name) 70 self.indexlist.append(number-1)
71
72 - def makeRE(self, name):
73 doc = getattr(self, name).__doc__ 74 rv = '(?P<%s>%s)' % (name[2:], doc) 75 return rv
76
77 - def reflect(self):
78 rv = [] 79 for name in _namelist(self): 80 if name[:2] == 't_' and name != 't_default': 81 rv.append(self.makeRE(name)) 82 83 rv.append(self.makeRE('t_default')) 84 return '|'.join(rv)
85
86 - def error(self, s, pos):
87 print "Lexical error at position %s" % pos 88 raise SystemExit
89
90 - def tokenize(self, s):
91 pos = 0 92 n = len(s) 93 match = self.re.match 94 indexlist = self.indexlist 95 index2func = self.index2func 96 while pos < n: 97 m = match(s, pos) 98 if m is None: 99 self.error(s, pos) 100 101 groups = m.groups() 102 for i in indexlist: 103 if groups[i] is not None: 104 index2func[i](groups[i]) 105 # assume there is only a single match 106 break 107 pos = m.end()
108
109 - def t_default(self, s):
110 r'( . | \n )+' 111 pass
112
113 -class GenericParser:
114 - def __init__(self, start):
115 self.rules = {} 116 self.rule2func = {} 117 self.rule2name = {} 118 self.collectRules() 119 self.startRule = self.augment(start) 120 self.ruleschanged = 1
121 122 _START = 'START' 123 _EOF = 'EOF' 124 125 # 126 # A hook for GenericASTBuilder and GenericASTMatcher. 127 #
128 - def preprocess(self, rule, func): return rule, func
129
130 - def addRule(self, doc, func):
131 rules = doc.split() 132 133 index = [] 134 for i in range(len(rules)): 135 if rules[i] == '::=': 136 index.append(i-1) 137 index.append(len(rules)) 138 139 for i in range(len(index)-1): 140 lhs = rules[index[i]] 141 rhs = rules[index[i]+2:index[i+1]] 142 rule = (lhs, tuple(rhs)) 143 144 rule, fn = self.preprocess(rule, func) 145 146 if self.rules.has_key(lhs): 147 self.rules[lhs].append(rule) 148 else: 149 self.rules[lhs] = [ rule ] 150 self.rule2func[rule] = fn 151 self.rule2name[rule] = func.__name__[2:] 152 self.ruleschanged = 1
153
154 - def collectRules(self):
155 for name in _namelist(self): 156 if name[:2] == 'p_': 157 func = getattr(self, name) 158 doc = func.__doc__ 159 self.addRule(doc, func)
160
161 - def augment(self, start):
162 # 163 # Tempting though it is, this isn't made into a call 164 # to self.addRule() because the start rule shouldn't 165 # be subject to preprocessing. 166 # 167 startRule = (self._START, ( start, self._EOF )) 168 self.rule2func[startRule] = lambda args: args[0] 169 self.rules[self._START] = [ startRule ] 170 self.rule2name[startRule] = '' 171 return startRule
172
173 - def makeFIRST(self):
174 # make the FIRST sets 175 first = {} 176 for key in self.rules.keys(): 177 first[key] = {} 178 changed = 1 179 npass = 0 180 while (changed > 0): 181 npass = npass+1 182 changed = 0 183 for key, this in first.items(): 184 for lhs, rhs in self.rules[key]: 185 for token in rhs: 186 # add token or first[token] to this set 187 # also track whether token derives epsilon; if it 188 # does, need to add FIRST set for next token too 189 derivesEpsilon = 0 190 if not self.rules.has_key(token): 191 # this is a terminal 192 if not this.has_key(token): 193 this[token] = 1 194 changed = changed+1 195 else: 196 # this is a nonterminal -- add its FIRST set 197 for ntkey in first[token].keys(): 198 if ntkey == "": 199 derivesEpsilon = 1 200 elif ntkey != key and not this.has_key(ntkey): 201 this[ntkey] = 1 202 changed = changed+1 203 if not derivesEpsilon: break 204 else: 205 # if get all the way through, add epsilon too 206 if not this.has_key(""): 207 this[""] = 1 208 changed = changed+1 209 # make the rule/token lists 210 self.makeTokenRules(first)
211
212 - def makeTokenRules(self,first):
213 # make dictionary indexed by (nextSymbol, nextToken) with 214 # list of all rules for nextSymbol that could produce nextToken 215 tokenRules = {} 216 # make a list of all terminal tokens 217 allTokens = {} 218 for key, rule in self.rules.items(): 219 for lhs, rhs in rule: 220 for token in rhs: 221 if not self.rules.has_key(token): 222 # this is a terminal 223 allTokens[token] = 1 224 for nextSymbol, flist in first.items(): 225 for nextToken in flist.keys(): 226 tokenRules[(nextSymbol, nextToken)] = [] 227 if flist.has_key(""): 228 for nextToken in allTokens.keys(): 229 tokenRules[(nextSymbol, nextToken)] = [] 230 for prule in self.rules[nextSymbol]: 231 prhs = prule[1] 232 done = {} 233 for element in prhs: 234 pflist = first.get(element) 235 if pflist is not None: 236 if not done.has_key(element): 237 done[element] = 1 238 # non-terminal 239 for nextToken in pflist.keys(): 240 if nextToken and not done.has_key(nextToken): 241 done[nextToken] = 1 242 tokenRules[(nextSymbol, nextToken)].append(prule) 243 if not pflist.has_key(""): break 244 else: 245 # terminal token 246 if not done.has_key(element): 247 done[element] = 1 248 tokenRules[(nextSymbol, element)].append(prule) 249 break 250 else: 251 # this entire rule can produce null 252 # add it to all FIRST symbols and to null list 253 tokenRules[(nextSymbol, "")].append(prule) 254 for nextToken in allTokens.keys(): 255 if not done.has_key(nextToken): 256 done[nextToken] = 1 257 tokenRules[(nextSymbol, nextToken)].append(prule) 258 self.tokenRules = tokenRules
259 260 # 261 # An Earley parser, as per J. Earley, "An Efficient Context-Free 262 # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, 263 # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, 264 # Carnegie-Mellon University, August 1968, p. 27. 265 # 266
267 - def typestring(self, token):
268 return None
269
270 - def error(self, token, value=None):
271 print "Syntax error at or near `%s' token" % token 272 if value is not None: print str(value) 273 raise SystemExit
274
275 - def parse(self, tokens):
276 tree = {} 277 # add a Token instead of a string so references to 278 # token.type in buildState work for EOF symbol 279 tokens.append(token.Token(self._EOF)) 280 states = (len(tokens)+1)*[None] 281 states[0] = [ (self.startRule, 0, 0) ] 282 283 if self.ruleschanged: 284 self.makeFIRST() 285 self.ruleschanged = 0 286 287 for i in xrange(len(tokens)): 288 states[i+1] = [] 289 290 if states[i] == []: 291 break 292 self.buildState(tokens[i], states, i, tree) 293 294 if i < len(tokens)-1 or states[i+1] != [ (self.startRule, 2, 0) ]: 295 del tokens[-1] 296 self.error(tokens[i-1]) 297 rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1)) 298 del tokens[-1] 299 return rv
300
301 - def buildState(self, token, states, i, tree):
302 state = states[i] 303 predicted = {} 304 completed = {} 305 306 # optimize inner loops 307 state_append = state.append 308 tokenRules_get = self.tokenRules.get 309 for item in state: 310 rule, pos, parent = item 311 lhs, rhs = rule 312 313 # 314 # A -> a . (completer) 315 # 316 if pos == len(rhs): 317 # track items completed within this rule 318 if parent == i: 319 if lhs in completed: 320 completed[lhs].append((item,i)) 321 else: 322 completed[lhs] = [(item,i)] 323 324 lhstuple = (lhs,) 325 for prule, ppos, pparent in states[parent]: 326 plhs, prhs = prule 327 if prhs[ppos:ppos+1] == lhstuple: 328 new = (prule, ppos+1, pparent) 329 key = (new, i) 330 if key in tree: 331 tree[key].append((item, i)) 332 else: 333 state_append(new) 334 tree[key] = [(item, i)] 335 continue 336 337 nextSym = rhs[pos] 338 339 # 340 # A -> a . B (predictor) 341 # 342 if nextSym in self.rules: 343 if nextSym in predicted: 344 # 345 # this was already predicted -- but if it was 346 # also completed entirely within this step, then 347 # need to add completed version here too 348 # 349 if nextSym in completed: 350 new = (rule, pos+1, parent) 351 key = (new, i) 352 if key in tree: 353 tree[key].extend(completed[nextSym]) 354 else: 355 state_append(new) 356 tree[key] = completed[nextSym] 357 else: 358 predicted[nextSym] = 1 359 # 360 # Predictor using FIRST sets 361 # Use cached list for this (nextSym, token) combo 362 # 363 for prule in tokenRules_get((nextSym, token.type),[]): 364 state_append((prule, 0, i)) 365 366 # 367 # A -> a . c (scanner) 368 # 369 elif token.type == nextSym: 370 #assert new not in states[i+1] 371 states[i+1].append((rule, pos+1, parent))
372
373 - def buildTree(self, tokens, tree, root):
374 stack = [] 375 self.buildTree_r(stack, tokens, -1, tree, root) 376 return stack[0]
377
378 - def buildTree_r(self, stack, tokens, tokpos, tree, root):
379 (rule, pos, parent), state = root 380 381 while pos > 0: 382 want = ((rule, pos, parent), state) 383 if not tree.has_key(want): 384 # 385 # Since pos > 0, it didn't come from closure, 386 # and if it isn't in tree[], then there must 387 # be a terminal symbol to the left of the dot. 388 # (It must be from a "scanner" step.) 389 # 390 pos = pos - 1 391 state = state - 1 392 stack.insert(0, tokens[tokpos]) 393 tokpos = tokpos - 1 394 else: 395 # 396 # There's a NT to the left of the dot. 397 # Follow the tree pointer recursively (>1 398 # tree pointers from it indicates ambiguity). 399 # Since the item must have come about from a 400 # "completer" step, the state where the item 401 # came from must be the parent state of the 402 # item the tree pointer points to. 403 # 404 children = tree[want] 405 if len(children) > 1: 406 # RLW I'm leaving in this try block for the moment, 407 # RLW although the current ambiguity resolver never 408 # RLW raises and assertion error (which I think may 409 # RLW be a bug.) 410 try: 411 child = self.ambiguity(children) 412 except AssertionError: 413 del tokens[-1] 414 print stack[0] 415 # self.error(tokens[tokpos], 'Parsing ambiguity'+str(children[:])) 416 self.error(stack[0], 'Parsing ambiguity'+str(children[:])) 417 else: 418 child = children[0] 419 420 tokpos = self.buildTree_r(stack, 421 tokens, tokpos, 422 tree, child) 423 pos = pos - 1 424 (crule, cpos, cparent), cstate = child 425 state = cparent 426 427 lhs, rhs = rule 428 result = self.rule2func[rule](stack[:len(rhs)]) 429 stack[:len(rhs)] = [result] 430 return tokpos
431
432 - def ambiguity(self, children):
433 # 434 # XXX - problem here and in collectRules() if the same 435 # rule appears in >1 method. But in that case the 436 # user probably gets what they deserve :-) Also 437 # undefined results if rules causing the ambiguity 438 # appear in the same method. 439 # RLW Modified so it uses rule as the key 440 # RLW If we stick with this, can eliminate rule2name 441 # 442 sortlist = [] 443 name2index = {} 444 for i in range(len(children)): 445 ((rule, pos, parent), index) = children[i] 446 lhs, rhs = rule 447 # name = self.rule2name[rule] 448 sortlist.append((len(rhs), rule)) 449 name2index[rule] = i 450 sortlist.sort() 451 list = map(lambda (a,b): b, sortlist) 452 return children[name2index[self.resolve(list)]]
453
454 - def resolve(self, list):
455 # 456 # Resolve ambiguity in favor of the shortest RHS. 457 # Since we walk the tree from the top down, this 458 # should effectively resolve in favor of a "shift". 459 # 460 # RLW Question -- This used to raise an assertion error 461 # RLW here if there were two choices with the same RHS length. 462 # RLW Why doesn't that happen now? Looks like an error? 463 return list[0]
464 465 # 466 # GenericASTBuilder automagically constructs a concrete/abstract syntax tree 467 # for a given input. The extra argument is a class (not an instance!) 468 # which supports the "__setslice__" and "__len__" methods. 469 # 470 # XXX - silently overrides any user code in methods. 471 # 472
473 -class GenericASTBuilder(GenericParser):
474 - def __init__(self, AST, start):
475 GenericParser.__init__(self, start) 476 self.AST = AST
477
478 - def preprocess(self, rule, func):
479 rebind = lambda lhs, self=self: \ 480 lambda args, lhs=lhs, self=self: \ 481 self.buildASTNode(args, lhs) 482 lhs, rhs = rule 483 return rule, rebind(lhs)
484
485 - def buildASTNode(self, args, lhs):
486 children = [] 487 for arg in args: 488 if isinstance(arg, self.AST): 489 children.append(arg) 490 else: 491 children.append(self.terminal(arg)) 492 return self.nonterminal(lhs, children)
493
494 - def terminal(self, token): return token
495
496 - def nonterminal(self, type, args):
497 rv = self.AST(type) 498 rv[:len(args)] = args 499 return rv
500 501 # 502 # GenericASTTraversal is a Visitor pattern according to Design Patterns. For 503 # each node it attempts to invoke the method n_<node type>, falling 504 # back onto the default() method if the n_* can't be found. The preorder 505 # traversal also looks for an exit hook named n_<node type>_exit (no default 506 # routine is called if it's not found). To prematurely halt traversal 507 # of a subtree, call the prune() method -- this only makes sense for a 508 # preorder traversal. 509 # 510
511 -class GenericASTTraversalPruningException(Exception):
512 pass
513
514 -class GenericASTTraversal:
515 - def __init__(self, ast):
516 self.ast = ast 517 self.collectRules()
518
519 - def collectRules(self):
520 self.rules = {} 521 self.exitrules = {} 522 for name in _namelist(self): 523 if name[:2] == 'n_': 524 self.rules[name[2:]] = getattr(self, name) 525 if name[-5:] == '_exit': 526 self.exitrules[name[2:-5]] = getattr(self, name)
527
528 - def prune(self):
530
531 - def preorder(self, node=None):
532 if node is None: 533 node = self.ast 534 535 try: 536 name = node.type 537 func = self.rules.get(name) 538 if func is None: 539 # add rule to cache so next time it is faster 540 func = self.default 541 self.rules[name] = func 542 func(node) 543 except GenericASTTraversalPruningException: 544 return 545 546 for kid in node: 547 # if kid.type=='term' and len(kid._kids)==3 and kid._kids[1].type=='/': 548 # # Not the place to check for integer divsion - the type is 549 # # either INTEGER or IDENT (and we dont know yet what the 550 # # underlying type of the IDENT is...) 551 # print(kid._kids[0].type, kid._kids[1].type, kid._kids[2].type) 552 self.preorder(kid) 553 554 func = self.exitrules.get(name) 555 if func is not None: 556 func(node)
557
558 - def postorder(self, node=None):
559 if node is None: 560 node = self.ast 561 562 for kid in node: 563 self.postorder(kid) 564 565 name = node.type 566 func = self.rules.get(name) 567 if func is None: 568 # add rule to cache so next time it is faster 569 func = self.default 570 self.rules[name] = func 571 func(node)
572
573 - def default(self, node):
574 pass
575 576 # 577 # GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" 578 # implemented. 579 # 580 # XXX - makes assumptions about how GenericParser walks the parse tree. 581 # 582
583 -class GenericASTMatcher(GenericParser):
584 - def __init__(self, start, ast):
585 GenericParser.__init__(self, start) 586 self.ast = ast
587
588 - def preprocess(self, rule, func):
589 rebind = lambda func, self=self: \ 590 lambda args, func=func, self=self: \ 591 self.foundMatch(args, func) 592 lhs, rhs = rule 593 rhslist = list(rhs) 594 rhslist.reverse() 595 596 return (lhs, tuple(rhslist)), rebind(func)
597
598 - def foundMatch(self, args, func):
599 func(args[-1]) 600 return args[-1]
601
602 - def match_r(self, node):
603 self.input.insert(0, node) 604 children = 0 605 606 for child in node: 607 if children == 0: 608 self.input.insert(0, '(') 609 children = children + 1 610 self.match_r(child) 611 612 if children > 0: 613 self.input.insert(0, ')')
614
615 - def match(self, ast=None):
616 if ast is None: 617 ast = self.ast 618 self.input = [] 619 620 self.match_r(ast) 621 self.parse(self.input)
622
623 - def resolve(self, list):
624 # 625 # Resolve ambiguity in favor of the longest RHS. 626 # 627 return list[-1]
628