| Home | Trees | Indices | Help |
|
|---|
|
|
1 """cl tokenizer/scanner using John Aycock's little languages (SPARK) framework 2 3 This version uses a context-sensitive pattern stack 4 5 $Id: clscan.py 1463 2011-06-24 22:58:30Z stsci_embray $ 6 7 R. White, 1999 September 10 8 """ 9 from __future__ import division # confidence high 10 11 from cgeneric import ContextSensitiveScanner 12 from generic import GenericScanner 13 from cltoken import Token 14 import string, re 15 from stsci.tools import irafutils 16 import pyrafglobals 17 18 # contexts for scanner 19 20 _START_LINE_MODE = 0 # beginning of line 21 _COMMAND_MODE = 1 # simple command mode 22 _COMPUTE_START_MODE = 2 # initial compute mode (similar to command mode) 23 _COMPUTE_EQN_MODE = 3 # compute mode in task arg when equation-mode 24 # change flag has been seen. Reverts to 25 # _COMPUTE_START_MODE on comma, redirection, etc. 26 _COMPUTE_MODE = 4 # compute (script, equation) mode 27 _SWALLOW_NEWLINE_MODE = 5 # mode at points where embedded newlines allowed 28 _ACCEPT_REDIR_MODE = 6 # mode at points where redirection allowed 29 30 #--------------------------------------------------------------------- 31 # Regular Expressions for additional string replacement 32 #--------------------------------------------------------------------- 33 # 34 # Match embedded comments in a multi-line string 35 # Matches escaped newline followed by line with free-standing comment, 36 # which we ignore to match unusual (ahem) IRAF behavior. 37 38 comment_pat = re.compile(r'\\\s*\n\s*#.*\n\s*') 39 40 # needed to prevent certain escapes to be protected to match IRAF 41 # string behavior (only \\, \b, \n, \r, \t, \digits are converted into 42 # special characters, all other's are left as is) 43 44 special_escapes = re.compile(r'[\\\\]*(\\[^fnrt\\\'"\d])') 4547 """Turn all backslashes that aren't special character for IRAF into 48 double backslashes""" 49 50 return special_escapes.sub(r'\\\1', instr)51 52 53 #--------------------------------------------------------------------- 54 # Scanners for various contexts 55 #--------------------------------------------------------------------- 56 57 #--------------------------------------------------------------------- 58 # BasicScanner: tokens recognized in all modes 59 #--------------------------------------------------------------------- 6062 63 """Scanner class for tokens that can be recognized late""" 64 68104 10570 r'\n' 71 parent.addToken(type='NEWLINE') 72 parent.lineno = parent.lineno + 1 73 # reset mode at start of each line (unless newline was matched 74 # as part of another pattern) 75 parent.startLine()7678 r'\)' 79 parent.addToken(type=')') 80 del parent.current[-1] 81 parent.parencount = parent.parencount - 1 82 # add , as argument separator after this 83 if parent.current and parent.current[-1] == _COMMAND_MODE: 84 parent.argsep = ','8587 r'\|&?' 88 # pipe is always recognized (it turns out) 89 # this must be after the '||' pattern 90 parent.addToken(type='PIPE', attr=s) 91 # Pipe symbol puts us in start-line mode, but leaves 92 # paren count (because pipes can occur inside task parentheses) 93 parent.startLine(parencount=parent.parencount) 94 parent.current.append(_SWALLOW_NEWLINE_MODE)95 100107 108 """Scanner class for tokens that must be recognized before those defined 109 in the _BasicScanner_1 class. 110 """ 111126 127113 r'\\[ \t]*\n' 114 # trailing '\' completely absorbed 115 # This allows spaces after \ and before newline -- I do not 116 # allow that inside quotes. 117 parent.lineno = parent.lineno + 1118129 130 """Scanner class for Tokens that must be recognized before those defined 131 in the _BasicScanner_2 or _BasicScanner_1 classes. 132 """ 133209 210 211 # addition for sloppy scanner 212 # ignores binary data embedded in CL files 213 221 222 223 #--------------------------------------------------------------------- 224 # StartScanner: Tokens recognized in start-line mode 225 #--------------------------------------------------------------------- 226135 r'> (>? ( [GIP]+ | & ) | >)' 136 # matches >> >& >>& >G >I >P >>G >>GI etc. 137 parent.addToken(type='REDIR', attr=s) 138 #XXX may not need following -- I think redirection in 139 #XXX compute-eqn mode should always be trapped by 140 #XXX accept-REDIR mode, and exitComputeEqnMode does 141 #XXX not do anything in other modes 142 parent.exitComputeEqnMode() 143 parent.current.append(_SWALLOW_NEWLINE_MODE)144146 r'\#(?P<Comment>.*)' 147 # skip comment, leaving newline in string 148 # look for special mode-shifting commands 149 comment = m.group('Comment') 150 if comment[:1] == '{': 151 parent.default_mode = _COMPUTE_START_MODE 152 elif comment[:1] == '}': 153 parent.default_mode = _COMMAND_MODE154156 r'(^|\n)[ \t]*!.*' 157 # Host OS command escape. Strip off everything 158 # up through the '!'. 159 if s[0] == '\n': 160 parent.addToken(type='NEWLINE') 161 parent.lineno = parent.lineno + 1 162 cmd = s.strip()[1:] 163 parent.addToken(type='OSESCAPE', attr=cmd.strip())164166 r"' [^'\\\n]* ( ( ((\\(.|\n)|\n)[\s?]*) | '' ) [^'\\\n]* )*'" 167 # this pattern allows both escaped embedded quotes and 168 # embedded double quotes ('embedded''quotes') 169 # it also allows escaped newlines 170 if parent.current[-1] == _COMMAND_MODE: 171 parent.addToken(type=parent.argsep) 172 parent.argsep = ',' 173 174 nline = _countNewlines(s) 175 # Recognize and remove any embedded comments 176 s = comment_pat.sub('',s) 177 178 s = filterEscapes(irafutils.removeEscapes( 179 irafutils.stripQuotes(s),quoted=1)) 180 # We use a different type for quoted strings to protect them 181 # against conversion to other token types by enterComputeEqnMode 182 parent.addToken(type='QSTRING', attr=s) 183 parent.lineno = parent.lineno + nline184186 r'" [^"\\\n]* ( ( ((\\(.|\n)|\n)[\s?]*) | "" ) [^"\\\n]* )* "' 187 if parent.current[-1] == _COMMAND_MODE: 188 parent.addToken(type=parent.argsep) 189 parent.argsep = ',' 190 191 nline = _countNewlines(s) 192 193 # Recognize and remove any embedded comments 194 s = comment_pat.sub('',s) 195 196 s = filterEscapes(irafutils.removeEscapes( 197 irafutils.stripQuotes(s),quoted=1)) 198 parent.addToken(type='QSTRING', attr=s) 199 parent.lineno = parent.lineno + nline200228253 254 258 262 263 264 #--------------------------------------------------------------------- 265 # CommandScanner: Tokens recognized in command mode 266 #--------------------------------------------------------------------- 267230 r'[a-zA-Z\$_][a-zA-Z\$_\d.]*' 231 # Go to command mode 232 parent.addIdent(s, mode=parent.default_mode)233235 r'\(' 236 parent.addToken(type='(') 237 parent.current.append(_COMPUTE_MODE) 238 parent.parencount = parent.parencount + 1 239 # redirection can follow open parens 240 parent.current.append(_ACCEPT_REDIR_MODE)241 246269302 303271 r'[^ \t\n()\\;{}&]+(\\(.|\n)[^ \t\n()\\;{}&]*)*' 272 # What other characters are forbidden in unquoted strings? 273 # Allowing escaped newlines, blanks, quotes, etc. 274 # Increment line count for embedded newlines (after adding token) 275 parent.addToken(type=parent.argsep) 276 parent.argsep = ',' 277 nline = _countNewlines(s) 278 # Handle special escapes then, escape all remaining backslashes 279 # since IRAF doesn't deal with special characters in this mode. 280 # Thus PyRAF should leave them as literal backslashes within its 281 # strings. Why IRAF does this I have no idea. 282 s = irafutils.removeEscapes(s).replace('\\','\\\\') 283 parent.addToken(type='STRING', attr=s) 284 parent.lineno = parent.lineno + nline285287 r'\[' 288 parent.addToken(type=s) 289 # push to compute mode 290 parent.current.append(_COMPUTE_MODE)291293 r'\(' 294 parent.addToken(type=parent.argsep) 295 parent.argsep = ',' 296 parent.addToken(type='(') 297 # push to compute mode 298 parent.current.append(_COMPUTE_MODE) 299 parent.parencount = parent.parencount + 1 300 # redirection can follow open parens 301 parent.current.append(_ACCEPT_REDIR_MODE)305343 344307 r'(?P<KeyName>[a-zA-Z\$_\d][a-zA-Z\$_\d.]*) [ \t]* =(?!=)' 308 # note that keywords can start with a number (!) in command mode 309 parent.addToken(type=parent.argsep) 310 parent.argsep = None 311 parent.addIdent(m.group('KeyName'), usekey=0) 312 parent.addToken(type='=')313315 r'[a-zA-Z\$_\d][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t\n<>\|]))' 316 # note that keywords can start with a number (!) in command mode 317 parent.addToken(type=parent.argsep) 318 parent.argsep = ',' 319 parent.addIdent(s[:-1], usekey=0) 320 parent.addToken(type=s[-1])321323 r'[a-zA-Z\$_\d][a-zA-Z\$_\d.]*\(' 324 # matches identifier follow by open parenthesis (no whitespace) 325 # note that keywords can start with a number (!) in command mode 326 parent.addToken(type=parent.argsep) 327 parent.argsep = ',' 328 parent.addIdent(s[:-1], usekey=0) 329 parent.addToken(type='(') 330 # push to compute mode 331 parent.current.append(_COMPUTE_MODE) 332 parent.parencount = parent.parencount + 1 333 # redirection can follow open parens 334 parent.current.append(_ACCEPT_REDIR_MODE)335346 347 """Strict scanner class for tokens recognized in command mode""" 348360 361 365 366 367 #--------------------------------------------------------------------- 368 # ComputeStartScanner: Tokens recognized in initial compute mode 369 # (similar to command mode) 370 #--------------------------------------------------------------------- 371350 r' < | >>? ([GIP]+|&?) | \|&? ' 351 # Redirection is accepted anywhere in command mode 352 if s[0] == '|': 353 parent.addToken(type='PIPE', attr=s) 354 parent.startLine(parencount=parent.parencount) 355 else: 356 parent.addToken(type=parent.argsep) 357 parent.argsep = None 358 parent.addToken(type='REDIR', attr=s) 359 parent.current.append(_SWALLOW_NEWLINE_MODE)373416 417375 r'[a-zA-Z_$][a-zA-Z_$.0-9]*' 376 # This is a quoteless string with some strict syntax limits. 377 # Most special characters are excluded. Escapes are not allowed 378 # either. 379 parent.addToken(type='STRING', attr=s)380 384386 r',' 387 # commas are parameter separators in this mode 388 # newlines, redirection allowed after comma 389 parent.addToken(type=s) 390 parent.current.append(_ACCEPT_REDIR_MODE) 391 parent.current.append(_SWALLOW_NEWLINE_MODE)392394 r'\[' 395 parent.addToken(type=s) 396 # push to compute mode 397 parent.current.append(_COMPUTE_MODE)398400 r'\(' 401 parent.enterComputeEqnMode() 402 parent.addToken(type='(') 403 # push to compute mode 404 parent.current.append(_COMPUTE_MODE) 405 parent.parencount = parent.parencount + 1 406 # redirection can follow open parens 407 parent.current.append(_ACCEPT_REDIR_MODE)408410 r'\*\*|//|\*|\+|-|/|%' 411 #XXX Could make this type OP if we don't need to distinguish them 412 parent.enterComputeEqnMode() 413 parent.addToken(type=s) 414 # line breaks are allowed after operators 415 parent.current.append(_SWALLOW_NEWLINE_MODE)419461421 r'(?P<KeyName>[a-zA-Z\$_][a-zA-Z\$_\d.]*) [ \t]* =(?!=)' 422 parent.addIdent(m.group('KeyName'), usekey=0) 423 parent.addToken(type='=')424426 r'[a-zA-Z\$_][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t]*[\n<>\|,)]))' 427 # Difference from command mode t_keybool is that comma/paren can 428 # terminate argument 429 # This pattern requires a following comma, newline, or 430 # redirection so that expressions can be distinguished from 431 # boolean args in this mode 432 parent.addIdent(s[:-1], usekey=0) 433 parent.addToken(type=s[-1]) 434 parent.current.append(_ACCEPT_REDIR_MODE)435437 r'( [+\-*/] | // )? =' 438 if s == '=': 439 parent.addToken(type=s) 440 else: 441 parent.addToken(type='ASSIGNOP',attr=s) 442 parent.current.append(_COMPUTE_MODE)443445 r' < | >>? ([GIP]+|&?) | \|&? ' 446 # Redirection is accepted in command mode 447 if s[0] == '|': 448 parent.addToken(type='PIPE', attr=s) 449 parent.startLine(parencount=parent.parencount) 450 else: 451 parent.addToken(type='REDIR', attr=s) 452 parent.current.append(_SWALLOW_NEWLINE_MODE)453 457463 """Strict scanner class for tokens recognized in initial compute mode 464 (similar to command mode) 465 """ 466 pass467469 """Scanner class for tokens recognized in initial compute mode 470 (similar to command mode) 471 """ 472 pass473 474 #--------------------------------------------------------------------- 475 # ComputeEqnScanner: Tokens recognized in compute equation mode 476 # Mostly like standard Compute mode, but reverts to ComputeStart 477 # mode on comma 478 #--------------------------------------------------------------------- 479481524 525483 r'\(' 484 parent.addToken(type='(') 485 parent.current.append(_COMPUTE_MODE) 486 parent.parencount = parent.parencount + 1 487 # redirection can follow open parens 488 #XXX get rid of this? 489 parent.current.append(_ACCEPT_REDIR_MODE)490492 r'\*\*|//|\*|\+|-|/|%' 493 #XXX Could make this type OP if we don't need to distinguish them 494 parent.addToken(type=s) 495 # line breaks are allowed after operators 496 parent.current.append(_SWALLOW_NEWLINE_MODE)497499 r'\|\||&&|!' 500 # split '!' off separately 501 if len(s) > 1: 502 parent.addToken(type='LOGOP',attr=s) 503 else: 504 parent.addToken(type=s) 505 parent.current.append(_SWALLOW_NEWLINE_MODE)506 510 514516 r',' 517 # commas are parameter separators in this mode 518 # commas also terminate this mode 519 parent.exitComputeEqnMode() 520 parent.addToken(type=s) 521 # newlines, redirection allowed after comma 522 parent.current.append(_ACCEPT_REDIR_MODE) 523 parent.current.append(_SWALLOW_NEWLINE_MODE)527557 558529 r'(?P<KeyName>[a-zA-Z\$_][a-zA-Z\$_\d.]*) [ \t]* =(?!=)' 530 parent.addIdent(m.group('KeyName'), usekey=0) 531 parent.addToken(type='=')532534 r'[a-zA-Z\$_][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t]*[\n<>\|,)]))' 535 # Difference from command mode t_keybool is that comma/paren can 536 # terminate argument 537 # This pattern requires a following comma, newline, or 538 # redirection so that expressions can be distinguished from 539 # boolean args in this mode 540 parent.addIdent(s[:-1], usekey=0) 541 parent.addToken(type=s[-1]) 542 parent.current.append(_ACCEPT_REDIR_MODE)543 547549 r'( [+\-*/] | // ) =' 550 parent.addToken(type='ASSIGNOP',attr=s) 551 # switch to compute mode 552 parent.current[-1] = _COMPUTE_MODE553560 561 """Strict scanner class for tokens recognized in compute equation mode""" 562567 571 572 #--------------------------------------------------------------------- 573 # ComputeScanner: Tokens recognized in compute mode 574 #--------------------------------------------------------------------- 575564 r'[<>!=]=|<|>' 565 parent.addToken(type='COMPOP',attr=s) 566 parent.current.append(_SWALLOW_NEWLINE_MODE)577619 620579 r'\(' 580 parent.addToken(type='(') 581 # push to compute mode 582 parent.current.append(_COMPUTE_MODE) 583 parent.parencount = parent.parencount + 1 584 # redirection can follow open parens 585 # XXX get rid of this? 586 parent.current.append(_ACCEPT_REDIR_MODE)587589 r'\*\*|//|\*|\+|-|/|%' 590 #XXX Could make this type OP if we don't need to distinguish them 591 parent.addToken(type=s) 592 # line breaks are allowed after operators 593 parent.current.append(_SWALLOW_NEWLINE_MODE)594596 r'\|\||&&|!' 597 # split '!' off separately 598 if len(s) > 1: 599 parent.addToken(type='LOGOP',attr=s) 600 else: 601 parent.addToken(type=s) 602 parent.current.append(_SWALLOW_NEWLINE_MODE)603 607 611613 r',' 614 # commas are parameter separators in this mode 615 parent.addToken(type=s) 616 # newlines, redirection allowed after comma 617 parent.current.append(_ACCEPT_REDIR_MODE) 618 parent.current.append(_SWALLOW_NEWLINE_MODE)622650 651624 r'(?P<KeyName>[a-zA-Z\$_][a-zA-Z\$_\d.]*) [ \t]* =(?!=)' 625 parent.addIdent(m.group('KeyName'), usekey=0) 626 parent.addToken(type='=')627629 r'[a-zA-Z\$_][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t]*[\n<>\|,)]))' 630 # Difference from command mode t_keybool is that comma/paren can 631 # terminate argument 632 # This pattern requires a following comma, newline, or 633 # redirection so that expressions can be distinguished from 634 # boolean args in this mode 635 parent.addIdent(s[:-1], usekey=0) 636 parent.addToken(type=s[-1]) 637 parent.current.append(_ACCEPT_REDIR_MODE)638 642 646653 654 """Strict scanner class for tokens recognized in compute mode""" 655660 664 665 #--------------------------------------------------------------------- 666 # SwallowNewlineScanner: Tokens recognized at points where 667 # embedded newlines are allowed 668 #--------------------------------------------------------------------- 669657 r'[<>!=]=|<|>' 658 parent.addToken(type='COMPOP',attr=s) 659 parent.current.append(_SWALLOW_NEWLINE_MODE)671 672 """Strict scanner class where embedded newlines allowed""" 673683 684 685 _SwallowNewlineScanner = _StrictSwallowNewlineScanner 686 687 #--------------------------------------------------------------------- 688 # AcceptRedirScanner: Tokens that are recognized at points where 689 # redirection is allowed 690 #--------------------------------------------------------------------- 691675 r'[ \t\n]* ( ( \\ | (\#.*) ) [ \t\n]+ )*' 676 # Just grab all the following newlines 677 # Also consumes backslash continuations and comments 678 # Note that this always matches, so we always leave this 679 # mode after one match 680 parent.lineno = parent.lineno + _countNewlines(s) 681 # pop to previous mode 682 del parent.current[-1]694 695 """Strict scanner class where redirection is allowed""" 696718 719 723 724 725 #--------------------------------------------------------------------- 726 # Main context-sensitive scanner 727 #--------------------------------------------------------------------- 728 729 # dictionary of reserved keywords 730 731 732 # SEE ALSO ClScanner.__init__ for more ECL keywords. 733 _keywordDict = { 734 'begin': 1, 735 'break': 1, 736 'case': 1, 737 'default': 1, 738 'else': 1, 739 'end': 1, 740 'for': 1, 741 'goto': 1, 742 'if': 1, 743 'next': 1, 744 'procedure': 1, 745 'return': 1, 746 'switch': 1, 747 'while': 1, 748 } 749 750 _typeDict = { 'bool': 1, 751 'char': 1, 752 'file': 1, 753 'gcur': 1, 754 'imcur': 1, 755 'int': 1, 756 'pset': 1, 757 'real': 1, 758 'string': 1, 759 'struct': 1, 760 'ukey': 1, 761 } 762 763 _boolDict = { 'yes': 1, 'no': 1, } 764 765 # list of scanners for each state 766 # only need to create these once, since they are designed to 767 # contain no state information 768 769 _scannerDict = None 770 _strictScannerDict = None 771698 r' < | >>? ([GIP]+|&?) | \|&? ' 699 if s[0] == '|': 700 parent.addToken(type='PIPE', attr=s) 701 parent.startLine(parencount=parent.parencount) 702 else: 703 parent.addToken(type='REDIR', attr=s) 704 # pop this state 705 del parent.current[-1] 706 # allow following newlines 707 parent.current.append(_SWALLOW_NEWLINE_MODE)708 713773 global _scannerDict 774 if _scannerDict is None: 775 _scannerDict = { 776 _START_LINE_MODE: _StartScanner(), 777 _COMMAND_MODE: _CommandScanner(), 778 _COMPUTE_START_MODE: _ComputeStartScanner(), 779 _COMPUTE_EQN_MODE: _ComputeEqnScanner(), 780 _COMPUTE_MODE: _ComputeScanner(), 781 _SWALLOW_NEWLINE_MODE: _SwallowNewlineScanner(), 782 _ACCEPT_REDIR_MODE: _AcceptRedirScanner(), 783 } 784 return _scannerDict785787 global _strictScannerDict 788 # create strict scanners 789 if _strictScannerDict is None: 790 _strictScannerDict = { 791 _START_LINE_MODE: _StrictStartScanner(), 792 _COMMAND_MODE: _StrictCommandScanner(), 793 _COMPUTE_START_MODE: _StrictComputeStartScanner(), 794 _COMPUTE_EQN_MODE: _StrictComputeEqnScanner(), 795 _COMPUTE_MODE: _StrictComputeScanner(), 796 _SWALLOW_NEWLINE_MODE: _StrictSwallowNewlineScanner(), 797 _ACCEPT_REDIR_MODE: _StrictAcceptRedirScanner(), 798 } 799 return _strictScannerDict800 801803 804 """CL scanner class""" 805954 955807 808 if pyrafglobals._use_ecl: 809 _keywordDict["iferr"] = 1 810 _keywordDict["ifnoerr"] = 1 811 _keywordDict["then"] = 1 812 813 self.strict = strict 814 if strict: 815 sdict = _getStrictScannerDict() 816 else: 817 sdict = _getScannerDict() 818 ContextSensitiveScanner.__init__(self, sdict)819821 # go to _START_LINE_MODE 822 self.parencount = parencount 823 self.argsep = argsep 824 self.current = [ _START_LINE_MODE ]825827 self.rv = [] 828 self.lineno = 1 829 # default mode when leaving _START_LINE_MODE 830 self.default_mode = default_mode 831 # argsep is used to insert commas as argument separators 832 # in command mode 833 self.argsep = None 834 self.parencount = 0 835 ContextSensitiveScanner.tokenize(self, input) 836 self.addToken(type='NEWLINE') 837 return self.rv838840 # add a token to the list (with some twists to simplify parsing) 841 842 if type is None: return 843 844 # insert NEWLINE before '}' 845 846 if type == '}' and self.rv and self.rv[-1].type != 'NEWLINE': 847 self.rv.append(Token(type='NEWLINE', attr=None, lineno=self.lineno)) 848 849 ## suppress newline after '{' or ';' 850 #if type != 'NEWLINE' or (self.rv and self.rv[-1].type != 'NEWLINE' and 851 # self.rv[-1].type != '{' and 852 # self.rv[-1].type != ';'): 853 854 # compress out multiple/leading newlines 855 # suppress newline after '{' 856 857 if type != 'NEWLINE' or (self.rv and self.rv[-1].type != 'NEWLINE' and 858 self.rv[-1].type != '{'): 859 860 # Another ugly hack -- the syntax 861 # 862 # taskname(arg, arg, | taskname2 arg, arg) 863 # 864 # causes parsing problems. To help solve them, delete any 865 # comma that just precedes a PIPE 866 867 if type=='PIPE' and self.rv and self.rv[-1].type == ',': 868 del self.rv[-1] 869 870 self.rv.append(Token(type=type, attr=attr, lineno=self.lineno)) 871 872 # insert NEWLINE after '}' too 873 # go to start-line mode 874 if type == '}' and self.rv and self.rv[-1].type != 'NEWLINE': 875 self.rv.append(Token(type='NEWLINE', attr=None, lineno=self.lineno)) 876 self.startLine()877879 # Add identifier token, recognizing keywords if usekey parameter is set 880 # Note keywords may be in any case 881 # For normal (non-keyword) identifiers, goes to mode 882 883 keyword = name.lower() 884 if usekey and _keywordDict.has_key(keyword): 885 886 self.addToken(type=keyword.upper(), attr=keyword) 887 if keyword == "procedure": 888 # Procedure scripts are always in compute mode 889 self.default_mode = _COMPUTE_START_MODE 890 if keyword == "if" or keyword == "else": 891 # For `if', `else' go into _START_LINE_MODE 892 self.startLine() 893 elif self.current[-1] != _COMPUTE_MODE: 894 # Other keywords put us into _COMPUTE_MODE 895 self.current.append(_COMPUTE_MODE) 896 897 elif usekey and _typeDict.has_key(keyword) and \ 898 self.current[-1] == _START_LINE_MODE: 899 900 # types are treated as keywords only if first token on line 901 self.addToken(type='TYPE', attr=keyword) 902 self.current.append(_COMPUTE_MODE) 903 904 elif keyword == "indef" or keyword == "eof": 905 906 # INDEF, EOF always get recognized 907 self.addToken(type=keyword.upper()) 908 909 elif keyword == "epsilon": 910 911 # epsilon always gets recognized 912 self.addToken(type="FLOAT", attr=keyword) 913 # xxx self.addToken(type="FLOAT") 914 # AttributeError: 'NoneType' object has no attribute 'find' 915 # xxx self.addToken(type=keyword.upper()) 916 # epsilon was quoted 917 918 elif _boolDict.has_key(keyword): 919 920 # boolean yes, no always gets recognized 921 self.addToken(type='BOOL', attr=keyword) 922 923 else: 924 925 self.addToken(type='IDENT',attr=name) 926 if mode is not None: self.current.append(mode)927929 # Nasty hack to work around weird CL syntax 930 # In compute-start mode, tokens are strings or identifiers 931 # or numbers depending on what follows them, and the mode 932 # once switched to compute-mode stays there until a 933 # terminating comma. Ugly stuff. 934 # 935 # This is called when a token is received that triggers the 936 # transition to the compute-eqn mode from compute-start mode. 937 # It may be necessary to change tokens already on the 938 # list when this is called... 939 940 self.current.append(_COMPUTE_EQN_MODE) 941 if self.rv and self.rv[-1].type == "STRING": 942 # if last token was a string, we must remove it and 943 # rescan it using the compute-mode scanner 944 # Hope this works! 945 last = self.rv[-1].attr 946 del self.rv[-1] 947 ContextSensitiveScanner.tokenize(self, last)948950 # Companion to enterComputeEqnMode -- called when we encounter 951 # a token that may cause us to exit the mode 952 if self.current[-1] == _COMPUTE_EQN_MODE: 953 del self.current[-1]957 """Return number of newlines in string""" 958 n = 0 959 i = s.find('\n') 960 while (i>=0): 961 n = n+1 962 i = s.find('\n', i+1) 963 return n964 965 970 971973 # list tokens 974 import cltoken 975 if filename: 976 import sys 977 sys.stdout = open(filename,'w') 978 for tok in tlist: 979 if tok.type == 'NEWLINE': 980 if cltoken.verbose: 981 print 'NEWLINE' 982 else: 983 print 984 else: 985 print `tok`, 986 if filename: 987 sys.stdout.close() 988 sys.stdout = sys.__stdout__989 990 if __name__ == '__main__': 991 s = CLScanner() 992 993 # scan file 'simple.cl' 994 995 lines = open('simple.cl').read() 996 tokens = s.tokenize(lines) 997 998 toklist(tokens[:30]) 999
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Mon Aug 22 14:37:30 2011 | http://epydoc.sourceforge.net |