Package pyraf :: Module clscan
[hide private]
[frames] | no frames]

Source Code for Module pyraf.clscan

   1  """cl tokenizer/scanner using John Aycock's little languages (SPARK) framework 
   2   
   3  This version uses a context-sensitive pattern stack 
   4   
   5  $Id: clscan.py 1463 2011-06-24 22:58:30Z stsci_embray $ 
   6   
   7  R. White, 1999 September 10 
   8  """ 
   9  from __future__ import division # confidence high 
  10   
  11  from cgeneric import ContextSensitiveScanner 
  12  from generic import GenericScanner 
  13  from cltoken import Token 
  14  import string, re 
  15  from stsci.tools import irafutils 
  16  import pyrafglobals 
  17   
  18  # contexts for scanner 
  19   
  20  _START_LINE_MODE = 0            # beginning of line 
  21  _COMMAND_MODE = 1               # simple command mode 
  22  _COMPUTE_START_MODE = 2         # initial compute mode (similar to command mode) 
  23  _COMPUTE_EQN_MODE = 3           # compute mode in task arg when equation-mode 
  24                                  # change flag has been seen.  Reverts to 
  25                                  # _COMPUTE_START_MODE on comma, redirection, etc. 
  26  _COMPUTE_MODE = 4               # compute (script, equation) mode 
  27  _SWALLOW_NEWLINE_MODE = 5       # mode at points where embedded newlines allowed 
  28  _ACCEPT_REDIR_MODE = 6          # mode at points where redirection allowed 
  29   
  30  #--------------------------------------------------------------------- 
  31  # Regular Expressions for additional string replacement 
  32  #--------------------------------------------------------------------- 
  33  # 
# Match embedded comments in a multi-line string.
# Matches an escaped newline followed by a line holding a free-standing
# comment, which we ignore to match unusual (ahem) IRAF behavior.
# Used by the quoted-string token handlers to strip such comments.

comment_pat = re.compile(r'\\\s*\n\s*#.*\n\s*')

# Needed so that certain escapes stay protected, matching IRAF
# string behavior (only \\, \b, \n, \r, \t, \digits are converted into
# special characters; all others are left as is).
# NOTE(review): the list above mentions \b, but the exclusion set in the
# pattern below contains 'f' and not 'b' -- confirm which is intended.

special_escapes = re.compile(r'[\\\\]*(\\[^fnrt\\\'"\d])')
  45   
def filterEscapes(instr):
    """Protect backslash escapes that are not special characters for IRAF.

    Every match of the module-level ``special_escapes`` pattern in `instr`
    is replaced by a literal backslash followed by the escape the pattern
    captured.  Returns the resulting string.
    """
    # replacement template: one literal backslash, then captured group 1
    protected = r'\\\1'
    return special_escapes.sub(protected, instr)
51 52 53 #--------------------------------------------------------------------- 54 # Scanners for various contexts 55 #--------------------------------------------------------------------- 56 57 #--------------------------------------------------------------------- 58 # BasicScanner: tokens recognized in all modes 59 #--------------------------------------------------------------------- 60
class _BasicScanner_1(GenericScanner):

    """Lowest-priority token handlers shared by the scanner modes.

    The raw-string docstring of each t_* method is the token pattern used
    by the scanner framework, so those docstrings must not be edited as
    documentation.  Handlers receive the matched text `s`, the match
    object `m`, and the context-sensitive scanner `parent` whose state
    they update.
    """

    def t_whitespace(self, s, m, parent):
        r'[ \t]+'
        # blanks and tabs produce no token
        pass

    def t_newline(self, s, m, parent):
        r'\n'
        parent.addToken(type='NEWLINE')
        parent.lineno += 1
        # reset mode at start of each line (unless newline was matched
        # as part of another pattern)
        parent.startLine()

    def t_rparen(self, s, m, parent):
        r'\)'
        parent.addToken(type=')')
        # leave the mode that was entered for this parenthesis
        modes = parent.current
        modes.pop()
        parent.parencount -= 1
        # add , as argument separator after this
        if modes and modes[-1] == _COMMAND_MODE:
            parent.argsep = ','

    def t_pipe(self, s, m, parent):
        r'\|&?'
        # pipe is always recognized (it turns out)
        # this must be after the '||' pattern
        parent.addToken(type='PIPE', attr=s)
        # Pipe symbol puts us in start-line mode, but leaves
        # paren count (because pipes can occur inside task parentheses)
        depth = parent.parencount
        parent.startLine(parencount=depth)
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_bkgd(self, s, m, parent):
        r'&'
        # background execution
        parent.addToken(type='BKGD', attr=s)

    def t_default(self, s, m, parent):
        r'.'
        # any other single character becomes a token of its own type
        parent.addToken(type=s)
106 -class _BasicScanner_2:
107 108 """Scanner class for tokens that must be recognized before those defined 109 in the _BasicScanner_1 class. 110 """ 111
112 - def t_backslash(self, s, m, parent):
113 r'\\[ \t]*\n' 114 # trailing '\' completely absorbed 115 # This allows spaces after \ and before newline -- I do not 116 # allow that inside quotes. 117 parent.lineno = parent.lineno + 1
118
119 - def t_colon(self, s, m, parent):
120 r':' 121 parent.addToken(type=s) 122 # add a newline after colon (which may appear in 123 # label or case stmt) and go to start-line mode 124 parent.addToken(type='NEWLINE') 125 parent.startLine()
126 127
128 -class _BasicScanner_3:
129 130 """Scanner class for Tokens that must be recognized before those defined 131 in the _BasicScanner_2 or _BasicScanner_1 classes. 132 """ 133
134 - def t_complex_redir(self, s, m, parent):
135 r'> (>? ( [GIP]+ | & ) | >)' 136 # matches >> >& >>& >G >I >P >>G >>GI etc. 137 parent.addToken(type='REDIR', attr=s) 138 #XXX may not need following -- I think redirection in 139 #XXX compute-eqn mode should always be trapped by 140 #XXX accept-REDIR mode, and exitComputeEqnMode does 141 #XXX not do anything in other modes 142 parent.exitComputeEqnMode() 143 parent.current.append(_SWALLOW_NEWLINE_MODE)
144
145 - def t_comment(self, s, m, parent):
146 r'\#(?P<Comment>.*)' 147 # skip comment, leaving newline in string 148 # look for special mode-shifting commands 149 comment = m.group('Comment') 150 if comment[:1] == '{': 151 parent.default_mode = _COMPUTE_START_MODE 152 elif comment[:1] == '}': 153 parent.default_mode = _COMMAND_MODE
154
155 - def t_osescape(self, s, m, parent):
156 r'(^|\n)[ \t]*!.*' 157 # Host OS command escape. Strip off everything 158 # up through the '!'. 159 if s[0] == '\n': 160 parent.addToken(type='NEWLINE') 161 parent.lineno = parent.lineno + 1 162 cmd = s.strip()[1:] 163 parent.addToken(type='OSESCAPE', attr=cmd.strip())
164
165 - def t_singlequote(self, s, m, parent):
166 r"' [^'\\\n]* ( ( ((\\(.|\n)|\n)[\s?]*) | '' ) [^'\\\n]* )*'" 167 # this pattern allows both escaped embedded quotes and 168 # embedded double quotes ('embedded''quotes') 169 # it also allows escaped newlines 170 if parent.current[-1] == _COMMAND_MODE: 171 parent.addToken(type=parent.argsep) 172 parent.argsep = ',' 173 174 nline = _countNewlines(s) 175 # Recognize and remove any embedded comments 176 s = comment_pat.sub('',s) 177 178 s = filterEscapes(irafutils.removeEscapes( 179 irafutils.stripQuotes(s),quoted=1)) 180 # We use a different type for quoted strings to protect them 181 # against conversion to other token types by enterComputeEqnMode 182 parent.addToken(type='QSTRING', attr=s) 183 parent.lineno = parent.lineno + nline
184
185 - def t_doublequote(self, s, m, parent):
186 r'" [^"\\\n]* ( ( ((\\(.|\n)|\n)[\s?]*) | "" ) [^"\\\n]* )* "' 187 if parent.current[-1] == _COMMAND_MODE: 188 parent.addToken(type=parent.argsep) 189 parent.argsep = ',' 190 191 nline = _countNewlines(s) 192 193 # Recognize and remove any embedded comments 194 s = comment_pat.sub('',s) 195 196 s = filterEscapes(irafutils.removeEscapes( 197 irafutils.stripQuotes(s),quoted=1)) 198 parent.addToken(type='QSTRING', attr=s) 199 parent.lineno = parent.lineno + nline
200
201 - def t_semicolon(self, s, m, parent):
202 r';' 203 parent.addToken(type=';') 204 # usually we reset mode just like on newline 205 # if semicolon inside parentheses, just stay in compute mode 206 # this occurs legally only in the (e1;e2;e3) clause of a `for' stmt 207 if parent.parencount <= 0: 208 parent.startLine()
209 210 211 # addition for sloppy scanner 212 # ignores binary data embedded in CL files 213
214 -class _LaxScanner:
215
216 - def t_default(self, s, m, parent):
217 r'.' 218 # skip binary data 219 if '\x1a' < s < '\x7f': 220 parent.addToken(type=s)
221 222 223 #--------------------------------------------------------------------- 224 # StartScanner: Tokens recognized in start-line mode 225 #--------------------------------------------------------------------- 226
class _StartScanner_1(_BasicScanner_1):

    """Start-line mode handlers layered on top of _BasicScanner_1.

    The raw-string docstring of each t_* method is its token pattern.
    """

    def t_ident(self, s, m, parent):
        r'[a-zA-Z\$_][a-zA-Z\$_\d.]*'
        # Go to command mode (or whatever the current default mode is)
        parent.addIdent(s, mode=parent.default_mode)

    def t_lparen(self, s, m, parent):
        r'\('
        parent.addToken(type='(')
        parent.current.append(_COMPUTE_MODE)
        parent.parencount += 1
        # redirection can follow open parens
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_equals(self, s, m, parent):
        r'='
        parent.addToken(type=s)
        parent.current.append(_COMPUTE_MODE)

    def t_help(self, s, m, parent):
        r'\?\??'
        # '??' maps to allPkgHelp, a single '?' to pkgHelp
        if len(s) == 2:
            helper = 'allPkgHelp'
        else:
            helper = 'pkgHelp'
        parent.addIdent(helper, mode=parent.default_mode)
class _StrictStartScanner(_BasicScanner_3,_BasicScanner_2,_StartScanner_1):
    """Strict scanner for start-line mode.

    Adds nothing of its own; it only combines the three handler layers.
    """
258
class _StartScanner(_LaxScanner,_StrictStartScanner):
    """Scanner for start-line mode.

    Same as the strict start-line scanner, except that _LaxScanner is
    listed first so its t_default (which skips binary data) is used.
    """
262 263 264 #--------------------------------------------------------------------- 265 # CommandScanner: Tokens recognized in command mode 266 #--------------------------------------------------------------------- 267
class _CommandScanner_1(_BasicScanner_1):

    """Command-mode handlers layered on top of _BasicScanner_1.

    The raw-string docstring of each t_* method is its token pattern.
    Handlers that start a new argument first emit the pending
    ``parent.argsep`` token and then set the separator to ','.
    """

    def t_string(self, s, m, parent):
        r'[^ \t\n()\\;{}&]+(\\(.|\n)[^ \t\n()\\;{}&]*)*'
        # What other characters are forbidden in unquoted strings?
        # Allowing escaped newlines, blanks, quotes, etc.
        parent.addToken(type=parent.argsep)
        parent.argsep = ','
        # count embedded newlines on the raw text; the line counter is
        # only advanced after the token has been added
        nline = _countNewlines(s)
        # Handle special escapes then, escape all remaining backslashes
        # since IRAF doesn't deal with special characters in this mode.
        # Thus PyRAF should leave them as literal backslashes within its
        # strings.  Why IRAF does this I have no idea.
        unescaped = irafutils.removeEscapes(s)
        parent.addToken(type='STRING', attr=unescaped.replace('\\','\\\\'))
        parent.lineno += nline

    def t_lbracket(self, s, m, parent):
        r'\['
        parent.addToken(type=s)
        # push to compute mode
        parent.current.append(_COMPUTE_MODE)

    def t_lparen(self, s, m, parent):
        r'\('
        parent.addToken(type=parent.argsep)
        parent.argsep = ','
        parent.addToken(type='(')
        # push to compute mode
        parent.current.append(_COMPUTE_MODE)
        parent.parencount += 1
        # redirection can follow open parens
        parent.current.append(_ACCEPT_REDIR_MODE)
class _CommandScanner_2(_BasicScanner_2,_CommandScanner_1):

    """Command-mode handlers for keyword arguments, function calls and
    assignment operators.

    The raw-string docstring of each t_* method is its token pattern.
    """

    def t_keyval(self, s, m, parent):
        r'(?P<KeyName>[a-zA-Z\$_\d][a-zA-Z\$_\d.]*) [ \t]* =(?!=)'
        # note that keywords can start with a number (!) in command mode
        parent.addToken(type=parent.argsep)
        # no separator is wanted between the '=' and the value
        parent.argsep = None
        key = m.group('KeyName')
        parent.addIdent(key, usekey=0)
        parent.addToken(type='=')

    def t_keybool(self, s, m, parent):
        r'[a-zA-Z\$_\d][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t\n<>\|]))'
        # note that keywords can start with a number (!) in command mode
        parent.addToken(type=parent.argsep)
        parent.argsep = ','
        # split the match into the keyword and its trailing '+' or '-'
        key, flag = s[:-1], s[-1]
        parent.addIdent(key, usekey=0)
        parent.addToken(type=flag)

    def t_functioncall(self, s, m, parent):
        r'[a-zA-Z\$_\d][a-zA-Z\$_\d.]*\('
        # matches identifier follow by open parenthesis (no whitespace)
        # note that keywords can start with a number (!) in command mode
        parent.addToken(type=parent.argsep)
        parent.argsep = ','
        # everything before the '(' is the function name
        parent.addIdent(s[:-1], usekey=0)
        parent.addToken(type='(')
        # push to compute mode
        parent.current.append(_COMPUTE_MODE)
        parent.parencount += 1
        # redirection can follow open parens
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_assignop(self, s, m, parent):
        r'( [+\-*/] | // )? ='
        # a plain '=' is its own token type; compound forms are ASSIGNOP
        if s != '=':
            parent.addToken(type='ASSIGNOP',attr=s)
        else:
            parent.addToken(type=s)
        parent.current.append(_COMPUTE_MODE)
class _StrictCommandScanner(_BasicScanner_3,_CommandScanner_2):

    """Strict scanner class for tokens recognized in command mode"""

    def t_redir(self, s, m, parent):
        r' < | >>? ([GIP]+|&?) | \|&? '
        # Redirection is accepted anywhere in command mode
        is_pipe = (s[0] == '|')
        if is_pipe:
            # a pipe starts a new command but keeps the paren count
            parent.addToken(type='PIPE', attr=s)
            parent.startLine(parencount=parent.parencount)
        else:
            parent.addToken(type=parent.argsep)
            parent.argsep = None
            parent.addToken(type='REDIR', attr=s)
        # in both cases the next scanner call runs in swallow-newline mode
        parent.current.append(_SWALLOW_NEWLINE_MODE)
class _CommandScanner(_LaxScanner,_StrictCommandScanner):
    """Scanner for command mode.

    Same as the strict command scanner, except that _LaxScanner is
    listed first so its t_default (which skips binary data) is used.
    """
365 366 367 #--------------------------------------------------------------------- 368 # ComputeStartScanner: Tokens recognized in initial compute mode 369 # (similar to command mode) 370 #--------------------------------------------------------------------- 371
class _ComputeStartScanner_1(_BasicScanner_1):

    """Initial-compute-mode handlers layered on top of _BasicScanner_1.

    The raw-string docstring of each t_* method is its token pattern.
    An open parenthesis or an arithmetic operator calls
    ``parent.enterComputeEqnMode()`` before its token is added.
    """

    def t_string(self, s, m, parent):
        r'[a-zA-Z_$][a-zA-Z_$.0-9]*'
        # This is a quoteless string with some strict syntax limits.
        # Most special characters are excluded.  Escapes are not allowed
        # either.
        parent.addToken(type='STRING', attr=s)

    def t_integer(self, s, m, parent):
        r' \d+([bB]|([\da-fA-F]*[xX]))? '
        parent.addToken(type='INTEGER', attr=s)

    def t_comma(self, s, m, parent):
        r','
        # commas are parameter separators in this mode
        parent.addToken(type=s)
        # newlines, redirection allowed after comma
        for mode in (_ACCEPT_REDIR_MODE, _SWALLOW_NEWLINE_MODE):
            parent.current.append(mode)

    def t_lbracket(self, s, m, parent):
        r'\['
        parent.addToken(type=s)
        # push to compute mode
        parent.current.append(_COMPUTE_MODE)

    def t_lparen(self, s, m, parent):
        r'\('
        parent.enterComputeEqnMode()
        parent.addToken(type='(')
        # push to compute mode
        parent.current.append(_COMPUTE_MODE)
        parent.parencount += 1
        # redirection can follow open parens
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_op(self, s, m, parent):
        r'\*\*|//|\*|\+|-|/|%'
        #XXX Could make this type OP if we don't need to distinguish them
        parent.enterComputeEqnMode()
        parent.addToken(type=s)
        # line breaks are allowed after operators
        parent.current.append(_SWALLOW_NEWLINE_MODE)
class _ComputeStartScanner_2(_BasicScanner_2,_ComputeStartScanner_1):

    """Initial-compute-mode handlers for keyword arguments, assignment,
    redirection and non-integer numbers.

    The raw-string docstring of each t_* method is its token pattern.
    """

    def t_keyval(self, s, m, parent):
        r'(?P<KeyName>[a-zA-Z\$_][a-zA-Z\$_\d.]*) [ \t]* =(?!=)'
        key = m.group('KeyName')
        parent.addIdent(key, usekey=0)
        parent.addToken(type='=')

    def t_keybool(self, s, m, parent):
        r'[a-zA-Z\$_][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t]*[\n<>\|,)]))'
        # Difference from command mode t_keybool is that comma/paren can
        # terminate argument
        # This pattern requires a following comma, newline, or
        # redirection so that expressions can be distinguished from
        # boolean args in this mode
        key, flag = s[:-1], s[-1]
        parent.addIdent(key, usekey=0)
        parent.addToken(type=flag)
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_assignop(self, s, m, parent):
        r'( [+\-*/] | // )? ='
        # a plain '=' is its own token type; compound forms are ASSIGNOP
        if s != '=':
            parent.addToken(type='ASSIGNOP',attr=s)
        else:
            parent.addToken(type=s)
        parent.current.append(_COMPUTE_MODE)

    def t_redir(self, s, m, parent):
        r' < | >>? ([GIP]+|&?) | \|&? '
        # Redirection is accepted in command mode
        is_pipe = (s[0] == '|')
        if is_pipe:
            # a pipe starts a new command but keeps the paren count
            parent.addToken(type='PIPE', attr=s)
            parent.startLine(parencount=parent.parencount)
        else:
            parent.addToken(type='REDIR', attr=s)
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_sexagesimal(self, s, m, parent):
        r'\d+:\d+(:\d+(\.\d*)?)?'
        parent.addToken(type='SEXAGESIMAL', attr=s)

    def t_float(self, s, m, parent):
        r'(\d+[eEdD][+\-]?\d+) | (((\d*\.\d+)|(\d+\.\d*))([eEdD][+\-]?\d+)?)'
        parent.addToken(type='FLOAT', attr=s)
461
class _StrictComputeStartScanner(_BasicScanner_3,_ComputeStartScanner_2):
    """Strict scanner for initial compute mode (similar to command mode).

    Adds nothing of its own; it only combines the handler layers.
    """
467
class _ComputeStartScanner(_LaxScanner,_StrictComputeStartScanner):
    """Scanner for initial compute mode (similar to command mode).

    Same as the strict variant, except that _LaxScanner is listed first
    so its t_default (which skips binary data) is used.
    """
473 474 #--------------------------------------------------------------------- 475 # ComputeEqnScanner: Tokens recognized in compute equation mode 476 # Mostly like standard Compute mode, but reverts to ComputeStart 477 # mode on comma 478 #--------------------------------------------------------------------- 479
class _ComputeEqnScanner_1(_BasicScanner_1):

    """Compute-equation-mode handlers layered on top of _BasicScanner_1.

    The raw-string docstring of each t_* method is its token pattern.
    Unlike the plain compute-mode handlers, the comma handler here calls
    ``parent.exitComputeEqnMode()`` first.
    """

    def t_lparen(self, s, m, parent):
        r'\('
        parent.addToken(type='(')
        parent.current.append(_COMPUTE_MODE)
        parent.parencount += 1
        # redirection can follow open parens
        #XXX get rid of this?
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_op(self, s, m, parent):
        r'\*\*|//|\*|\+|-|/|%'
        #XXX Could make this type OP if we don't need to distinguish them
        parent.addToken(type=s)
        # line breaks are allowed after operators
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_logop(self, s, m, parent):
        r'\|\||&&|!'
        # split '!' off separately: the two-character operators are
        # LOGOP tokens, a lone '!' is a token of its own type
        if len(s) <= 1:
            parent.addToken(type=s)
        else:
            parent.addToken(type='LOGOP',attr=s)
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_integer(self, s, m, parent):
        r' \d+([bB]|([\da-fA-F]*[xX]))? '
        parent.addToken(type='INTEGER', attr=s)

    def t_ident(self, s, m, parent):
        r'[a-zA-Z\$_][a-zA-Z\$_\d.]*'
        parent.addIdent(s)

    def t_comma(self, s, m, parent):
        r','
        # commas are parameter separators in this mode
        # commas also terminate this mode
        parent.exitComputeEqnMode()
        parent.addToken(type=s)
        # newlines, redirection allowed after comma
        for mode in (_ACCEPT_REDIR_MODE, _SWALLOW_NEWLINE_MODE):
            parent.current.append(mode)
class _ComputeEqnScanner_2(_BasicScanner_2,_ComputeEqnScanner_1):

    """Compute-equation-mode handlers for keyword arguments, compound
    assignment and non-integer numbers.

    The raw-string docstring of each t_* method is its token pattern.
    """

    def t_keyval(self, s, m, parent):
        r'(?P<KeyName>[a-zA-Z\$_][a-zA-Z\$_\d.]*) [ \t]* =(?!=)'
        key = m.group('KeyName')
        parent.addIdent(key, usekey=0)
        parent.addToken(type='=')

    def t_keybool(self, s, m, parent):
        r'[a-zA-Z\$_][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t]*[\n<>\|,)]))'
        # Difference from command mode t_keybool is that comma/paren can
        # terminate argument
        # This pattern requires a following comma, newline, or
        # redirection so that expressions can be distinguished from
        # boolean args in this mode
        key, flag = s[:-1], s[-1]
        parent.addIdent(key, usekey=0)
        parent.addToken(type=flag)
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_sexagesimal(self, s, m, parent):
        r'\d+:\d+(:\d+(\.\d*)?)?'
        parent.addToken(type='SEXAGESIMAL', attr=s)

    def t_assignop(self, s, m, parent):
        r'( [+\-*/] | // ) ='
        parent.addToken(type='ASSIGNOP',attr=s)
        # switch to compute mode: the top of the mode stack is replaced,
        # not pushed
        parent.current[-1] = _COMPUTE_MODE

    def t_float(self, s, m, parent):
        r'(\d+[eEdD][+\-]?\d+) | (((\d*\.\d+)|(\d+\.\d*))([eEdD][+\-]?\d+)?)'
        parent.addToken(type='FLOAT', attr=s)
class _StrictComputeEqnScanner(_BasicScanner_3,_ComputeEqnScanner_2):

    """Strict scanner class for tokens recognized in compute equation mode"""

    def t_compop(self, s, m, parent):
        r'[<>!=]=|<|>'
        # comparison operator; the next scanner call runs in
        # swallow-newline mode
        parent.addToken(type='COMPOP',attr=s)
        parent.current.append(_SWALLOW_NEWLINE_MODE)
567
class _ComputeEqnScanner(_LaxScanner,_StrictComputeEqnScanner):
    """Scanner for compute equation mode.

    Same as the strict variant, except that _LaxScanner is listed first
    so its t_default (which skips binary data) is used.
    """
571 572 #--------------------------------------------------------------------- 573 # ComputeScanner: Tokens recognized in compute mode 574 #--------------------------------------------------------------------- 575
class _ComputeScanner_1(_BasicScanner_1):

    """Compute-mode handlers layered on top of _BasicScanner_1.

    The raw-string docstring of each t_* method is its token pattern.
    """

    def t_lparen(self, s, m, parent):
        r'\('
        parent.addToken(type='(')
        # push to compute mode
        parent.current.append(_COMPUTE_MODE)
        parent.parencount += 1
        # redirection can follow open parens
        # XXX get rid of this?
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_op(self, s, m, parent):
        r'\*\*|//|\*|\+|-|/|%'
        #XXX Could make this type OP if we don't need to distinguish them
        parent.addToken(type=s)
        # line breaks are allowed after operators
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_logop(self, s, m, parent):
        r'\|\||&&|!'
        # split '!' off separately: the two-character operators are
        # LOGOP tokens, a lone '!' is a token of its own type
        if len(s) <= 1:
            parent.addToken(type=s)
        else:
            parent.addToken(type='LOGOP',attr=s)
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_integer(self, s, m, parent):
        r' \d+([bB]|([\da-fA-F]*[xX]))? '
        parent.addToken(type='INTEGER', attr=s)

    def t_ident(self, s, m, parent):
        r'[a-zA-Z\$_][a-zA-Z\$_\d.]*'
        parent.addIdent(s)

    def t_comma(self, s, m, parent):
        r','
        # commas are parameter separators in this mode
        parent.addToken(type=s)
        # newlines, redirection allowed after comma
        for mode in (_ACCEPT_REDIR_MODE, _SWALLOW_NEWLINE_MODE):
            parent.current.append(mode)
class _ComputeScanner_2(_BasicScanner_2,_ComputeScanner_1):

    """Compute-mode handlers for keyword arguments, compound assignment
    and non-integer numbers.

    The raw-string docstring of each t_* method is its token pattern.
    """

    def t_keyval(self, s, m, parent):
        r'(?P<KeyName>[a-zA-Z\$_][a-zA-Z\$_\d.]*) [ \t]* =(?!=)'
        key = m.group('KeyName')
        parent.addIdent(key, usekey=0)
        parent.addToken(type='=')

    def t_keybool(self, s, m, parent):
        r'[a-zA-Z\$_][a-zA-Z\$_\d.]*[+\-]($|(?=[ \t]*[\n<>\|,)]))'
        # Difference from command mode t_keybool is that comma/paren can
        # terminate argument
        # This pattern requires a following comma, newline, or
        # redirection so that expressions can be distinguished from
        # boolean args in this mode
        key, flag = s[:-1], s[-1]
        parent.addIdent(key, usekey=0)
        parent.addToken(type=flag)
        parent.current.append(_ACCEPT_REDIR_MODE)

    def t_sexagesimal(self, s, m, parent):
        r'\d+:\d+(:\d+(\.\d*)?)?'
        parent.addToken(type='SEXAGESIMAL', attr=s)

    def t_assignop(self, s, m, parent):
        r'( [+\-*/] | // ) ='
        # already in compute mode, so no mode change is needed here
        parent.addToken(type='ASSIGNOP',attr=s)

    def t_float(self, s, m, parent):
        r'(\d+[eEdD][+\-]?\d+) | (((\d*\.\d+)|(\d+\.\d*))([eEdD][+\-]?\d+)?)'
        parent.addToken(type='FLOAT', attr=s)
class _StrictComputeScanner(_BasicScanner_3,_ComputeScanner_2):

    """Strict scanner class for tokens recognized in compute mode"""

    def t_compop(self, s, m, parent):
        r'[<>!=]=|<|>'
        # comparison operator; the next scanner call runs in
        # swallow-newline mode
        parent.addToken(type='COMPOP',attr=s)
        parent.current.append(_SWALLOW_NEWLINE_MODE)
660
class _ComputeScanner(_LaxScanner,_StrictComputeScanner):
    """Scanner for compute mode.

    Same as the strict variant, except that _LaxScanner is listed first
    so its t_default (which skips binary data) is used.
    """
664 665 #--------------------------------------------------------------------- 666 # SwallowNewlineScanner: Tokens recognized at points where 667 # embedded newlines are allowed 668 #--------------------------------------------------------------------- 669
class _StrictSwallowNewlineScanner(GenericScanner):

    """Strict scanner class where embedded newlines allowed"""

    def t_swallow_newlines(self, s, m, parent):
        r'[ \t\n]* ( ( \\ | (\#.*) ) [ \t\n]+ )*'
        # Just grab all the following newlines
        # Also consumes backslash continuations and comments
        # Note that this always matches, so we always leave this
        # mode after one match
        swallowed = _countNewlines(s)
        parent.lineno += swallowed
        # pop to previous mode
        parent.current.pop()


# the same class serves as both the strict and the lax variant
_SwallowNewlineScanner = _StrictSwallowNewlineScanner

#---------------------------------------------------------------------
# AcceptRedirScanner: Tokens that are recognized at points where
# redirection is allowed
#---------------------------------------------------------------------
class _StrictAcceptRedirScanner(_BasicScanner_3,_BasicScanner_2,
                                _BasicScanner_1):

    """Strict scanner class where redirection is allowed"""

    def t_accept_redir(self, s, m, parent):
        r' < | >>? ([GIP]+|&?) | \|&? '
        is_pipe = (s[0] == '|')
        if is_pipe:
            # startLine replaces the whole mode list, so there is
            # nothing to pop in this branch
            parent.addToken(type='PIPE', attr=s)
            parent.startLine(parencount=parent.parencount)
        else:
            parent.addToken(type='REDIR', attr=s)
            # pop this state
            parent.current.pop()
        # allow following newlines
        parent.current.append(_SWALLOW_NEWLINE_MODE)

    def t_ignore_spaces(self, s, m, parent):
        r'[ \t]+'
        # whitespace ignored (but does not cause us to leave this mode)
        pass

    def t_not_redir(self, s, m, parent):
        r'(?![ \t<>\|])'
        # if not redirection or whitespace, just pop the state
        parent.current.pop()
718 719
class _AcceptRedirScanner(_LaxScanner,_StrictAcceptRedirScanner):
    """Scanner for points where redirection is allowed.

    Same as the strict variant, except that _LaxScanner is listed first
    so its t_default (which skips binary data) is used.
    """


#---------------------------------------------------------------------
# Main context-sensitive scanner
#---------------------------------------------------------------------

# dictionary of reserved keywords (keys are lower case; every value is 1)


# SEE ALSO ClScanner.__init__ for more ECL keywords.
_keywordDict = dict.fromkeys([
    'begin',
    'break',
    'case',
    'default',
    'else',
    'end',
    'for',
    'goto',
    'if',
    'next',
    'procedure',
    'return',
    'switch',
    'while',
], 1)

# names of the CL data types
_typeDict = dict.fromkeys([
    'bool',
    'char',
    'file',
    'gcur',
    'imcur',
    'int',
    'pset',
    'real',
    'string',
    'struct',
    'ukey',
], 1)

# boolean literals
_boolDict = dict.fromkeys(['yes', 'no'], 1)

# list of scanners for each state
# only need to create these once, since they are designed to
# contain no state information

_scannerDict = None
_strictScannerDict = None
def _getScannerDict():
    """Return the mode -> scanner table of lax scanners.

    The table is built on the first call and cached in the module-level
    ``_scannerDict``; later calls return the same dictionary.
    """
    global _scannerDict
    if _scannerDict is not None:
        return _scannerDict
    # first use: create one scanner instance per mode
    _scannerDict = {
        _START_LINE_MODE: _StartScanner(),
        _COMMAND_MODE: _CommandScanner(),
        _COMPUTE_START_MODE: _ComputeStartScanner(),
        _COMPUTE_EQN_MODE: _ComputeEqnScanner(),
        _COMPUTE_MODE: _ComputeScanner(),
        _SWALLOW_NEWLINE_MODE: _SwallowNewlineScanner(),
        _ACCEPT_REDIR_MODE: _AcceptRedirScanner(),
        }
    return _scannerDict
785
def _getStrictScannerDict():
    """Return the mode -> scanner table of strict scanners.

    The table is built on the first call and cached in the module-level
    ``_strictScannerDict``; later calls return the same dictionary.
    """
    global _strictScannerDict
    if _strictScannerDict is not None:
        return _strictScannerDict
    # create strict scanners, one instance per mode
    _strictScannerDict = {
        _START_LINE_MODE: _StrictStartScanner(),
        _COMMAND_MODE: _StrictCommandScanner(),
        _COMPUTE_START_MODE: _StrictComputeStartScanner(),
        _COMPUTE_EQN_MODE: _StrictComputeEqnScanner(),
        _COMPUTE_MODE: _StrictComputeScanner(),
        _SWALLOW_NEWLINE_MODE: _StrictSwallowNewlineScanner(),
        _ACCEPT_REDIR_MODE: _StrictAcceptRedirScanner(),
        }
    return _strictScannerDict
800 801
class CLScanner(ContextSensitiveScanner):

    """CL scanner class

    State kept on the instance while tokenizing:
      rv            list of Token objects produced so far
      lineno        current line number (starts at 1)
      current       list of scanner modes; handlers push and pop modes on it
      default_mode  mode entered after the first identifier on a line
      argsep        token type inserted before the next command-mode
                    argument (None or ',')
      parencount    parenthesis nesting depth
    """

    def __init__(self, strict=0):
        """Create a scanner; a true `strict` selects the strict scanners."""
        # ECL adds three reserved words.  Note that this updates the
        # module-level keyword table, not a per-instance copy.
        if pyrafglobals._use_ecl:
            for word in ("iferr", "ifnoerr", "then"):
                _keywordDict[word] = 1

        self.strict = strict
        if strict:
            sdict = _getStrictScannerDict()
        else:
            sdict = _getScannerDict()
        ContextSensitiveScanner.__init__(self, sdict)

    def startLine(self, parencount=0, argsep=None):
        """Reset the mode list to start-line mode.

        `parencount` and `argsep` become the new paren depth and pending
        argument separator.
        """
        # go to _START_LINE_MODE
        self.parencount = parencount
        self.argsep = argsep
        self.current = [ _START_LINE_MODE ]

    def tokenize(self, input, default_mode=_COMMAND_MODE):
        """Scan the string `input` and return the list of tokens.

        A final NEWLINE token is always requested at the end (addToken
        drops it if it would duplicate one).
        """
        self.rv = []
        self.lineno = 1
        # default mode when leaving _START_LINE_MODE
        self.default_mode = default_mode
        # argsep is used to insert commas as argument separators
        # in command mode
        self.argsep = None
        self.parencount = 0
        ContextSensitiveScanner.tokenize(self, input)
        self.addToken(type='NEWLINE')
        return self.rv

    def addToken(self, type, attr=None):
        """Append a token of the given type to the token list.

        A `type` of None is ignored, which lets callers pass an unset
        argument separator.  Some twists are applied to simplify parsing;
        see the comments below.
        """
        # add a token to the list (with some twists to simplify parsing)

        if type is None: return

        # insert NEWLINE before '}'

        if type == '}' and self.rv and self.rv[-1].type != 'NEWLINE':
            self.rv.append(Token(type='NEWLINE', attr=None, lineno=self.lineno))

        # compress out multiple/leading newlines
        # suppress newline after '{'
        # (an earlier variant also suppressed the newline after ';')

        if type != 'NEWLINE' or (self.rv and self.rv[-1].type != 'NEWLINE' and
                                 self.rv[-1].type != '{'):

            # Another ugly hack -- the syntax
            #
            # taskname(arg, arg, | taskname2 arg, arg)
            #
            # causes parsing problems.  To help solve them, delete any
            # comma that just precedes a PIPE

            if type=='PIPE' and self.rv and self.rv[-1].type == ',':
                del self.rv[-1]

            self.rv.append(Token(type=type, attr=attr, lineno=self.lineno))

            # insert NEWLINE after '}' too
            # go to start-line mode
            if type == '}' and self.rv and self.rv[-1].type != 'NEWLINE':
                self.rv.append(Token(type='NEWLINE', attr=None, lineno=self.lineno))
                self.startLine()

    def addIdent(self, name, mode=None, usekey=1):
        """Add an identifier token, classifying it on the way.

        Keywords (and, at the start of a line, type names) are recognized
        only if `usekey` is true; matching ignores case.  For a plain
        identifier, `mode` (if not None) is pushed on the mode list.
        """
        # Add identifier token, recognizing keywords if usekey parameter is set
        # Note keywords may be in any case
        # For normal (non-keyword) identifiers, goes to mode

        keyword = name.lower()
        # membership tests use ``in`` rather than the deprecated has_key()
        if usekey and keyword in _keywordDict:

            self.addToken(type=keyword.upper(), attr=keyword)
            if keyword == "procedure":
                # Procedure scripts are always in compute mode
                self.default_mode = _COMPUTE_START_MODE
            if keyword == "if" or keyword == "else":
                # For `if', `else' go into _START_LINE_MODE
                self.startLine()
            elif self.current[-1] != _COMPUTE_MODE:
                # Other keywords put us into _COMPUTE_MODE
                self.current.append(_COMPUTE_MODE)

        elif usekey and keyword in _typeDict and \
                        self.current[-1] == _START_LINE_MODE:

            # types are treated as keywords only if first token on line
            self.addToken(type='TYPE', attr=keyword)
            self.current.append(_COMPUTE_MODE)

        elif keyword == "indef" or keyword == "eof":

            # INDEF, EOF always get recognized
            self.addToken(type=keyword.upper())

        elif keyword == "epsilon":

            # epsilon always gets recognized
            # The attr must be supplied: per the original notes, a FLOAT
            # token without one led to "AttributeError: 'NoneType' object
            # has no attribute 'find'", and an EPSILON token type ended up
            # quoted.
            self.addToken(type="FLOAT", attr=keyword)

        elif keyword in _boolDict:

            # boolean yes, no always gets recognized
            self.addToken(type='BOOL', attr=keyword)

        else:

            self.addToken(type='IDENT',attr=name)
            if mode is not None: self.current.append(mode)

    def enterComputeEqnMode(self):
        """Push compute-equation mode, rescanning a preceding STRING token."""
        # Nasty hack to work around weird CL syntax
        # In compute-start mode, tokens are strings or identifiers
        # or numbers depending on what follows them, and the mode
        # once switched to compute-mode stays there until a
        # terminating comma.  Ugly stuff.
        #
        # This is called when a token is received that triggers the
        # transition to the compute-eqn mode from compute-start mode.
        # It may be necessary to change tokens already on the
        # list when this is called...

        self.current.append(_COMPUTE_EQN_MODE)
        if self.rv and self.rv[-1].type == "STRING":
            # if last token was a string, we must remove it and
            # rescan it using the compute-mode scanner
            # Hope this works!
            last = self.rv[-1].attr
            del self.rv[-1]
            ContextSensitiveScanner.tokenize(self, last)

    def exitComputeEqnMode(self):
        """Pop compute-equation mode if it is the current mode."""
        # Companion to enterComputeEqnMode -- called when we encounter
        # a token that may cause us to exit the mode
        if self.current[-1] == _COMPUTE_EQN_MODE:
            del self.current[-1]
954 955
956 -def _countNewlines(s):
957 """Return number of newlines in string""" 958 n = 0 959 i = s.find('\n') 960 while (i>=0): 961 n = n+1 962 i = s.find('\n', i+1) 963 return n
964 965
def scan(f):
    """Read everything from the file-like object `f` and tokenize it.

    Uses a freshly created (non-strict) CLScanner and returns whatever
    its tokenize method returns.
    """
    text = f.read()
    return CLScanner().tokenize(text)
970 971
def toklist(tlist,filename=None):
    """List the tokens in `tlist`, one source line per output line.

    Token reprs on a line are separated by single blanks.  A NEWLINE
    token ends the output line; if ``cltoken.verbose`` is set, the word
    NEWLINE is written as well.  Output goes to the file `filename` if
    given, otherwise to the current sys.stdout.

    Unlike the earlier implementation this does not rebind sys.stdout,
    so a failure part-way through cannot leave standard output
    redirected, and the output file is always closed.
    """
    # list tokens
    import sys
    import cltoken
    if filename:
        out = open(filename,'w')
    else:
        out = sys.stdout
    try:
        pending = []    # reprs of the tokens on the current output line
        for tok in tlist:
            if tok.type != 'NEWLINE':
                pending.append(repr(tok))
                continue
            if cltoken.verbose:
                pending.append('NEWLINE')
            out.write(' '.join(pending) + '\n')
            pending = []
        # tokens after the last NEWLINE are written without a line end
        if pending:
            out.write(' '.join(pending))
    finally:
        # only close what was opened here; never close the caller's stdout
        if filename:
            out.close()


if __name__ == '__main__':
    s = CLScanner()

    # scan file 'simple.cl'

    fh = open('simple.cl')
    try:
        lines = fh.read()
    finally:
        fh.close()
    tokens = s.tokenize(lines)

    toklist(tokens[:30])