1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 __version__ = "3.2"
35 __tabversion__ = "3.2"
36
37 import re, sys, types, copy, os
38
39
# Python 2/3 string-type compatibility shim.  On Python 2, types.StringType
# and types.UnicodeType exist; on Python 3 the getattr fails with
# AttributeError and we fall back to (str, bytes).
try:
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3
    StringTypes = (str, bytes)
46
47
48
49
# Portable accessor for a function's code object: f.func_code on Python 2,
# f.__code__ on Python 3.  (Bodies reconstructed: the rest of this file
# consistently calls func_code(f).co_firstlineno / .co_filename /
# .co_argcount, which fixes this helper's contract.)
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__
57
# Token names must look like identifiers: letters, digits and underscores.
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
59
60
61
62
class LexError(Exception):
    """Exception raised for lexing errors.

    Carries the error message (in the standard Exception ``args`` tuple)
    plus ``text``, the remaining unscanned input at the point of failure.
    (Class/def headers reconstructed; body matches the visible source.)
    """
    def __init__(self, message, s):
        self.args = (message,)
        self.text = s
68
74
75
76
77
92
93
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def __init__(self):
    """Initialize an empty Lexer; rules are bound later by lex()/readtab().

    (The ``def`` header was lost in extraction and is reconstructed; the
    attribute list matches the visible source.)
    """
    self.lexre = None            # master regexes for the active state
    self.lexretext = None        # source text of those regexes
    self.lexstatere = {}         # state -> [(compiled_regex, func/type table)]
    self.lexstateretext = {}     # state -> [regex source strings]
    self.lexstaterenames = {}    # state -> [rule-name lists]
    self.lexstate = "INITIAL"    # current lexer state
    self.lexstatestack = []      # saved states for push_state()/pop_state()
    self.lexstateinfo = None     # state spec: name -> 'inclusive'|'exclusive'
    self.lexstateignore = {}     # state -> characters to skip silently
    self.lexstateerrorf = {}     # state -> t_error handler
    self.lexreflags = 0          # extra re flags used to compile rules
    self.lexdata = None          # the input string (set by input())
    self.lexpos = 0              # current scan position within lexdata
    self.lexlen = 0              # length of lexdata
    self.lexerrorf = None        # error handler for the active state
    self.lextokens = None        # map of valid token names
    self.lexignore = ""          # ignored characters for the active state
    self.lexliterals = ""        # single-character literal tokens
    self.lexmodule = None        # module/object the rules came from
    self.lineno = 1              # line number (maintained by user rules)
    self.lexoptimize = 0         # nonzero disables run-time token checks
140
def clone(self, object=None):
    """Return a shallow copy of this lexer, optionally rebinding its rules.

    If *object* is given, every rule function and error handler in the
    copy is looked up again as an attribute of *object* (matched by
    function name), so the clone drives that instance's bound methods
    instead of the originally-registered callables.
    """
    c = copy.copy(self)

    # Rebind rule tables onto the supplied object, if any.
    if object:
        newtab = {}
        for key, ritem in self.lexstatere.items():
            newre = []
            for cre, findex in ritem:
                newfindex = []
                for f in findex:
                    # Entries with no callable (plain token types or
                    # ignored groups) are reused unchanged.
                    if not f or not f[0]:
                        newfindex.append(f)
                        continue
                    newfindex.append((getattr(object, f[0].__name__), f[1]))
                newre.append((cre, newfindex))
            newtab[key] = newre
        c.lexstatere = newtab
        c.lexstateerrorf = {}
        for key, ef in self.lexstateerrorf.items():
            c.lexstateerrorf[key] = getattr(object, ef.__name__)
        c.lexmodule = object
    return c
167
168
169
170
def writetab(self, tabfile, outputdir=""):
    """Write the lexer's tables to a cached module <tabfile>.py.

    The generated file is re-read by readtab() in optimized mode.  If
    *tabfile* is already a module object there is nothing to regenerate.
    (A dead computation of the INITIAL-state function list, which had no
    effect, was removed.)
    """
    if isinstance(tabfile, types.ModuleType):
        return
    basetabfilename = tabfile.split(".")[-1]
    filename = os.path.join(outputdir, basetabfilename) + ".py"
    # 'with' guarantees the table file is closed even if a write fails.
    with open(filename, "w") as tf:
        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile, __version__))
        tf.write("_tabversion = %s\n" % repr(__version__))
        tf.write("_lextokens = %s\n" % repr(self.lextokens))
        tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
        tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))

        # Dump each state's regex list, replacing rule functions by their
        # saved names so they can be rebound on reload.
        tabre = {}
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i],
                              _funcs_to_names(lre[i][1], self.lexstaterenames[key][i])))
            tabre[key] = titem

        tf.write("_lexstatere = %s\n" % repr(tabre))
        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))

        # Error handlers are stored by name only.
        taberr = {}
        for key, ef in self.lexstateerrorf.items():
            taberr[key] = ef.__name__ if ef else None
        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
210
211
212
213
def readtab(self, tabfile, fdict):
    """Load lexer tables previously saved by writetab().

    *tabfile* is a module name (imported dynamically) or an already
    imported module object; *fdict* maps rule names back to callables.
    Raises ImportError if the cached table was written by a different
    PLY version.  (The ``def`` header was reconstructed.)
    """
    if isinstance(tabfile, types.ModuleType):
        lextab = tabfile
    else:
        # Python 2's exec binds 'lextab' locally; Python 3 requires an
        # explicit namespace dict to recover the imported module.
        if sys.version_info[0] < 3:
            exec("import %s as lextab" % tabfile)
        else:
            env = {}
            exec("import %s as lextab" % tabfile, env, env)
            lextab = env['lextab']

    if getattr(lextab, "_tabversion", "0.0") != __version__:
        raise ImportError("Inconsistent PLY version")

    self.lextokens = lextab._lextokens
    self.lexreflags = lextab._lexreflags
    self.lexliterals = lextab._lexliterals
    self.lexstateinfo = lextab._lexstateinfo
    self.lexstateignore = lextab._lexstateignore
    self.lexstatere = {}
    self.lexstateretext = {}
    for key, lre in lextab._lexstatere.items():
        titem = []
        txtitem = []
        for i in range(len(lre)):
            # Recompile each saved pattern and rebind rule names to the
            # callables supplied in fdict.
            titem.append((re.compile(lre[i][0], lextab._lexreflags),
                          _names_to_funcs(lre[i][1], fdict)))
            txtitem.append(lre[i][0])
        self.lexstatere[key] = titem
        self.lexstateretext[key] = txtitem
    self.lexstateerrorf = {}
    for key, ef in lextab._lexstateerrorf.items():
        self.lexstateerrorf[key] = fdict[ef]
    self.begin('INITIAL')
247
248
249
250
259
260
261
262
def begin(self, state):
    """Switch to lexer *state*, activating its rules, ignore set and
    error handler.  Raises ValueError for an unknown state.
    (The ``def`` header was reconstructed.)
    """
    if state not in self.lexstatere:
        raise ValueError("Undefined state")
    self.lexre = self.lexstatere[state]
    self.lexretext = self.lexstateretext[state]
    self.lexignore = self.lexstateignore.get(state, "")
    self.lexerrorf = self.lexstateerrorf.get(state, None)
    self.lexstate = state
271
272
273
274
def push_state(self, state):
    """Enter *state*, remembering the current state for pop_state().
    (The ``def`` header was reconstructed.)
    """
    self.lexstatestack.append(self.lexstate)
    self.begin(state)
278
279
280
281
def pop_state(self):
    """Return to the state active before the most recent push_state().
    (The ``def`` header was reconstructed.)
    """
    self.begin(self.lexstatestack.pop())
284
285
286
287
290
291
292
293
296
297
298
299
300
301
302
303
def token(self):
    """Return the next LexToken from the input, or None at end of input.

    Scans from self.lexpos: skips ignored characters, tries each master
    regex for the current state, then single-character literals, then
    the t_error handler.  Raises LexError on an illegal character and
    RuntimeError if no input was ever provided.
    (The ``def`` header was reconstructed; body matches visible source.)
    """
    # Cache hot attributes in locals for speed inside the scan loop.
    lexpos = self.lexpos
    lexlen = self.lexlen
    lexignore = self.lexignore
    lexdata = self.lexdata

    while lexpos < lexlen:
        # Short-circuit for ignored characters (whitespace, tabs, ...).
        if lexdata[lexpos] in lexignore:
            lexpos += 1
            continue

        # Try each master regular expression of the current state.
        for lexre, lexindexfunc in self.lexre:
            m = lexre.match(lexdata, lexpos)
            if not m:
                continue

            # Create a token for return.
            tok = LexToken()
            tok.value = m.group()
            tok.lineno = self.lineno
            tok.lexpos = lexpos

            # lastindex identifies which named rule group matched.
            i = m.lastindex
            func, tok.type = lexindexfunc[i]

            if not func:
                # String rule: no callback to run.
                if tok.type:
                    self.lexpos = m.end()
                    return tok
                else:
                    # Ignored match (t_ignore_*): keep scanning.
                    lexpos = m.end()
                    break

            lexpos = m.end()

            # Function rule: attach lexer context and invoke the callback.
            tok.lexer = self
            self.lexmatch = m
            self.lexpos = lexpos

            newtok = func(tok)

            # Callback discarded the token; refresh position/ignore in
            # case it changed lexer state, then keep scanning.
            if not newtok:
                lexpos = self.lexpos
                lexignore = self.lexignore
                break

            # Verify the returned token type unless running optimized.
            if not self.lexoptimize:
                if not newtok.type in self.lextokens:
                    raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                        func_code(func).co_filename, func_code(func).co_firstlineno,
                        func.__name__, newtok.type), lexdata[lexpos:])

            return newtok
        else:
            # No regex matched: try single-character literal tokens.
            if lexdata[lexpos] in self.lexliterals:
                tok = LexToken()
                tok.value = lexdata[lexpos]
                tok.lineno = self.lineno
                tok.type = tok.value
                tok.lexpos = lexpos
                self.lexpos = lexpos + 1
                return tok

            # Still no match: hand off to the t_error rule if defined.
            if self.lexerrorf:
                tok = LexToken()
                tok.value = self.lexdata[lexpos:]
                tok.lineno = self.lineno
                tok.type = "error"
                tok.lexer = self
                tok.lexpos = lexpos
                self.lexpos = lexpos
                newtok = self.lexerrorf(tok)
                if lexpos == self.lexpos:
                    # Error rule consumed nothing: abort rather than loop
                    # forever on the same character.
                    raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                lexpos = self.lexpos
                if not newtok:
                    continue
                return newtok

            self.lexpos = lexpos
            raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

    # End of input reached.
    self.lexpos = lexpos + 1
    if self.lexdata is None:
        raise RuntimeError("No input string given with input()")
    return None
399
400
403
405 t = self.token()
406 if t is None:
407 raise StopIteration
408 return t
409
410 __next__ = next
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
def get_caller_module_dict(levels):
    """Return a merged globals+locals dict for the stack frame *levels*
    above this call, used to harvest t_* rules from the calling module.
    (The ``def`` header was reconstructed.)
    """
    # Raising and catching an exception is a version-portable way to get
    # at the current frame via the traceback object.
    try:
        raise RuntimeError
    except RuntimeError:
        e, b, t = sys.exc_info()
        f = t.tb_frame
        while levels > 0:
            f = f.f_back
            levels -= 1
        ldict = f.f_globals.copy()
        # At module scope globals and locals are the same dict; only
        # merge locals when we are inside a function.
        if f.f_globals != f.f_locals:
            ldict.update(f.f_locals)

    return ldict
441
442
443
444
445
446
447
448
def _funcs_to_names(funclist, namelist):
    """Replace (func, type) entries with (saved_name, type) for writing
    tables to a file; entries with no callable pass through unchanged.
    (The ``def`` header was reconstructed.)
    """
    result = []
    for f, name in zip(funclist, namelist):
        if f and f[0]:
            result.append((name, f[1]))
        else:
            result.append(f)
    return result
457
458
459
460
461
462
463
464
def _names_to_funcs(namelist, fdict):
    """Inverse of _funcs_to_names(): map (name, type) entries back to
    (callable, type) using *fdict*; nameless entries pass through.
    (The ``def`` header was reconstructed.)
    """
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]], n[1]))
        else:
            result.append(n)
    return result
473
474
475
476
477
478
479
480
481
511
512
513
514
515
516
517
518
519
520
def _statetoken(s, names):
    """Split a rule name 't_state1_state2_TOKNAME' into its state tuple
    and token name.  *names* maps valid state names; leading parts that
    are states (or 'ANY') are consumed, the remainder is the token name.
    (The ``def`` header was reconstructed; an unused local was dropped.)
    """
    parts = s.split("_")
    for i in range(1, len(parts)):
        if parts[i] not in names and parts[i] != 'ANY':
            break
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    # 'ANY' expands to every declared state.
    if 'ANY' in states:
        states = tuple(names)

    tokenname = "_".join(parts[i:])
    return (states, tokenname)
536
537
538
539
540
541
542
543
def __init__(self, ldict, log=None, reflags=0):
    """Prepare to harvest a lexer specification out of *ldict* (a module
    or object namespace holding tokens/literals/states/t_* rules).

    *log* receives validation diagnostics (defaults to a stderr logger);
    *reflags* holds extra re flags applied when compiling rules.
    """
    self.ldict = ldict
    self.error_func = None
    self.tokens = []
    self.reflags = reflags
    self.stateinfo = {'INITIAL': 'inclusive'}
    self.files = {}          # source files to check for duplicate rules
    self.error = 0           # set to 1 by any validation failure

    # Fall back to logging on stderr when no logger is supplied.
    self.log = PlyLogger(sys.stderr) if log is None else log
558
559
565
566
572
573
def get_tokens(self):
    """Fetch and sanity-check the specification's ``tokens`` list.
    Sets self.error on failure.  (The ``def`` header was reconstructed.)
    """
    tokens = self.ldict.get("tokens", None)
    if not tokens:
        self.log.error("No token list is defined")
        self.error = 1
        return

    if not isinstance(tokens, (list, tuple)):
        self.log.error("tokens must be a list or tuple")
        self.error = 1
        return

    # NOTE(review): unreachable in practice — an empty sequence is
    # already caught by the first check above; kept for fidelity.
    if not tokens:
        self.log.error("tokens is empty")
        self.error = 1
        return

    self.tokens = tokens
592
593
def validate_tokens(self):
    """Check that token names are valid identifiers; warn on duplicates.
    (The ``def`` header was reconstructed.)
    """
    terminals = {}
    for n in self.tokens:
        if not _is_identifier.match(n):
            self.log.error("Bad token name '%s'", n)
            self.error = 1
        if n in terminals:
            self.log.warning("Token '%s' multiply defined", n)
        terminals[n] = 1
603
604
607
608
def validate_literals(self):
    """Check that every declared literal is a single-character string.
    A non-iterable literals spec is reported via the TypeError path.
    (The ``def`` header was reconstructed.)
    """
    try:
        for c in self.literals:
            if not isinstance(c, StringTypes) or len(c) > 1:
                self.log.error("Invalid literal %s. Must be a single character", repr(c))
                self.error = 1
                continue

    except TypeError:
        self.log.error("Invalid literals specification. literals must be a sequence of characters")
        self.error = 1
620
def get_states(self):
    """Fetch and validate the optional ``states`` declaration, filling
    self.stateinfo with name -> 'inclusive'|'exclusive' entries.
    (The ``def`` header was reconstructed.)
    """
    self.states = self.ldict.get("states", None)

    if self.states:
        if not isinstance(self.states, (tuple, list)):
            self.log.error("states must be defined as a tuple or list")
            self.error = 1
        else:
            # Each entry must be a (statename, statetype) pair.
            for s in self.states:
                if not isinstance(s, tuple) or len(s) != 2:
                    self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s))
                    self.error = 1
                    continue
                name, statetype = s
                if not isinstance(name, StringTypes):
                    self.log.error("State name %s must be a string", repr(name))
                    self.error = 1
                    continue
                if not (statetype == 'inclusive' or statetype == 'exclusive'):
                    self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name)
                    self.error = 1
                    continue
                if name in self.stateinfo:
                    self.log.error("State '%s' already defined", name)
                    self.error = 1
                    continue
                self.stateinfo[name] = statetype
648
649
650
651
def get_rules(self):
    """Collect all t_* symbols from ldict and bucket them per state.

    Function rules go to self.funcsym, string rules to self.strsym,
    ignore specs to self.ignore and error handlers to self.errorf.
    Function rules are then sorted by definition line, string rules by
    decreasing pattern length (longest-match-first).
    (The ``def`` header was reconstructed.)
    """
    tsymbols = [f for f in self.ldict if f[:2] == 't_']

    # Per-state buckets.
    self.toknames = {}      # rule symbol -> token name
    self.funcsym = {}       # state -> [(symbol, function)]
    self.strsym = {}        # state -> [(symbol, pattern string)]
    self.ignore = {}        # state -> ignore characters
    self.errorf = {}        # state -> error function

    for s in self.stateinfo:
        self.funcsym[s] = []
        self.strsym[s] = []

    if len(tsymbols) == 0:
        self.log.error("No rules of the form t_rulename are defined")
        self.error = 1
        return

    for f in tsymbols:
        t = self.ldict[f]
        states, tokname = _statetoken(f, self.stateinfo)
        self.toknames[f] = tokname

        if hasattr(t, "__call__"):
            if tokname == 'error':
                for s in states:
                    self.errorf[s] = t
            elif tokname == 'ignore':
                # t_ignore must be a string, not a function.
                line = func_code(t).co_firstlineno
                file = func_code(t).co_filename
                self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__)
                self.error = 1
            else:
                for s in states:
                    self.funcsym[s].append((f, t))
        elif isinstance(t, StringTypes):
            if tokname == 'ignore':
                for s in states:
                    self.ignore[s] = t
                    if "\\" in t:
                        self.log.warning("%s contains a literal backslash '\\'", f)

            elif tokname == 'error':
                self.log.error("Rule '%s' must be defined as a function", f)
                self.error = 1
            else:
                for s in states:
                    self.strsym[s].append((f, t))
        else:
            self.log.error("%s not defined as a function or string", f)
            self.error = 1

    # Sort the function rules by definition order (line number) ...
    for f in self.funcsym.values():
        if sys.version_info[0] < 3:
            f.sort(lambda x, y: cmp(func_code(x[1]).co_firstlineno, func_code(y[1]).co_firstlineno))
        else:
            f.sort(key=lambda x: func_code(x[1]).co_firstlineno)

    # ... and the string rules longest-pattern-first so that more
    # specific patterns win in the master regex.
    for s in self.strsym.values():
        if sys.version_info[0] < 3:
            s.sort(lambda x, y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
        else:
            s.sort(key=lambda x: len(x[1]), reverse=True)
721
722
def validate_rules(self):
    """Validate every collected rule per state: argument counts, regex
    docstrings, compilability, empty-string matches, and the error
    handler's signature; finally scan the source files for duplicates.
    (The ``def`` header was reconstructed.)
    """
    for state in self.stateinfo:
        # --- function-based rules ---
        for fname, f in self.funcsym[state]:
            line = func_code(f).co_firstlineno
            file = func_code(f).co_filename
            self.files[file] = 1

            tokname = self.toknames[fname]
            # Bound methods carry an implicit self argument.
            if isinstance(f, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            nargs = func_code(f).co_argcount
            if nargs > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__)
                self.error = 1
                continue

            if nargs < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__)
                self.error = 1
                continue

            if not f.__doc__:
                self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__)
                self.error = 1
                continue

            try:
                c = re.compile("(?P<%s>%s)" % (fname, f.__doc__), re.VERBOSE | self.reflags)
                if c.match(""):
                    self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__)
                    self.error = 1
            except re.error:
                _etype, e, _etrace = sys.exc_info()
                self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e)
                if '#' in f.__doc__:
                    # re.VERBOSE treats unescaped '#' as a comment start.
                    self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__)
                self.error = 1

        # --- string-based rules ---
        for name, r in self.strsym[state]:
            tokname = self.toknames[name]
            if tokname == 'error':
                self.log.error("Rule '%s' must be defined as a function", name)
                self.error = 1
                continue

            if not tokname in self.tokens and tokname.find("ignore_") < 0:
                self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname)
                self.error = 1
                continue

            try:
                c = re.compile("(?P<%s>%s)" % (name, r), re.VERBOSE | self.reflags)
                if (c.match("")):
                    self.log.error("Regular expression for rule '%s' matches empty string", name)
                    self.error = 1
            except re.error:
                _etype, e, _etrace = sys.exc_info()
                self.log.error("Invalid regular expression for rule '%s'. %s", name, e)
                if '#' in r:
                    self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name)
                self.error = 1

        if not self.funcsym[state] and not self.strsym[state]:
            self.log.error("No rules defined for state '%s'", state)
            self.error = 1

        # --- error handler signature, if one was declared ---
        efunc = self.errorf.get(state, None)
        if efunc:
            f = efunc
            line = func_code(f).co_firstlineno
            file = func_code(f).co_filename
            self.files[file] = 1

            if isinstance(f, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            nargs = func_code(f).co_argcount
            if nargs > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__)
                self.error = 1

            if nargs < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__)
                self.error = 1

    # Check every referenced source file for duplicate rule definitions.
    for f in self.files:
        self.validate_file(f)
819
820
821
822
823
824
825
826
827
828
def validate_file(self, filename):
    """Scan *filename* for t_* rules defined more than once — a common
    cut-and-paste error.  Non-.py or unreadable files are skipped.
    (The ``def`` header was reconstructed.)
    """
    import os.path
    base, ext = os.path.splitext(filename)
    if ext != '.py':
        return  # only bother when real Python source is available

    try:
        # 'with' ensures the source file is closed even on a read error.
        with open(filename) as f:
            lines = f.readlines()
    except IOError:
        return  # best-effort check; unreadable source is not an error

    fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
    sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

    counthash = {}
    linen = 1
    for l in lines:
        m = fre.match(l)
        if not m:
            m = sre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d", filename, linen, name, prev)
                self.error = 1
        linen += 1
859
860
861
862
863
864
def lex(module=None, object=None, debug=0, optimize=0, lextab="lextab",
        reflags=0, nowarn=0, outputdir="", debuglog=None, errorlog=None):
    """Build and return a Lexer from t_* rules found in *module*/*object*
    (or, by default, the caller's namespace).

    In optimize mode the tables are loaded from / saved to the cached
    *lextab* module.  Also rebinds the module-level token()/input()
    shortcuts and the global ``lexer``.  Raises SyntaxError when the
    specification fails validation.
    """
    global lexer
    ldict = None
    stateinfo = {'INITIAL': 'inclusive'}
    lexobj = Lexer()
    lexobj.lexoptimize = optimize
    global token, input

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug:
        if debuglog is None:
            debuglog = PlyLogger(sys.stderr)

    # Get the rule dictionary: an explicit object/module wins over the
    # caller's own namespace.
    if object: module = object

    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        ldict = dict(_items)
    else:
        ldict = get_caller_module_dict(2)

    # Collect and validate the lexer specification.
    linfo = LexerReflect(ldict, log=errorlog, reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    # Optimized mode: try the cached tables first.
    if optimize and lextab:
        try:
            lexobj.readtab(lextab, ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            pass  # no (or stale) table module: fall through and rebuild

    if debug:
        debuglog.info("lex: tokens = %r", linfo.tokens)
        debuglog.info("lex: literals = %r", linfo.literals)
        debuglog.info("lex: states = %r", linfo.stateinfo)

    # Build the dictionary of valid token names.
    lexobj.lextokens = {}
    for n in linfo.tokens:
        lexobj.lextokens[n] = 1

    # Literals may be given as a string or a sequence of characters.
    if isinstance(linfo.literals, (list, tuple)):
        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
    else:
        lexobj.lexliterals = linfo.literals

    stateinfo = linfo.stateinfo

    # Assemble the per-state regex fragments: function rules first (in
    # definition order), then string rules (longest pattern first).
    regexs = {}
    for state in stateinfo:
        regex_list = []

        for fname, f in linfo.funcsym[state]:
            line = func_code(f).co_firstlineno
            file = func_code(f).co_filename
            regex_list.append("(?P<%s>%s)" % (fname, f.__doc__))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, f.__doc__, state)

        for name, r in linfo.strsym[state]:
            regex_list.append("(?P<%s>%s)" % (name, r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state)

        regexs[state] = regex_list

    # Build the master regular expressions.

    if debug:
        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i in range(len(re_text)):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, re_text[i])

    # Inclusive states also inherit all of INITIAL's rules.
    for state, stype in stateinfo.items():
        if state != "INITIAL" and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]

    # Ignore sets.
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL", "")

    # Error handlers.
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get("INITIAL", None)
    if not lexobj.lexerrorf:
        errorlog.warning("No t_error rule is defined")

    # Warn about missing per-state error/ignore rules; inclusive states
    # fall back to INITIAL's handlers.
    for s, stype in stateinfo.items():
        if stype == 'exclusive':
            if not s in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if not s in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            if not s in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get("INITIAL", None)
            if not s in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get("INITIAL", "")

    # Create global versions of the token() and input() functions.
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # In optimize mode, cache the freshly built tables for next time.
    if lextab and optimize:
        lexobj.writetab(lextab, outputdir)

    return lexobj
1006
1007
1008
1009
1010
1011
1012
def runmain(lexer=None, data=None):
    """Debugging helper: tokenize *data* and print every token to stdout.

    When *data* is omitted, reads from the file named in argv[1], or from
    stdin if no argument was given.  Uses *lexer* when supplied, else the
    module-level input()/token() shortcuts set up by lex().
    """
    if not data:
        try:
            filename = sys.argv[1]
            # 'with' ensures the input file is closed even if read fails.
            with open(filename) as f:
                data = f.read()
        except IndexError:
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno, tok.lexpos))
1038
1039
1040
1041
1042
1043
1044
1045
def TOKEN(r):
    """Decorator attaching regex *r* to a token rule as its docstring.

    *r* may be a pattern string, or a callable whose own docstring holds
    the pattern — handy for sharing one regex between rules.
    (The outer ``def`` header was reconstructed.)
    """
    def set_doc(f):
        if hasattr(r, "__call__"):
            f.__doc__ = r.__doc__
        else:
            f.__doc__ = r
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN
1057