# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Parser generation module for PLY (Python Lex-Yacc).
# -----------------------------------------------------------------------------
__version__    = "3.2"
__tabversion__ = "3.2"         # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of the parsing function.

resultlimit = 40               # Size limit of results when running in debug mode

pickle_protocol = 0            # Protocol to use when writing pickle files

import re, types, sys, os.path

# Compatibility helper: return a function's code object under both
# Python 2 and Python 3.  Used below when inspecting p_* rule functions.
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__

# Compatibility
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Load the ply.lex module.  Used only when no lexer object is supplied to parse().
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex

# -----------------------------------------------------------------------------
#                     === Logging / diagnostic objects ===
# -----------------------------------------------------------------------------

# PlyLogger wraps a file-like object and exposes a logging-style interface
# (debug/info/warning/error/critical) used for diagnostic output.
class PlyLogger(object):
    def __init__(self,f):
        self.f = f
    def debug(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: "+ (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug

# Null logger is used when no output is generated.  Does nothing.
class NullLogger(object):
    def __getattribute__(self,name):
        return self
    def __call__(self,*args,**kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception): pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + " ..."
    return "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__,id(r))

# -----------------------------------------------------------------------------
#                           === LR Parsing Engine ===
# -----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set when tracking is enabled)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set when tracking is enabled)

class YaccSymbol:
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each grammar
# rule.  Index lookup and assignment operate on the .value attribute of the
# underlying YaccSymbol objects.  lineno()/lexpos() return position
# information for a given item (or 0 if not defined); linespan()/lexspan()
# return (start,end) ranges.

class YaccProduction:
    def __init__(self,s,stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser= None

    def __getitem__(self,n):
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __getslice__(self,i,j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def error(self):
        raise SyntaxError

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions
        self.action      = lrtab.lr_action
        self.goto        = lrtab.lr_goto
        self.errorfunc   = errorf

    def errok(self):
        self.errorok     = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
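
    # Illustrative sketch (not part of the module): how a user typically drives
    # parse() and how the flags above select an implementation.  The lexer and
    # input strings below are hypothetical.
    #
    #     import ply.yacc as yacc
    #     parser = yacc.yacc()                                   # build from p_* rules
    #     r1 = parser.parse("1+2*3", lexer=mylexer)              # -> parseopt_notrack()
    #     r2 = parser.parse("1+2*3", lexer=mylexer, tracking=1)  # -> parseopt()
    #     r3 = parser.parse("1+2*3", lexer=mylexer, debug=1)     # -> parsedebug(), logs to stderr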

    # parsedebug().
    #
    # Debugging-enabled version of the parsing engine.  parseopt() and
    # parseopt_notrack() further below implement the same algorithm with the
    # debug logging (and, for parseopt_notrack, position tracking) removed for
    # speed.  Keep the three methods in sync when making changes.
    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
283 lookahead = None
284 lookaheadstack = [ ]
285 actions = self.action
286 goto = self.goto
287 prod = self.productions
288 pslice = YaccProduction(None)
289 errorcount = 0
290
291
292 debug.info("PLY: PARSE DEBUG START")
293
294
295
296 if not lexer:
297 lex = load_ply_lex()
298 lexer = lex.lexer
299
300
301 pslice.lexer = lexer
302 pslice.parser = self
303
304
305 if input is not None:
306 lexer.input(input)
307
308 if tokenfunc is None:
309
310 get_token = lexer.token
311 else:
312 get_token = tokenfunc
313
314
315
316 statestack = [ ]
317 self.statestack = statestack
318 symstack = [ ]
319 self.symstack = symstack
320
321 pslice.stack = symstack
322 errtoken = None
323
324
325
326 statestack.append(0)
327 sym = YaccSymbol()
328 sym.type = "$end"
329 symstack.append(sym)
330 state = 0
331 while 1:
332
333
334
335
336
337 debug.debug('')
338 debug.debug('State : %s', state)
339
340
341 if not lookahead:
342 if not lookaheadstack:
343 lookahead = get_token()
344 else:
345 lookahead = lookaheadstack.pop()
346 if not lookahead:
347 lookahead = YaccSymbol()
348 lookahead.type = "$end"
349
350
351 debug.debug('Stack : %s',
352 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
353
354
355
356 ltype = lookahead.type
357 t = actions[state].get(ltype)
358
359 if t is not None:
360 if t > 0:
361
362 statestack.append(t)
363 state = t
364
365
366 debug.debug("Action : Shift and goto state %s", t)
367
368
369 symstack.append(lookahead)
370 lookahead = None
371
372
373 if errorcount: errorcount -=1
374 continue
375
376 if t < 0:
377
378 p = prod[-t]
379 pname = p.name
380 plen = p.len
381
382
383 sym = YaccSymbol()
384 sym.type = pname
385 sym.value = None
386
387
388 if plen:
389 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
390 else:
391 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
392
393
394
395 if plen:
396 targ = symstack[-plen-1:]
397 targ[0] = sym
398
399
400 if tracking:
401 t1 = targ[1]
402 sym.lineno = t1.lineno
403 sym.lexpos = t1.lexpos
404 t1 = targ[-1]
405 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
406 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
407
408
409
410
411
412
413
414
415 pslice.slice = targ
416
417 try:
418
419 del symstack[-plen:]
420 del statestack[-plen:]
421 p.callable(pslice)
422
423 debug.info("Result : %s", format_result(pslice[0]))
424
425 symstack.append(sym)
426 state = goto[statestack[-1]][pname]
427 statestack.append(state)
428 except SyntaxError:
429
430 lookaheadstack.append(lookahead)
431 symstack.pop()
432 statestack.pop()
433 state = statestack[-1]
434 sym.type = 'error'
435 lookahead = sym
436 errorcount = error_count
437 self.errorok = 0
438 continue
439
440
441 else:
442
443
444 if tracking:
445 sym.lineno = lexer.lineno
446 sym.lexpos = lexer.lexpos
447
448
449 targ = [ sym ]
450
451
452
453
454
455
456 pslice.slice = targ
457
458 try:
459
460 p.callable(pslice)
461
462 debug.info("Result : %s", format_result(pslice[0]))
463
464 symstack.append(sym)
465 state = goto[statestack[-1]][pname]
466 statestack.append(state)
467 except SyntaxError:
468
469 lookaheadstack.append(lookahead)
470 symstack.pop()
471 statestack.pop()
472 state = statestack[-1]
473 sym.type = 'error'
474 lookahead = sym
475 errorcount = error_count
476 self.errorok = 0
477 continue
478
479
480 if t == 0:
481 n = symstack[-1]
482 result = getattr(n,"value",None)
483
484 debug.info("Done : Returning %s", format_result(result))
485 debug.info("PLY: PARSE DEBUG END")
486
487 return result
488
489 if t == None:
490
491
492 debug.error('Error : %s',
493 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
494
495
496
497
498
499
500
501
502
503
504
505
506 if errorcount == 0 or self.errorok:
507 errorcount = error_count
508 self.errorok = 0
509 errtoken = lookahead
510 if errtoken.type == "$end":
511 errtoken = None
512 if self.errorfunc:
513 global errok,token,restart
514 errok = self.errok
515 token = get_token
516 restart = self.restart
517 if errtoken and not hasattr(errtoken,'lexer'):
518 errtoken.lexer = lexer
519 tok = self.errorfunc(errtoken)
520 del errok, token, restart
521
522 if self.errorok:
523
524
525
526 lookahead = tok
527 errtoken = None
528 continue
529 else:
530 if errtoken:
531 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
532 else: lineno = 0
533 if lineno:
534 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
535 else:
536 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
537 else:
538 sys.stderr.write("yacc: Parse error in input. EOF\n")
539 return
540
541 else:
542 errorcount = error_count
543
544
545
546
547
548 if len(statestack) <= 1 and lookahead.type != "$end":
549 lookahead = None
550 errtoken = None
551 state = 0
552
553 del lookaheadstack[:]
554 continue
555
556
557
558
559
560 if lookahead.type == "$end":
561
562 return
563
564 if lookahead.type != 'error':
565 sym = symstack[-1]
566 if sym.type == 'error':
567
568
569 lookahead = None
570 continue
571 t = YaccSymbol()
572 t.type = 'error'
573 if hasattr(lookahead,"lineno"):
574 t.lineno = lookahead.lineno
575 t.value = lookahead
576 lookaheadstack.append(lookahead)
577 lookahead = t
578 else:
579 symstack.pop()
580 statestack.pop()
581 state = statestack[-1]
582
583 continue
584
585
586 raise RuntimeError("yacc: internal parser error!!!\n")
587

    # parseopt().
    #
    # Optimized version of the parse() engine, used when tracking is enabled
    # but debugging is not.  Identical in logic to parsedebug() with the debug
    # logging removed.
    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
598 lookahead = None
599 lookaheadstack = [ ]
600 actions = self.action
601 goto = self.goto
602 prod = self.productions
603 pslice = YaccProduction(None)
604 errorcount = 0
605
606
607 if not lexer:
608 lex = load_ply_lex()
609 lexer = lex.lexer
610
611
612 pslice.lexer = lexer
613 pslice.parser = self
614
615
616 if input is not None:
617 lexer.input(input)
618
619 if tokenfunc is None:
620
621 get_token = lexer.token
622 else:
623 get_token = tokenfunc
624
625
626
627 statestack = [ ]
628 self.statestack = statestack
629 symstack = [ ]
630 self.symstack = symstack
631
632 pslice.stack = symstack
633 errtoken = None
634
635
636
637 statestack.append(0)
638 sym = YaccSymbol()
639 sym.type = '$end'
640 symstack.append(sym)
641 state = 0
642 while 1:
643
644
645
646
647 if not lookahead:
648 if not lookaheadstack:
649 lookahead = get_token()
650 else:
651 lookahead = lookaheadstack.pop()
652 if not lookahead:
653 lookahead = YaccSymbol()
654 lookahead.type = '$end'
655
656
657 ltype = lookahead.type
658 t = actions[state].get(ltype)
659
660 if t is not None:
661 if t > 0:
662
663 statestack.append(t)
664 state = t
665
666 symstack.append(lookahead)
667 lookahead = None
668
669
670 if errorcount: errorcount -=1
671 continue
672
673 if t < 0:
674
675 p = prod[-t]
676 pname = p.name
677 plen = p.len
678
679
680 sym = YaccSymbol()
681 sym.type = pname
682 sym.value = None
683
684 if plen:
685 targ = symstack[-plen-1:]
686 targ[0] = sym
687
688
689 if tracking:
690 t1 = targ[1]
691 sym.lineno = t1.lineno
692 sym.lexpos = t1.lexpos
693 t1 = targ[-1]
694 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
695 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
696
697
698
699
700
701
702
703
704 pslice.slice = targ
705
706 try:
707
708 del symstack[-plen:]
709 del statestack[-plen:]
710 p.callable(pslice)
711 symstack.append(sym)
712 state = goto[statestack[-1]][pname]
713 statestack.append(state)
714 except SyntaxError:
715
716 lookaheadstack.append(lookahead)
717 symstack.pop()
718 statestack.pop()
719 state = statestack[-1]
720 sym.type = 'error'
721 lookahead = sym
722 errorcount = error_count
723 self.errorok = 0
724 continue
725
726
727 else:
728
729
730 if tracking:
731 sym.lineno = lexer.lineno
732 sym.lexpos = lexer.lexpos
733
734
735 targ = [ sym ]
736
737
738
739
740
741
742 pslice.slice = targ
743
744 try:
745
746 p.callable(pslice)
747 symstack.append(sym)
748 state = goto[statestack[-1]][pname]
749 statestack.append(state)
750 except SyntaxError:
751
752 lookaheadstack.append(lookahead)
753 symstack.pop()
754 statestack.pop()
755 state = statestack[-1]
756 sym.type = 'error'
757 lookahead = sym
758 errorcount = error_count
759 self.errorok = 0
760 continue
761
762
763 if t == 0:
764 n = symstack[-1]
765 return getattr(n,"value",None)
766
767 if t == None:
768
769
770
771
772
773
774
775
776
777
778
779 if errorcount == 0 or self.errorok:
780 errorcount = error_count
781 self.errorok = 0
782 errtoken = lookahead
783 if errtoken.type == '$end':
784 errtoken = None
785 if self.errorfunc:
786 global errok,token,restart
787 errok = self.errok
788 token = get_token
789 restart = self.restart
790 if errtoken and not hasattr(errtoken,'lexer'):
791 errtoken.lexer = lexer
792 tok = self.errorfunc(errtoken)
793 del errok, token, restart
794
795 if self.errorok:
796
797
798
799 lookahead = tok
800 errtoken = None
801 continue
802 else:
803 if errtoken:
804 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
805 else: lineno = 0
806 if lineno:
807 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
808 else:
809 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
810 else:
811 sys.stderr.write("yacc: Parse error in input. EOF\n")
812 return
813
814 else:
815 errorcount = error_count
816
817
818
819
820
821 if len(statestack) <= 1 and lookahead.type != '$end':
822 lookahead = None
823 errtoken = None
824 state = 0
825
826 del lookaheadstack[:]
827 continue
828
829
830
831
832
833 if lookahead.type == '$end':
834
835 return
836
837 if lookahead.type != 'error':
838 sym = symstack[-1]
839 if sym.type == 'error':
840
841
842 lookahead = None
843 continue
844 t = YaccSymbol()
845 t.type = 'error'
846 if hasattr(lookahead,"lineno"):
847 t.lineno = lookahead.lineno
848 t.value = lookahead
849 lookaheadstack.append(lookahead)
850 lookahead = t
851 else:
852 symstack.pop()
853 statestack.pop()
854 state = statestack[-1]
855
856 continue
857
858
859 raise RuntimeError("yacc: internal parser error!!!\n")
860

    # parseopt_notrack().
    #
    # Optimized version of the parse() engine used when neither debugging nor
    # position tracking is requested.  Identical in logic to parseopt() with
    # the line/position tracking code removed.
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
870 lookahead = None
871 lookaheadstack = [ ]
872 actions = self.action
873 goto = self.goto
874 prod = self.productions
875 pslice = YaccProduction(None)
876 errorcount = 0
877
878
879 if not lexer:
880 lex = load_ply_lex()
881 lexer = lex.lexer
882
883
884 pslice.lexer = lexer
885 pslice.parser = self
886
887
888 if input is not None:
889 lexer.input(input)
890
891 if tokenfunc is None:
892
893 get_token = lexer.token
894 else:
895 get_token = tokenfunc
896
897
898
899 statestack = [ ]
900 self.statestack = statestack
901 symstack = [ ]
902 self.symstack = symstack
903
904 pslice.stack = symstack
905 errtoken = None
906
907
908
909 statestack.append(0)
910 sym = YaccSymbol()
911 sym.type = '$end'
912 symstack.append(sym)
913 state = 0
914 while 1:
915
916
917
918
919 if not lookahead:
920 if not lookaheadstack:
921 lookahead = get_token()
922 else:
923 lookahead = lookaheadstack.pop()
924 if not lookahead:
925 lookahead = YaccSymbol()
926 lookahead.type = '$end'
927
928
929 ltype = lookahead.type
930 t = actions[state].get(ltype)
931
932 if t is not None:
933 if t > 0:
934
935 statestack.append(t)
936 state = t
937
938 symstack.append(lookahead)
939 lookahead = None
940
941
942 if errorcount: errorcount -=1
943 continue
944
945 if t < 0:
946
947 p = prod[-t]
948 pname = p.name
949 plen = p.len
950
951
952 sym = YaccSymbol()
953 sym.type = pname
954 sym.value = None
955
956 if plen:
957 targ = symstack[-plen-1:]
958 targ[0] = sym
959
960
961
962
963
964
965 pslice.slice = targ
966
967 try:
968
969 del symstack[-plen:]
970 del statestack[-plen:]
971 p.callable(pslice)
972 symstack.append(sym)
973 state = goto[statestack[-1]][pname]
974 statestack.append(state)
975 except SyntaxError:
976
977 lookaheadstack.append(lookahead)
978 symstack.pop()
979 statestack.pop()
980 state = statestack[-1]
981 sym.type = 'error'
982 lookahead = sym
983 errorcount = error_count
984 self.errorok = 0
985 continue
986
987
988 else:
989
990 targ = [ sym ]
991
992
993
994
995
996
997 pslice.slice = targ
998
999 try:
1000
1001 p.callable(pslice)
1002 symstack.append(sym)
1003 state = goto[statestack[-1]][pname]
1004 statestack.append(state)
1005 except SyntaxError:
1006
1007 lookaheadstack.append(lookahead)
1008 symstack.pop()
1009 statestack.pop()
1010 state = statestack[-1]
1011 sym.type = 'error'
1012 lookahead = sym
1013 errorcount = error_count
1014 self.errorok = 0
1015 continue
1016
1017
1018 if t == 0:
1019 n = symstack[-1]
1020 return getattr(n,"value",None)
1021
1022 if t == None:
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034 if errorcount == 0 or self.errorok:
1035 errorcount = error_count
1036 self.errorok = 0
1037 errtoken = lookahead
1038 if errtoken.type == '$end':
1039 errtoken = None
1040 if self.errorfunc:
1041 global errok,token,restart
1042 errok = self.errok
1043 token = get_token
1044 restart = self.restart
1045 if errtoken and not hasattr(errtoken,'lexer'):
1046 errtoken.lexer = lexer
1047 tok = self.errorfunc(errtoken)
1048 del errok, token, restart
1049
1050 if self.errorok:
1051
1052
1053
1054 lookahead = tok
1055 errtoken = None
1056 continue
1057 else:
1058 if errtoken:
1059 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
1060 else: lineno = 0
1061 if lineno:
1062 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1063 else:
1064 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
1065 else:
1066 sys.stderr.write("yacc: Parse error in input. EOF\n")
1067 return
1068
1069 else:
1070 errorcount = error_count
1071
1072
1073
1074
1075
1076 if len(statestack) <= 1 and lookahead.type != '$end':
1077 lookahead = None
1078 errtoken = None
1079 state = 0
1080
1081 del lookaheadstack[:]
1082 continue
1083
1084
1085
1086
1087
1088 if lookahead.type == '$end':
1089
1090 return
1091
1092 if lookahead.type != 'error':
1093 sym = symstack[-1]
1094 if sym.type == 'error':
1095
1096
1097 lookahead = None
1098 continue
1099 t = YaccSymbol()
1100 t.type = 'error'
1101 if hasattr(lookahead,"lineno"):
1102 t.lineno = lookahead.lineno
1103 t.value = lookahead
1104 lookaheadstack.append(lookahead)
1105 lookahead = t
1106 else:
1107 symstack.pop()
1108 statestack.pop()
1109 state = statestack[-1]
1110
1111 continue
1112
1113
1114 raise RuntimeError("yacc: internal parser error!!!\n")
1115

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147

# -----------------------------------------------------------------------------
# class Production
#
# This class stores the raw information about a single production or grammar
# rule, e.g.  expr : expr PLUS term.  Attributes include the rule name, the
# list of right-hand-side symbols, the precedence, and the action function.
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
1155 self.name = name
1156 self.prod = tuple(prod)
1157 self.number = number
1158 self.func = func
1159 self.callable = None
1160 self.file = file
1161 self.line = line
1162 self.prec = precedence
1163
1164
1165
1166 self.len = len(self.prod)
1167
1168
1169 self.usyms = [ ]
1170 for s in self.prod:
1171 if s not in self.usyms:
1172 self.usyms.append(s)
1173
1174
1175 self.lr_items = []
1176 self.lr_next = None
1177
1178
1179 if self.prod:
1180 self.str = "%s -> %s" % (self.name," ".join(self.prod))
1181 else:
1182 self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
1202 p = LRItem(self,n)
1203
1204
1205 try:
1206 p.lr_after = Prodnames[p.prod[n+1]]
1207 except (IndexError,KeyError):
1208 p.lr_after = []
1209 try:
1210 p.lr_before = p.prod[n-1]
1211 except IndexError:
1212 p.lr_before = None
1213
1214 return p
1215

    # Bind the production function name to a callable found in pdict
    def bind(self,pdict):
1218 if self.func:
1219 self.callable = pdict[self.func]
1220

# This class serves as a minimal standin for Production objects when
# reading parser tables from files.  It only contains the information
# actually used by the LR parsing engine, plus debugging details.
class MiniProduction(object):
    def __init__(self,str,name,len,func,file,line):
1227 self.name = name
1228 self.len = len
1229 self.func = func
1230 self.callable = None
1231 self.file = file
1232 self.line = line
1233 self.str = str
    def __repr__(self):
        return "MiniProduction(%s)" % self.str
1238

    def bind(self,pdict):
1241 if self.func:
1242 self.callable = pdict[self.func]
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Useful
# attributes include lr_next (the next item in the chain), lr_before (the
# grammar symbol immediately before the dot), and lr_after (the list of
# productions that can legally appear immediately to the right of the dot).
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self,p,n):
1271 self.name = p.name
1272 self.prod = list(p.prod)
1273 self.number = p.number
1274 self.lr_index = n
1275 self.lookaheads = { }
1276 self.prod.insert(n,".")
1277 self.prod = tuple(self.prod)
1278 self.len = len(self.prod)
1279 self.usyms = p.usyms

    def __str__(self):
1282 if self.prod:
1283 s = "%s -> %s" % (self.name," ".join(self.prod))
1284 else:
1285 s = "%s -> <empty>" % self.name
1286 return s

    def __repr__(self):
1289 return "LRItem("+str(self)+")"
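
    # Example (illustrative only): for the production  expr -> expr PLUS term,
    # successive LRItem objects print as
    #
    #     expr -> . expr PLUS term
    #     expr -> expr . PLUS term
    #     expr -> expr PLUS . term
    #     expr -> expr PLUS term .
    #
    # with lr_before giving the symbol just before the dot and lr_after the
    # list of productions for the nonterminal that follows the dot.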

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols, or None if there is
# none.  Used by add_production() to compute a rule's default precedence.
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
1297 i = len(symbols) - 1
1298 while i >= 0:
1299 if symbols[i] in terminals:
1300 return symbols[i]
1301 i -= 1
1302 return None

# -----------------------------------------------------------------------------
#                               === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items,
# unused symbols, and so forth.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass

class Grammar(object):
    def __init__(self,terminals):
1316 self.Productions = [None]
1317
1318
1319
1320 self.Prodnames = { }
1321
1322
1323 self.Prodmap = { }
1324
1325
1326 self.Terminals = { }
1327
1328
1329 for term in terminals:
1330 self.Terminals[term] = []
1331
1332 self.Terminals['error'] = []
1333
1334 self.Nonterminals = { }
1335
1336
1337 self.First = { }
1338
1339 self.Follow = { }
1340
1341 self.Precedence = { }
1342
1343
1344 self.UsedPrecedence = { }
1345
1346
1347
1348 self.Start = None
1349

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self,index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal.  assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'.  level is a numeric precedence level.
    # -----------------------------------------------------------------------------

    def set_precedence(self,term,assoc,level):
1366 assert self.Productions == [None],"Must call set_precedence() before add_production()"
1367 if term in self.Precedence:
1368 raise GrammarError("Precedence already specified for terminal '%s'" % term)
1369 if assoc not in ['left','right','nonassoc']:
1370 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1371 self.Precedence[term] = (assoc,level)
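
    # Illustrative sketch: the user-facing 'precedence' table in a grammar
    # module is translated into calls of this method (see the yacc() function
    # near the end of this file).  For a hypothetical calculator grammar:
    #
    #     precedence = (
    #         ('left',  'PLUS', 'MINUS'),
    #         ('left',  'TIMES', 'DIVIDE'),
    #         ('right', 'UMINUS'),
    #     )
    #
    # becomes, roughly:
    #
    #     grammar.set_precedence('PLUS',   'left',  1)
    #     grammar.set_precedence('MINUS',  'left',  1)
    #     grammar.set_precedence('TIMES',  'left',  2)
    #     grammar.set_precedence('DIVIDE', 'left',  2)
    #     grammar.set_precedence('UMINUS', 'right', 3)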

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.  The rule is supplied as a name (prodname)
    # and a list of right-hand-side symbols (syms).  Precedence is determined by
    # the rightmost terminal in the rule or by an explicit %prec specifier.
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):
1392 if prodname in self.Terminals:
1393 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1394 if prodname == 'error':
1395 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1396 if not _is_identifier.match(prodname):
1397 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1398
1399
1400 for n,s in enumerate(syms):
1401 if s[0] in "'\"":
1402 try:
1403 c = eval(s)
1404 if (len(c) > 1):
1405 raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1406 if not c in self.Terminals:
1407 self.Terminals[c] = []
1408 syms[n] = c
1409 continue
1410 except SyntaxError:
1411 pass
1412 if not _is_identifier.match(s) and s != '%prec':
1413 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1414
1415
1416 if '%prec' in syms:
1417 if syms[-1] == '%prec':
1418 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1419 if syms[-2] != '%prec':
1420 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1421 precname = syms[-1]
1422 prodprec = self.Precedence.get(precname,None)
1423 if not prodprec:
1424 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1425 else:
1426 self.UsedPrecedence[precname] = 1
1427 del syms[-2:]
1428 else:
1429
1430 precname = rightmost_terminal(syms,self.Terminals)
1431 prodprec = self.Precedence.get(precname,('right',0))
1432
1433
1434 map = "%s -> %s" % (prodname,syms)
1435 if map in self.Prodmap:
1436 m = self.Prodmap[map]
1437 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1438 "Previous definition at %s:%d" % (m.file, m.line))
1439
1440
1441 pnumber = len(self.Productions)
1442 if not prodname in self.Nonterminals:
1443 self.Nonterminals[prodname] = [ ]
1444
1445
1446 for t in syms:
1447 if t in self.Terminals:
1448 self.Terminals[t].append(pnumber)
1449 else:
1450 if not t in self.Nonterminals:
1451 self.Nonterminals[t] = [ ]
1452 self.Nonterminals[t].append(pnumber)
1453
1454
1455 p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1456 self.Productions.append(p)
1457 self.Prodmap[map] = p
1458
1459
1460 try:
1461 self.Prodnames[prodname].append(p)
1462 except KeyError:
1463 self.Prodnames[prodname] = [ p ]
1464 return 0
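
    # Illustrative sketch: a single p_* function docstring such as
    #
    #     def p_expr(p):
    #         '''expr : expr PLUS term
    #                 | term'''
    #
    # results in two calls to this method, roughly:
    #
    #     g.add_production('expr', ['expr', 'PLUS', 'term'], func='p_expr', file='calc.py', line=...)
    #     g.add_production('expr', ['term'],                 func='p_expr', file='calc.py', line=...)
    #
    # (file name hypothetical).  Quoted single-character literals appearing in
    # syms are registered as terminals automatically.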
1465
1466
1467
1468
1469
1470
1471
1472
1481

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):
1492 def mark_reachable_from(s):
1493 if reachable[s]:
1494
1495 return
1496 reachable[s] = 1
1497 for p in self.Prodnames.get(s,[]):
1498 for r in p.prod:
1499 mark_reachable_from(r)
1500
1501 reachable = { }
1502 for s in list(self.Terminals) + list(self.Nonterminals):
1503 reachable[s] = 0
1504
1505 mark_reachable_from( self.Productions[0].prod[0] )
1506
1507 return [s for s in list(self.Nonterminals)
1508 if not reachable[s]]
1509
1510

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the grammar rules and tries to detect infinite
    # recursion cycles (rules from which no string of only terminals can ever be
    # derived).  Returns a list of offending symbols.
    # -----------------------------------------------------------------------------
    def infinite_cycles(self):
1519 terminates = {}
1520
1521
1522 for t in self.Terminals:
1523 terminates[t] = 1
1524
1525 terminates['$end'] = 1
1526
1527
1528
1529
1530 for n in self.Nonterminals:
1531 terminates[n] = 0
1532
1533
1534 while 1:
1535 some_change = 0
1536 for (n,pl) in self.Prodnames.items():
1537
1538 for p in pl:
1539
1540 for s in p.prod:
1541 if not terminates[s]:
1542
1543
1544 p_terminates = 0
1545 break
1546 else:
1547
1548
1549
1550 p_terminates = 1
1551
1552 if p_terminates:
1553
1554 if not terminates[n]:
1555 terminates[n] = 1
1556 some_change = 1
1557
1558 break
1559
1560 if not some_change:
1561 break
1562
1563 infinite = []
1564 for (s,term) in terminates.items():
1565 if not term:
1566 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1567
1568
1569 pass
1570 else:
1571 infinite.append(s)
1572
1573 return infinite
1574
1575

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
1584 result = []
1585 for p in self.Productions:
1586 if not p: continue
1587
1588 for s in p.prod:
1589 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1590 result.append((s,p))
1591 return result
1592
1593

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
1600 unused_tok = []
1601 for s,v in self.Terminals.items():
1602 if s != 'error' and not v:
1603 unused_tok.append(s)
1604
1605 return unused_tok
1606
1607

    # -----------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but never used anywhere.
    # -----------------------------------------------------------------------------
    def unused_rules(self):
1615 unused_prod = []
1616 for s,v in self.Nonterminals.items():
1617 if not v:
1618 p = self.Prodnames[s][0]
1619 unused_prod.append(p)
1620 return unused_prod
1621
1622

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the terminal name and
    # precedence is its associativity string such as 'left' or 'right'.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
1632 unused = []
1633 for termname in self.Precedence:
1634 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1635 unused.append((termname,self.Precedence[termname][0]))
1636
1637 return unused
1638
1639

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    # While compute_first() is still running the result may be incomplete;
    # afterwards (e.g. when called from compute_follow()) it is complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):
1650 result = [ ]
1651 for x in beta:
1652 x_produces_empty = 0
1653
1654
1655 for f in self.First[x]:
1656 if f == '<empty>':
1657 x_produces_empty = 1
1658 else:
1659 if f not in result: result.append(f)
1660
1661 if x_produces_empty:
1662
1663
1664 pass
1665 else:
1666
1667 break
1668 else:
1669
1670
1671
1672 result.append('<empty>')
1673
1674 return result
1675
1676

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols.
    # -------------------------------------------------------------------------
    def compute_first(self):
1682 if self.First:
1683 return self.First
1684
1685
1686 for t in self.Terminals:
1687 self.First[t] = [t]
1688
1689 self.First['$end'] = ['$end']
1690
1691
1692
1693
1694 for n in self.Nonterminals:
1695 self.First[n] = []
1696
1697
1698 while 1:
1699 some_change = 0
1700 for n in self.Nonterminals:
1701 for p in self.Prodnames[n]:
1702 for f in self._first(p.prod):
1703 if f not in self.First[n]:
1704 self.First[n].append( f )
1705 some_change = 1
1706 if not some_change:
1707 break
1708
1709 return self.First
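
    # Worked example (illustrative): for the grammar
    #
    #     E : E PLUS T | T
    #     T : T TIMES F | F
    #     F : LPAREN E RPAREN | NUMBER
    #
    # repeated application of the loop above converges to
    #
    #     First[F] = First[T] = First[E] = ['LPAREN', 'NUMBER']
    #
    # while First[t] = [t] for every terminal t.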

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes the follow set for every non-terminal symbol, i.e. the set of
    # all terminals that might immediately follow that non-terminal in some
    # sentential form.
    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
1720 if self.Follow:
1721 return self.Follow
1722
1723
1724 if not self.First:
1725 self.compute_first()
1726
1727
1728 for k in self.Nonterminals:
1729 self.Follow[k] = [ ]
1730
1731 if not start:
1732 start = self.Productions[1].name
1733
1734 self.Follow[start] = [ '$end' ]
1735
1736 while 1:
1737 didadd = 0
1738 for p in self.Productions[1:]:
1739
1740 for i in range(len(p.prod)):
1741 B = p.prod[i]
1742 if B in self.Nonterminals:
1743
1744 fst = self._first(p.prod[i+1:])
1745 hasempty = 0
1746 for f in fst:
1747 if f != '<empty>' and f not in self.Follow[B]:
1748 self.Follow[B].append(f)
1749 didadd = 1
1750 if f == '<empty>':
1751 hasempty = 1
1752 if hasempty or i == (len(p.prod)-1):
1753
1754 for f in self.Follow[p.name]:
1755 if f not in self.Follow[B]:
1756 self.Follow[B].append(f)
1757 didadd = 1
1758 if not didadd: break
1759 return self.Follow
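
    # Worked example (illustrative), continuing the grammar sketched above
    # with start symbol E:
    #
    #     Follow[E] = ['$end', 'PLUS', 'RPAREN']
    #     Follow[T] = Follow[E] + ['TIMES'] = ['$end', 'PLUS', 'RPAREN', 'TIMES']
    #     Follow[F] = Follow[T]             = ['$end', 'PLUS', 'RPAREN', 'TIMES']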

    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of
    # LR items, linking each item to the next via lr_next.  For example, the rule
    #
    #     E -> E PLUS E
    #
    # yields the chain of items
    #
    #     E -> . E PLUS E,  E -> E . PLUS E,  E -> E PLUS . E,  E -> E PLUS E .
    # -----------------------------------------------------------------------------

    def build_lritems(self):
1778 for p in self.Productions:
1779 lastlri = p
1780 i = 0
1781 lr_items = []
1782 while 1:
1783 if i > len(p):
1784 lri = None
1785 else:
1786 lri = LRItem(p,i)
1787
1788 try:
1789 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1790 except (IndexError,KeyError):
1791 lri.lr_after = []
1792 try:
1793 lri.lr_before = lri.prod[i-1]
1794 except IndexError:
1795 lri.lr_before = None
1796
1797 lastlri.lr_next = lri
1798 if not lri: break
1799 lr_items.append(lri)
1800 lastlri = lri
1801 i += 1
1802 p.lr_items = lr_items
1803
1804

# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This basic class represents a set of LR parsing tables, either read from a
# previously generated table module or pickle file, or built by the
# LRGeneratedTable subclass defined further below.
# -----------------------------------------------------------------------------

class VersionError(YaccError): pass

class LRTable(object):
    def __init__(self):
1816 self.lr_action = None
1817 self.lr_goto = None
1818 self.lr_productions = None
1819 self.lr_method = None

    def read_table(self,module):
1822 if isinstance(module,types.ModuleType):
1823 parsetab = module
1824 else:
1825 if sys.version_info[0] < 3:
1826 exec("import %s as parsetab" % module)
1827 else:
1828 env = { }
1829 exec("import %s as parsetab" % module, env, env)
1830 parsetab = env['parsetab']
1831
1832 if parsetab._tabversion != __tabversion__:
1833 raise VersionError("yacc table file version is out of date")
1834
1835 self.lr_action = parsetab._lr_action
1836 self.lr_goto = parsetab._lr_goto
1837
1838 self.lr_productions = []
1839 for p in parsetab._lr_productions:
1840 self.lr_productions.append(MiniProduction(*p))
1841
1842 self.lr_method = parsetab._lr_method
1843 return parsetab._lr_signature

    def read_pickle(self,filename):
1846 try:
1847 import cPickle as pickle
1848 except ImportError:
1849 import pickle
1850
1851 in_f = open(filename,"rb")
1852
1853 tabversion = pickle.load(in_f)
1854 if tabversion != __tabversion__:
1855 raise VersionError("yacc table file version is out of date")
1856 self.lr_method = pickle.load(in_f)
1857 signature = pickle.load(in_f)
1858 self.lr_action = pickle.load(in_f)
1859 self.lr_goto = pickle.load(in_f)
1860 productions = pickle.load(in_f)
1861
1862 self.lr_productions = []
1863 for p in productions:
1864 self.lr_productions.append(MiniProduction(*p))
1865
1866 in_f.close()
1867 return signature

    # Bind all production function names to callable objects found in pdict
    def bind_callables(self,pdict):
1871 for p in self.lr_productions:
1872 p.bind(pdict)
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions compute set-valued functions of the form
#
#     F(x) = F'(x) U union( F(y)  for all y such that x R y )
#
# using the DeRemer and Pennello graph traversal algorithm.  X is a set of
# nodes, R is a function returning the nodes related to x, and FP computes
# the base set F'(x).
# -----------------------------------------------------------------------------

def digraph(X,R,FP):
1899 N = { }
1900 for x in X:
1901 N[x] = 0
1902 stack = []
1903 F = { }
1904 for x in X:
1905 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1906 return F
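
# Illustrative sketch of digraph()/traverse() on a toy relation (all names
# below are hypothetical, not part of the module):
#
#     X  = ['a', 'b', 'c']
#     R  = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]   # x R y edges
#     FP = lambda x: {'a': [1], 'b': [2], 'c': [3]}[x]      # base sets F'(x)
#     F  = digraph(X, R, FP)
#     # F == {'a': [1, 2, 3], 'b': [2, 3], 'c': [3]}
#
# i.e. each F[x] is F'(x) unioned with F[y] for every y reachable from x.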

def traverse(x,N,stack,F,X,R,FP):
1909 stack.append(x)
1910 d = len(stack)
1911 N[x] = d
1912 F[x] = FP(x)
1913
1914 rel = R(x)
1915 for y in rel:
1916 if N[y] == 0:
1917 traverse(y,N,stack,F,X,R,FP)
1918 N[x] = min(N[x],N[y])
1919 for a in F.get(y,[]):
1920 if a not in F[x]: F[x].append(a)
1921 if N[x] == d:
1922 N[stack[-1]] = MAXINT
1923 F[stack[-1]] = F[x]
1924 element = stack.pop()
1925 while element != x:
1926 N[stack[-1]] = MAXINT
1927 F[stack[-1]] = F[x]
1928 element = stack.pop()
1929

class LALRError(YaccError): pass

# -----------------------------------------------------------------------------
#                             == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write_table() and pickle_table().
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
1941 if method not in ['SLR','LALR']:
1942 raise LALRError("Unsupported method %s" % method)
1943
1944 self.grammar = grammar
1945 self.lr_method = method
1946
1947
1948 if not log:
1949 log = NullLogger()
1950 self.log = log
1951
1952
1953 self.lr_action = {}
1954 self.lr_goto = {}
1955 self.lr_productions = grammar.Productions
1956 self.lr_goto_cache = {}
1957 self.lr0_cidhash = {}
1958
1959 self._add_count = 0
1960
1961
1962 self.sr_conflict = 0
1963 self.rr_conflict = 0
1964 self.conflicts = []
1965
1966 self.sr_conflicts = []
1967 self.rr_conflicts = []
1968
1969
1970 self.grammar.build_lritems()
1971 self.grammar.compute_first()
1972 self.grammar.compute_follow()
1973 self.lr_parse_table()
1974

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
    def lr0_closure(self,I):
1978 self._add_count += 1
1979
1980
1981 J = I[:]
1982 didadd = 1
1983 while didadd:
1984 didadd = 0
1985 for j in J:
1986 for x in j.lr_after:
1987 if getattr(x,"lr0_added",0) == self._add_count: continue
1988
1989 J.append(x.lr_next)
1990 x.lr0_added = self._add_count
1991 didadd = 1
1992
1993 return J
1994
1995

    # Compute the LR(0) goto function goto(I,X) where I is a set of LR(0)
    # items and X is a grammar symbol.  The function is written so that the
    # same goto set is always returned as the same Python object, which allows
    # the sets to be cached and compared by identity elsewhere.
    def lr0_goto(self,I,x):
2004 g = self.lr_goto_cache.get((id(I),x),None)
2005 if g: return g
2006
2007
2008
2009
2010 s = self.lr_goto_cache.get(x,None)
2011 if not s:
2012 s = { }
2013 self.lr_goto_cache[x] = s
2014
2015 gs = [ ]
2016 for p in I:
2017 n = p.lr_next
2018 if n and n.lr_before == x:
2019 s1 = s.get(id(n),None)
2020 if not s1:
2021 s1 = { }
2022 s[id(n)] = s1
2023 gs.append(n)
2024 s = s1
2025 g = s.get('$end',None)
2026 if not g:
2027 if gs:
2028 g = self.lr0_closure(gs)
2029 s['$end'] = g
2030 else:
2031 s['$end'] = gs
2032 self.lr_goto_cache[(id(I),x)] = g
2033 return g
2034

    # Compute the LR(0) sets-of-items collection
    def lr0_items(self):
2038 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2039 i = 0
2040 for I in C:
2041 self.lr0_cidhash[id(I)] = i
2042 i += 1
2043
2044
2045 i = 0
2046 while i < len(C):
2047 I = C[i]
2048 i += 1
2049
2050
2051 asyms = { }
2052 for ii in I:
2053 for s in ii.usyms:
2054 asyms[s] = None
2055
2056 for x in asyms:
2057 g = self.lr0_goto(I,x)
2058 if not g: continue
2059 if id(g) in self.lr0_cidhash: continue
2060 self.lr0_cidhash[id(g)] = len(C)
2061 C.append(g)
2062
2063 return C
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089

    # Compute the set of all nonterminal symbols that can derive an empty
    # string.  Used by the LALR(1) lookahead computation below.
    def compute_nullable_nonterminals(self):
2094 nullable = {}
2095 num_nullable = 0
2096 while 1:
2097 for p in self.grammar.Productions[1:]:
2098 if p.len == 0:
2099 nullable[p.name] = 1
2100 continue
2101 for t in p.prod:
2102 if not t in nullable: break
2103 else:
2104 nullable[p.name] = 1
2105 if len(nullable) == num_nullable: break
2106 num_nullable = len(nullable)
2107 return nullable
2108
2109
2110
2111
2112
2113
2114
2115
2116

    # Given the LR(0) item sets C, find all (state, nonterminal) transition pairs.
    def find_nonterminal_transitions(self,C):
2121 trans = []
2122 for state in range(len(C)):
2123 for p in C[state]:
2124 if p.lr_index < p.len - 1:
2125 t = (state,p.prod[p.lr_index+1])
2126 if t[1] in self.grammar.Nonterminals:
2127 if t not in trans: trans.append(t)
2128 state = state + 1
2129 return trans
2130
2131
2132
2133
2134
2135
2136

    # Compute the DR (direct-read) relation: the terminals that can appear
    # immediately after the nonterminal transition given by trans.
    def dr_relation(self,C,trans,nullable):
2141 dr_set = { }
2142 state,N = trans
2143 terms = []
2144
2145 g = self.lr0_goto(C[state],N)
2146 for p in g:
2147 if p.lr_index < p.len - 1:
2148 a = p.prod[p.lr_index+1]
2149 if a in self.grammar.Terminals:
2150 if a not in terms: terms.append(a)
2151
2152
2153 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2154 terms.append('$end')
2155
2156 return terms
2157
2158
2159
2160
2161
2162
2163

    # Compute the READS() relation (p,A) READS (t,C)
    def reads_relation(self,C, trans, empty):
2166 rel = []
2167 state, N = trans
2168
2169 g = self.lr0_goto(C[state],N)
2170 j = self.lr0_cidhash.get(id(g),-1)
2171 for p in g:
2172 if p.lr_index < p.len - 1:
2173 a = p.prod[p.lr_index + 1]
2174 if a in empty:
2175 rel.append((j,a))
2176
2177 return rel
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203

    # Determine the LOOKBACK and INCLUDES relations used by the LALR(1)
    # lookahead computation.  Returns a pair of dictionaries
    # (lookdict, includedict) keyed by (state, nonterminal) transitions.
    def compute_lookback_includes(self,C,trans,nullable):
2209 lookdict = {}
2210 includedict = {}
2211
2212
2213 dtrans = {}
2214 for t in trans:
2215 dtrans[t] = 1
2216
2217
2218 for state,N in trans:
2219 lookb = []
2220 includes = []
2221 for p in C[state]:
2222 if p.name != N: continue
2223
2224
2225
2226
2227 lr_index = p.lr_index
2228 j = state
2229 while lr_index < p.len - 1:
2230 lr_index = lr_index + 1
2231 t = p.prod[lr_index]
2232
2233
2234 if (j,t) in dtrans:
2235
2236
2237
2238
2239 li = lr_index + 1
2240 while li < p.len:
2241 if p.prod[li] in self.grammar.Terminals: break
2242 if not p.prod[li] in nullable: break
2243 li = li + 1
2244 else:
2245
2246 includes.append((j,t))
2247
2248 g = self.lr0_goto(C[j],t)
2249 j = self.lr0_cidhash.get(id(g),-1)
2250
2251
2252 for r in C[j]:
2253 if r.name != p.name: continue
2254 if r.len != p.len: continue
2255 i = 0
2256
2257 while i < r.lr_index:
2258 if r.prod[i] != p.prod[i+1]: break
2259 i = i + 1
2260 else:
2261 lookb.append((j,r))
2262 for i in includes:
2263 if not i in includedict: includedict[i] = []
2264 includedict[i].append((state,N))
2265 lookdict[(state,N)] = lookb
2266
2267 return lookdict,includedict
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298

    # Given the nonterminal transitions, the read sets, and the includes
    # relation, compute the follow sets:
    #     Follow(p,A) = Read(p,A) U union( Follow(p',B) | (p,A) INCLUDES (p',B) )
    def compute_follow_sets(self,ntrans,readsets,inclsets):
2304 FP = lambda x: readsets[x]
2305 R = lambda x: inclsets.get(x,[])
2306 F = digraph(ntrans,R,FP)
2307 return F
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317

    # Attach the computed lookahead symbols to the grammar rules, using the
    # lookback relation and the computed follow sets.
    def add_lookaheads(self,lookbacks,followset):
2322 for trans,lb in lookbacks.items():
2323
2324 for state,p in lb:
2325 if not state in p.lookaheads:
2326 p.lookaheads[state] = []
2327 f = followset.get(trans,[])
2328 for a in f:
2329 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2330
2331
2332
2333
2334
2335
2336
2337
2356
2357
2358

    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the final parser tables (action and goto) by
    # walking the LR(0) item sets and resolving conflicts using precedence.
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
2363 Productions = self.grammar.Productions
2364 Precedence = self.grammar.Precedence
2365 goto = self.lr_goto
2366 action = self.lr_action
2367 log = self.log
2368
2369 actionp = { }
2370
2371 log.info("Parsing method: %s", self.lr_method)
2372
2373
2374
2375
2376 C = self.lr0_items()
2377
2378 if self.lr_method == 'LALR':
2379 self.add_lalr_lookaheads(C)
2380
2381
2382 st = 0
2383 for I in C:
2384
2385 actlist = [ ]
2386 st_action = { }
2387 st_actionp = { }
2388 st_goto = { }
2389 log.info("")
2390 log.info("state %d", st)
2391 log.info("")
2392 for p in I:
2393 log.info(" (%d) %s", p.number, str(p))
2394 log.info("")
2395
2396 for p in I:
2397 if p.len == p.lr_index + 1:
2398 if p.name == "S'":
2399
2400 st_action["$end"] = 0
2401 st_actionp["$end"] = p
2402 else:
2403
2404 if self.lr_method == 'LALR':
2405 laheads = p.lookaheads[st]
2406 else:
2407 laheads = self.grammar.Follow[p.name]
2408 for a in laheads:
2409 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2410 r = st_action.get(a,None)
2411 if r is not None:
2412
2413 if r > 0:
2414
2415
2416
2417 sprec,slevel = Productions[st_actionp[a].number].prec
2418 rprec,rlevel = Precedence.get(a,('right',0))
2419 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2420
2421 st_action[a] = -p.number
2422 st_actionp[a] = p
2423 if not slevel and not rlevel:
2424 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2425 self.sr_conflicts.append((st,a,'reduce'))
2426 Productions[p.number].reduced += 1
2427 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2428 st_action[a] = None
2429 else:
2430
2431 if not rlevel:
2432 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2433 self.sr_conflicts.append((st,a,'shift'))
2434 elif r < 0:
2435
2436
2437 oldp = Productions[-r]
2438 pp = Productions[p.number]
2439 if oldp.line > pp.line:
2440 st_action[a] = -p.number
2441 st_actionp[a] = p
2442 chosenp,rejectp = pp,oldp
2443 Productions[p.number].reduced += 1
2444 Productions[oldp.number].reduced -= 1
2445 else:
2446 chosenp,rejectp = oldp,pp
2447 self.rr_conflicts.append((st,chosenp,rejectp))
2448 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2449 else:
2450 raise LALRError("Unknown conflict in state %d" % st)
2451 else:
2452 st_action[a] = -p.number
2453 st_actionp[a] = p
2454 Productions[p.number].reduced += 1
2455 else:
2456 i = p.lr_index
2457 a = p.prod[i+1]
2458 if a in self.grammar.Terminals:
2459 g = self.lr0_goto(I,a)
2460 j = self.lr0_cidhash.get(id(g),-1)
2461 if j >= 0:
2462
2463 actlist.append((a,p,"shift and go to state %d" % j))
2464 r = st_action.get(a,None)
2465 if r is not None:
2466
2467 if r > 0:
2468 if r != j:
2469 raise LALRError("Shift/shift conflict in state %d" % st)
2470 elif r < 0:
2471
2472
2473
2474
2475 rprec,rlevel = Productions[st_actionp[a].number].prec
2476 sprec,slevel = Precedence.get(a,('right',0))
2477 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2478
2479 Productions[st_actionp[a].number].reduced -= 1
2480 st_action[a] = j
2481 st_actionp[a] = p
2482 if not rlevel:
2483 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2484 self.sr_conflicts.append((st,a,'shift'))
2485 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2486 st_action[a] = None
2487 else:
2488
2489 if not slevel and not rlevel:
2490 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2491 self.sr_conflicts.append((st,a,'reduce'))
2492
2493 else:
2494 raise LALRError("Unknown conflict in state %d" % st)
2495 else:
2496 st_action[a] = j
2497 st_actionp[a] = p
2498
2499
2500 _actprint = { }
2501 for a,p,m in actlist:
2502 if a in st_action:
2503 if p is st_actionp[a]:
2504 log.info(" %-15s %s",a,m)
2505 _actprint[(a,m)] = 1
2506 log.info("")
2507
2508 not_used = 0
2509 for a,p,m in actlist:
2510 if a in st_action:
2511 if p is not st_actionp[a]:
2512 if not (a,m) in _actprint:
2513 log.debug(" ! %-15s [ %s ]",a,m)
2514 not_used = 1
2515 _actprint[(a,m)] = 1
2516 if not_used:
2517 log.debug("")
2518
2519
2520
2521 nkeys = { }
2522 for ii in I:
2523 for s in ii.usyms:
2524 if s in self.grammar.Nonterminals:
2525 nkeys[s] = None
2526 for n in nkeys:
2527 g = self.lr0_goto(I,n)
2528 j = self.lr0_cidhash.get(id(g),-1)
2529 if j >= 0:
2530 st_goto[n] = j
2531 log.info(" %-30s shift and go to state %d",n,j)
2532
2533 action[st] = st_action
2534 actionp[st] = st_actionp
2535 goto[st] = st_goto
2536 st += 1
2537
2538
2539
2540
2541
2542

    # Write the LR parsing tables to a Python module that can be imported later
    def write_table(self,modulename,outputdir='',signature=""):
2546 basemodulename = modulename.split(".")[-1]
2547 filename = os.path.join(outputdir,basemodulename) + ".py"
2548 try:
2549 f = open(filename,"w")
2550
2551 f.write("""
2552 # %s
2553 # This file is automatically generated. Do not edit.
2554 _tabversion = %r
2555
2556 _lr_method = %r
2557
2558 _lr_signature = %r
2559 """ % (filename, __tabversion__, self.lr_method, signature))
2560
2561
2562 smaller = 1
2563
2564
2565 if smaller:
2566 items = { }
2567
2568 for s,nd in self.lr_action.items():
2569 for name,v in nd.items():
2570 i = items.get(name)
2571 if not i:
2572 i = ([],[])
2573 items[name] = i
2574 i[0].append(s)
2575 i[1].append(v)
2576
2577 f.write("\n_lr_action_items = {")
2578 for k,v in items.items():
2579 f.write("%r:([" % k)
2580 for i in v[0]:
2581 f.write("%r," % i)
2582 f.write("],[")
2583 for i in v[1]:
2584 f.write("%r," % i)
2585
2586 f.write("]),")
2587 f.write("}\n")
2588
2589 f.write("""
2590 _lr_action = { }
2591 for _k, _v in _lr_action_items.items():
2592 for _x,_y in zip(_v[0],_v[1]):
2593 if not _x in _lr_action: _lr_action[_x] = { }
2594 _lr_action[_x][_k] = _y
2595 del _lr_action_items
2596 """)
2597
2598 else:
2599 f.write("\n_lr_action = { ");
2600 for k,v in self.lr_action.items():
2601 f.write("(%r,%r):%r," % (k[0],k[1],v))
2602 f.write("}\n");
2603
2604 if smaller:
2605
2606 items = { }
2607
2608 for s,nd in self.lr_goto.items():
2609 for name,v in nd.items():
2610 i = items.get(name)
2611 if not i:
2612 i = ([],[])
2613 items[name] = i
2614 i[0].append(s)
2615 i[1].append(v)
2616
2617 f.write("\n_lr_goto_items = {")
2618 for k,v in items.items():
2619 f.write("%r:([" % k)
2620 for i in v[0]:
2621 f.write("%r," % i)
2622 f.write("],[")
2623 for i in v[1]:
2624 f.write("%r," % i)
2625
2626 f.write("]),")
2627 f.write("}\n")
2628
2629 f.write("""
2630 _lr_goto = { }
2631 for _k, _v in _lr_goto_items.items():
2632 for _x,_y in zip(_v[0],_v[1]):
2633 if not _x in _lr_goto: _lr_goto[_x] = { }
2634 _lr_goto[_x][_k] = _y
2635 del _lr_goto_items
2636 """)
2637 else:
2638 f.write("\n_lr_goto = { ");
2639 for k,v in self.lr_goto.items():
2640 f.write("(%r,%r):%r," % (k[0],k[1],v))
2641 f.write("}\n");
2642
2643
2644 f.write("_lr_productions = [\n")
2645 for p in self.lr_productions:
2646 if p.func:
2647 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2648 else:
2649 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2650 f.write("]\n")
2651 f.close()
2652
2653 except IOError:
2654 e = sys.exc_info()[1]
2655 sys.stderr.write("Unable to create '%s'\n" % filename)
2656 sys.stderr.write(str(e)+"\n")
2657 return
2658
2659
2660
2661
2662
2663

    # Write the LR parsing tables to a pickle file
    def pickle_table(self,filename,signature=""):
2667 try:
2668 import cPickle as pickle
2669 except ImportError:
2670 import pickle
2671 outf = open(filename,"wb")
2672 pickle.dump(__tabversion__,outf,pickle_protocol)
2673 pickle.dump(self.lr_method,outf,pickle_protocol)
2674 pickle.dump(signature,outf,pickle_protocol)
2675 pickle.dump(self.lr_action,outf,pickle_protocol)
2676 pickle.dump(self.lr_goto,outf,pickle_protocol)
2677
2678 outp = []
2679 for p in self.lr_productions:
2680 if p.func:
2681 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2682 else:
2683 outp.append((str(p),p.name,p.len,None,None,None))
2684 pickle.dump(outp,outf,pickle_protocol)
2685 outf.close()
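
    # Illustrative usage (filename hypothetical): callers normally reach this
    # through the picklefile argument of yacc(), e.g.
    #
    #     parser = yacc.yacc(picklefile="parser.p")
    #
    # which pickles the tables on the first run and reloads them (via
    # LRTable.read_pickle) on later runs when the grammar signature matches.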
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# Return a dictionary containing all of the symbols defined within a caller
# further down the call stack.  This is used to get the environment associated
# with the yacc() call when no module is explicitly supplied.
# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
2703 try:
2704 raise RuntimeError
2705 except RuntimeError:
2706 e,b,t = sys.exc_info()
2707 f = t.tb_frame
2708 while levels > 0:
2709 f = f.f_back
2710 levels -= 1
2711 ldict = f.f_globals.copy()
2712 if f.f_globals != f.f_locals:
2713 ldict.update(f.f_locals)
2714
2715 return ldict
2716

# -----------------------------------------------------------------------------
# parse_grammar()
#
# Parse a raw grammar rule string (the docstring of a p_* function) into a
# list of (file, line, prodname, syms) tuples.
# -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
2723 grammar = []
2724
2725 pstrings = doc.splitlines()
2726 lastp = None
2727 dline = line
2728 for ps in pstrings:
2729 dline += 1
2730 p = ps.split()
2731 if not p: continue
2732 try:
2733 if p[0] == '|':
2734
2735 if not lastp:
2736 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2737 prodname = lastp
2738 syms = p[1:]
2739 else:
2740 prodname = p[0]
2741 lastp = prodname
2742 syms = p[2:]
2743 assign = p[1]
2744 if assign != ':' and assign != '::=':
2745 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2746
2747 grammar.append((file,dline,prodname,syms))
2748 except SyntaxError:
2749 raise
2750 except Exception:
2751 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
2752
2753 return grammar
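
# Illustrative example: called with file='calc.py' and line=10 (the line of
# the def statement; names hypothetical), the docstring
#
#     '''statement : NAME EQUALS expression
#                   | expression'''
#
# is parsed into
#
#     [('calc.py', 11, 'statement', ['NAME', 'EQUALS', 'expression']),
#      ('calc.py', 12, 'statement', ['expression'])]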

# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser, including
# the start symbol, error function, token list, precedence table, and the
# collection of p_* action functions.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self,pdict,log=None):
2764 self.pdict = pdict
2765 self.start = None
2766 self.error_func = None
2767 self.tokens = None
2768 self.files = {}
2769 self.grammar = []
2770 self.error = 0
2771
2772 if log is None:
2773 self.log = PlyLogger(sys.stderr)
2774 else:
2775 self.log = log
2776
2777
2784
2785
2794

    # Compute a signature over the grammar (used to detect changes)
    def signature(self):
2797 try:
2798 from hashlib import md5
2799 except ImportError:
2800 from md5 import md5
2801 try:
2802 sig = md5()
2803 if self.start:
2804 sig.update(self.start.encode('latin-1'))
2805 if self.prec:
2806 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2807 if self.tokens:
2808 sig.update(" ".join(self.tokens).encode('latin-1'))
2809 for f in self.pfuncs:
2810 if f[3]:
2811 sig.update(f[3].encode('latin-1'))
2812 except (TypeError,ValueError):
2813 pass
2814 return sig.digest()
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2828
2829 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2830
2831 for filename in self.files.keys():
2832 base,ext = os.path.splitext(filename)
2833 if ext != '.py': return 1
2834
2835 try:
2836 f = open(filename)
2837 lines = f.readlines()
2838 f.close()
2839 except IOError:
2840 continue
2841
2842 counthash = { }
2843 for linen,l in enumerate(lines):
2844 linen += 1
2845 m = fre.match(l)
2846 if m:
2847 name = m.group(1)
2848 prev = counthash.get(name)
2849 if not prev:
2850 counthash[name] = linen
2851 else:
2852 self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2853
2854
2856 self.start = self.pdict.get('start')
2857
2858
2860 if self.start is not None:
2861 if not isinstance(self.start,str):
2862 self.log.error("'start' must be a string")
2863
2864
2866 self.error_func = self.pdict.get('p_error')
2867
2868
2870 if self.error_func:
2871 if isinstance(self.error_func,types.FunctionType):
2872 ismethod = 0
2873 elif isinstance(self.error_func, types.MethodType):
2874 ismethod = 1
2875 else:
2876 self.log.error("'p_error' defined, but is not a function or method")
2877 self.error = 1
2878 return
2879
2880 eline = func_code(self.error_func).co_firstlineno
2881 efile = func_code(self.error_func).co_filename
2882 self.files[efile] = 1
2883
2884 if (func_code(self.error_func).co_argcount != 1+ismethod):
2885 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2886 self.error = 1
2887
2888
2890 tokens = self.pdict.get("tokens",None)
2891 if not tokens:
2892 self.log.error("No token list is defined")
2893 self.error = 1
2894 return
2895
2896 if not isinstance(tokens,(list, tuple)):
2897 self.log.error("tokens must be a list or tuple")
2898 self.error = 1
2899 return
2900
2901 if not tokens:
2902 self.log.error("tokens is empty")
2903 self.error = 1
2904 return
2905
2906 self.tokens = tokens
2907
2908
2910
2911 if 'error' in self.tokens:
2912 self.log.error("Illegal token name 'error'. Is a reserved word")
2913 self.error = 1
2914 return
2915
2916 terminals = {}
2917 for n in self.tokens:
2918 if n in terminals:
2919 self.log.warning("Token '%s' multiply defined", n)
2920 terminals[n] = 1
2921
2922
2924 self.prec = self.pdict.get("precedence",None)
2925
2926
2928 preclist = []
2929 if self.prec:
2930 if not isinstance(self.prec,(list,tuple)):
2931 self.log.error("precedence must be a list or tuple")
2932 self.error = 1
2933 return
2934 for level,p in enumerate(self.prec):
2935 if not isinstance(p,(list,tuple)):
2936 self.log.error("Bad precedence table")
2937 self.error = 1
2938 return
2939
2940 if len(p) < 2:
2941 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2942 self.error = 1
2943 return
2944 assoc = p[0]
2945 if not isinstance(assoc,str):
2946 self.log.error("precedence associativity must be a string")
2947 self.error = 1
2948 return
2949 for term in p[1:]:
2950 if not isinstance(term,str):
2951 self.log.error("precedence items must be strings")
2952 self.error = 1
2953 return
2954 preclist.append((term,assoc,level+1))
2955 self.preclist = preclist
2956
2957
2959 p_functions = []
2960 for name, item in self.pdict.items():
2961 if name[:2] != 'p_': continue
2962 if name == 'p_error': continue
2963 if isinstance(item,(types.FunctionType,types.MethodType)):
2964 line = func_code(item).co_firstlineno
2965 file = func_code(item).co_filename
2966 p_functions.append((line,file,name,item.__doc__))
2967
2968
2969 p_functions.sort()
2970 self.pfuncs = p_functions
2971
2972
2973
2975 grammar = []
2976
2977 if len(self.pfuncs) == 0:
2978 self.log.error("no rules of the form p_rulename are defined")
2979 self.error = 1
2980 return
2981
2982 for line, file, name, doc in self.pfuncs:
2983 func = self.pdict[name]
2984 if isinstance(func, types.MethodType):
2985 reqargs = 2
2986 else:
2987 reqargs = 1
2988 if func_code(func).co_argcount > reqargs:
2989 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
2990 self.error = 1
2991 elif func_code(func).co_argcount < reqargs:
2992 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
2993 self.error = 1
2994 elif not func.__doc__:
2995 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
2996 else:
2997 try:
2998 parsed_g = parse_grammar(doc,file,line)
2999 for g in parsed_g:
3000 grammar.append((name, g))
3001 except SyntaxError:
3002 e = sys.exc_info()[1]
3003 self.log.error(str(e))
3004 self.error = 1
3005
3006
3007
3008 self.files[file] = 1
3009
3010
3011
3012
3013 for n,v in self.pdict.items():
3014 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
3015 if n[0:2] == 't_': continue
3016 if n[0:2] == 'p_' and n != 'p_error':
3017 self.log.warning("'%s' not defined as a function", n)
3018 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3019 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3020 try:
3021 doc = v.__doc__.split(" ")
3022 if doc[1] == ':':
3023 self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
3024 func_code(v).co_filename, func_code(v).co_firstlineno,n)
3025 except Exception:
3026 pass
3027
3028 self.grammar = grammar
3029
3030

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser from the grammar rules defined in the calling module, or in
# the module/instance supplied via the 'module' keyword argument.
# -----------------------------------------------------------------------------

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog=None, picklefile=None):
3039
3040 global parse
3041
3042
3043
3044 if picklefile:
3045 write_tables = 0
3046
3047 if errorlog is None:
3048 errorlog = PlyLogger(sys.stderr)
3049
3050
3051 if module:
3052 _items = [(k,getattr(module,k)) for k in dir(module)]
3053 pdict = dict(_items)
3054 else:
3055 pdict = get_caller_module_dict(2)
3056
3057
3058 pinfo = ParserReflect(pdict,log=errorlog)
3059 pinfo.get_all()
3060
3061 if pinfo.error:
3062 raise YaccError("Unable to build parser")
3063
3064
3065 signature = pinfo.signature()
3066
3067
3068 try:
3069 lr = LRTable()
3070 if picklefile:
3071 read_signature = lr.read_pickle(picklefile)
3072 else:
3073 read_signature = lr.read_table(tabmodule)
3074 if optimize or (read_signature == signature):
3075 try:
3076 lr.bind_callables(pinfo.pdict)
3077 parser = LRParser(lr,pinfo.error_func)
3078 parse = parser.parse
3079 return parser
3080 except Exception:
3081 e = sys.exc_info()[1]
3082 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3083 except VersionError:
3084 e = sys.exc_info()
3085 errorlog.warning(str(e))
3086 except Exception:
3087 pass
3088
3089 if debuglog is None:
3090 if debug:
3091 debuglog = PlyLogger(open(debugfile,"w"))
3092 else:
3093 debuglog = NullLogger()
3094
3095 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3096
3097
3098 errors = 0
3099
3100
3101 if pinfo.validate_all():
3102 raise YaccError("Unable to build parser")
3103
3104 if not pinfo.error_func:
3105 errorlog.warning("no p_error() function is defined")
3106
3107
3108 grammar = Grammar(pinfo.tokens)
3109
3110
3111 for term, assoc, level in pinfo.preclist:
3112 try:
3113 grammar.set_precedence(term,assoc,level)
3114 except GrammarError:
3115 e = sys.exc_info()[1]
3116 errorlog.warning("%s",str(e))
3117
3118
3119 for funcname, gram in pinfo.grammar:
3120 file, line, prodname, syms = gram
3121 try:
3122 grammar.add_production(prodname,syms,funcname,file,line)
3123 except GrammarError:
3124 e = sys.exc_info()[1]
3125 errorlog.error("%s",str(e))
3126 errors = 1
3127
3128
3129 try:
3130 if start is None:
3131 grammar.set_start(pinfo.start)
3132 else:
3133 grammar.set_start(start)
3134 except GrammarError:
3135 e = sys.exc_info()[1]
3136 errorlog.error(str(e))
3137 errors = 1
3138
3139 if errors:
3140 raise YaccError("Unable to build parser")
3141
3142
3143 undefined_symbols = grammar.undefined_symbols()
3144 for sym, prod in undefined_symbols:
3145 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
3146 errors = 1
3147
3148 unused_terminals = grammar.unused_terminals()
3149 if unused_terminals:
3150 debuglog.info("")
3151 debuglog.info("Unused terminals:")
3152 debuglog.info("")
3153 for term in unused_terminals:
3154 errorlog.warning("Token '%s' defined, but not used", term)
3155 debuglog.info(" %s", term)
3156
3157
3158 if debug:
3159 debuglog.info("")
3160 debuglog.info("Grammar")
3161 debuglog.info("")
3162 for n,p in enumerate(grammar.Productions):
3163 debuglog.info("Rule %-5d %s", n, p)
3164
3165
3166 unused_rules = grammar.unused_rules()
3167
3168
3169
3170 if len(unused_terminals) == 1:
3171 errorlog.warning("There is 1 unused token")
3172 if len(unused_terminals) > 1:
3173 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3174
3175
3176
3177
3178
3179
3180 if debug:
3181 debuglog.info("")
3182 debuglog.info("Terminals, with rules where they appear")
3183 debuglog.info("")
3184 terms = list(grammar.Terminals)
3185 terms.sort()
3186 for term in terms:
3187 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3188
3189 debuglog.info("")
3190 debuglog.info("Nonterminals, with rules where they appear")
3191 debuglog.info("")
3192 nonterms = list(grammar.Nonterminals)
3193 nonterms.sort()
3194 for nonterm in nonterms:
3195 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3196 debuglog.info("")
3197
3198 if check_recursion:
3199 unreachable = grammar.find_unreachable()
3200 for u in unreachable:
3201 errorlog.warning("Symbol '%s' is unreachable",u)
3202
3203 infinite = grammar.infinite_cycles()
3204 for inf in infinite:
3205 errorlog.error("Infinite recursion detected for symbol '%s'", inf)
3206 errors = 1
3207
3208 unused_prec = grammar.unused_precedence()
3209 for term, assoc in unused_prec:
3210 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
3211 errors = 1
3212
3213 if errors:
3214 raise YaccError("Unable to build parser")
3215
3216
3217 if debug:
3218 errorlog.debug("Generating %s tables", method)
3219
3220 lr = LRGeneratedTable(grammar,method,debuglog)
3221
3222 if debug:
3223 num_sr = len(lr.sr_conflicts)
3224
3225
3226 if num_sr == 1:
3227 errorlog.warning("1 shift/reduce conflict")
3228 elif num_sr > 1:
3229 errorlog.warning("%d shift/reduce conflicts", num_sr)
3230
3231 num_rr = len(lr.rr_conflicts)
3232 if num_rr == 1:
3233 errorlog.warning("1 reduce/reduce conflict")
3234 elif num_rr > 1:
3235 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3236
3237
3238 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3239 debuglog.warning("")
3240 debuglog.warning("Conflicts:")
3241 debuglog.warning("")
3242
3243 for state, tok, resolution in lr.sr_conflicts:
3244 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3245
3246 already_reported = {}
3247 for state, rule, rejected in lr.rr_conflicts:
3248 if (state,id(rule),id(rejected)) in already_reported:
3249 continue
3250 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3251 debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3252 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3253 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3254 already_reported[state,id(rule),id(rejected)] = 1
3255
3256 warned_never = []
3257 for state, rule, rejected in lr.rr_conflicts:
3258 if not rejected.reduced and (rejected not in warned_never):
3259 debuglog.warning("Rule (%s) is never reduced", rejected)
3260 errorlog.warning("Rule (%s) is never reduced", rejected)
3261 warned_never.append(rejected)
3262
3263
3264 if write_tables:
3265 lr.write_table(tabmodule,outputdir,signature)
3266
3267
3268 if picklefile:
3269 lr.pickle_table(picklefile,signature)
3270
3271
3272 lr.bind_callables(pinfo.pdict)
3273 parser = LRParser(lr,pinfo.error_func)
3274
3275 parse = parser.parse
3276 return parser
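
# -----------------------------------------------------------------------------
# Illustrative end-to-end usage (a sketch, not part of this module).  A grammar
# module defines tokens, p_* rule functions, and p_error(), then calls yacc():
#
#     import ply.lex as lex
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS', 'TIMES')
#     t_PLUS  = r'\+'
#     t_TIMES = r'\*'
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#     t_ignore = ' \t'
#     def t_error(t):
#         t.lexer.skip(1)
#
#     precedence = (('left', 'PLUS'), ('left', 'TIMES'))
#
#     def p_expr_binop(p):
#         '''expr : expr PLUS expr
#                 | expr TIMES expr'''
#         p[0] = p[1] + p[3] if p[2] == '+' else p[1] * p[3]
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#     def p_error(p):
#         print("Syntax error at %r" % (p,))
#
#     lexer  = lex.lex()
#     parser = yacc.yacc()                        # builds tables, writes parsetab.py
#     print(parser.parse("2+3*4", lexer=lexer))   # -> 14
# -----------------------------------------------------------------------------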