# File: Synopsis/Parsers/Python/SXRGenerator.py
import parser
import token
import tokenize
import symbol
import keyword

# Feature probes: the grammar's symbol table differs between Python
# releases, so detect which node kinds this interpreter provides.
HAVE_ENCODING_DECL = hasattr(symbol, 'encoding_decl')
HAVE_IMPORT_NAME = hasattr(symbol, 'import_name')
HAVE_DECORATOR = hasattr(symbol, 'decorator')
17
def num_tokens(ptree):
    """Count the number of leaf tokens in the given ptree.

    A ptree node is either a string (a leaf token) or a tuple whose
    first element is a symbol/token id followed by child nodes."""

    # isinstance instead of a type comparison; a generator expression
    # avoids building an intermediate list.
    if isinstance(ptree, str):
        return 1
    return sum(num_tokens(child) for child in ptree[1:])
23
24
25class LexerDebugger:
26
27 def __init__(self, lexer):
28
29 self.lexer = lexer
30
31 def next(self):
32
33 n = self.lexer.next()
34 print 'next is "%s" (%s)'%(n[1], n[0])
35 return n
36
# Boilerplate wrapped around the cross-referenced source: the opening
# <sxr> element (including the first <line>) and the matching close tags.
header = """<sxr filename="%(filename)s">
<line>"""

trailer = """</line>
</sxr>
"""
43
def escape(text):
    """Return text with XML special characters replaced by entities.

    The original table mapped every character to itself (the entity
    references were lost), making this function a no-op; restore the
    proper XML entities."""

    # '&' must be replaced first so the entities introduced below are
    # not themselves re-escaped.
    for char, entity in [('&', '&amp;'), ('"', '&quot;'),
                         ('<', '&lt;'), ('>', '&gt;')]:
        text = text.replace(char, entity)
    return text
49
50
51class SXRGenerator:
52 """"""
53
54 def __init__(self, debug=False):
55 """"""
56
57 self.debug = debug
58 self.handlers = {}
59 self.handlers[token.ENDMARKER] = self.handle_end_marker
60 self.handlers[token.NEWLINE] = self.handle_newline
61 self.handlers[token.INDENT] = self.handle_indent
62 self.handlers[token.DEDENT] = self.handle_dedent
63 self.handlers[token.STRING] = self.handle_string
64 self.handlers[symbol.funcdef]= self.handle_function
65 self.handlers[symbol.parameters] = self.handle_parameters
66 self.handlers[symbol.classdef] = self.handle_class
67 self.handlers[token.NAME] = self.handle_name
68 self.handlers[symbol.expr_stmt] = self.handle_expr_stmt
69
70 self.handlers[symbol.power] = self.handle_power
71 if HAVE_ENCODING_DECL:
72 self.handlers[symbol.encoding_decl] = self.handle_encoding_decl
73 if HAVE_IMPORT_NAME:
74 self.handlers[symbol.import_as_names] = self.handle_import_as_names
75 self.handlers[symbol.dotted_as_names] = self.handle_dotted_as_names
76 self.handlers[symbol.import_from] = self.handle_import_from
77 self.handlers[symbol.import_name] = self.handle_import_name
78 else:
79 self.handlers[symbol.import_stmt] = self.handle_import
80 if HAVE_DECORATOR:
81 self.handlers[symbol.decorator] = self.handle_decorator
82
83 self.col = 0
84 self.lineno = 1
85 self.parameters = []
86 self.scopes = []
87
88 def process_file(self, scope, sourcefile, sxr):
89
90 self.scopes = list(scope)
91 input = open(sourcefile.abs_name, 'r+')
92 src = input.readlines()
93 self.lines = len(`len(src) + 1`)
94 ptree = parser.ast2tuple(parser.suite(''.join(src)))
95 input.seek(0)
96
97
98
99 self.lexer = tokenize.generate_tokens(input.readline)
100 self.sxr = open(sxr, 'w+')
101 lineno_template = '%%%ds' % self.lines
102 lineno = lineno_template % self.lineno
103 self.sxr.write(header % {'filename': sourcefile.name})
104 try:
105 self.handle(ptree)
106 except StopIteration:
107 raise
108 self.sxr.write(trailer)
109 self.sxr.close()
110 self.scopes.pop()
111
112 def handle(self, ptree):
113
114 if type(ptree) == tuple:
115 kind = ptree[0]
116 value = ptree[1:]
117 handler = self.handlers.get(kind, self.default_handler)
118 if self.debug:
119 print handler.__name__
120 handler(value)
121 else:
122 raise Exception("Process error: Type is not a tuple %s" % str(ptree))
123
124
125 def default_handler(self, ptree):
126
127 for node in ptree:
128 if type(node) == tuple: self.handle(node)
129 elif type(node) == str: self.handle_token(node)
130 else: raise Exception("Invalid ptree node")
131
132
133 def next_token(self):
134 """Return the next visible token.
135 Process tokens that are not part of the parse tree silently."""
136
137 t = self.lexer.next()
138 while t[0] in [tokenize.NL, tokenize.COMMENT]:
139 if t[0] is tokenize.NL:
140 self.print_newline()
141 elif t[0] is tokenize.COMMENT:
142 self.print_token(t)
143 if t[1][-1] == '\n': self.print_newline()
144 t = self.lexer.next()
145 return t
146
147
148 def handle_token(self, item = None):
149
150 t = self.next_token()
151 if item is not None and t[1] != item:
152 raise Exception('Internal error in line %d: expected "%s", got "%s" (%d)'%(self.lineno, item, t[1], t[0]))
153 else:
154 self.print_token(t)
155
156
157 def handle_name_as_xref(self, xref, name, from_ = None, type = None):
158
159 kind, value, (srow, scol), (erow, ecol), line = self.next_token()
160 if (kind, value) != (token.NAME, name):
161 raise Exception('Internal error in line %d: expected name "%s", got "%s" (%d)'%(name, self.lineno, item, t[1], t[0]))
162
163 if self.col != scol:
164 self.sxr.write(' ' * (scol - self.col))
165 attrs = []
166 if from_: attrs.append('from="%s"'%from_)
167 if type: attrs.append('type="%s"'%type)
168 a = '<a href="%s" %s>%s</a>'%('.'.join(xref), ' '.join(attrs), value)
169 self.sxr.write(a)
170 self.col = ecol
171
172
173 def handle_tokens(self, ptree):
174
175 tokens = num_tokens(ptree)
176 for i in xrange(tokens):
177 self.handle_token()
178
179
180 def handle_end_marker(self, nodes): pass
181 def handle_newline(self, nodes):
182
183 self.handle_token()
184
185
186 def handle_indent(self, indent):
187
188 self.handle_token()
189
190
191 def handle_dedent(self, dedent):
192
193 self.handle_token()
194
195
196 def handle_string(self, content):
197
198 self.handle_token()
199
200
201 def handle_function(self, nodes):
202
203 if HAVE_DECORATOR:
204 if nodes[0][0] == symbol.decorators:
205 offset = 1
206
207 self.handle(nodes[0])
208 else:
209 offset = 0
210 else:
211 offset = 0
212
213 def_token = nodes[0 + offset]
214 self.handle_token(def_token[1])
215 name = nodes[1 + offset][1]
216 qname = tuple(self.scopes + [name])
217 self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
218
219 self.handle(nodes[2 + offset])
220
221 colon_token = nodes[3 + offset]
222 self.handle_token(colon_token[1])
223 body = nodes[4 + offset]
224
225
226 self.handle_tokens(body)
227
228
229
230
231
232 def handle_parameters(self, nodes):
233
234 self.handle_token(nodes[0][1])
235 if nodes[1][0] == symbol.varargslist:
236 args = list(nodes[1][1:])
237 while args:
238 if args[0][0] == token.COMMA:
239 self.handle_token(args[0][1])
240 pass
241 elif args[0][0] == symbol.fpdef:
242 self.handle_tokens(args[0])
243 elif args[0][0] == token.EQUAL:
244 self.handle_token(args[0][1])
245 del args[0]
246 self.handle_tokens(args[0])
247 elif args[0][0] == token.DOUBLESTAR:
248 self.handle_token(args[0][1])
249 del args[0]
250 self.handle_token(args[0][1])
251 elif args[0][0] == token.STAR:
252 self.handle_token(args[0][1])
253 del args[0]
254 self.handle_token(args[0][1])
255 else:
256 print "Unknown symbol:",args[0]
257 del args[0]
258 self.handle_token(nodes[-1][1])
259
260
261 def handle_class(self, nodes):
262
263 class_token = nodes[0]
264 self.handle_token(class_token[1])
265 name = nodes[1][1]
266 qname = tuple(self.scopes + [name])
267 self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
268 base_clause = nodes[2][0] == token.LPAR and nodes[3] or None
269 self.handle_tokens(nodes[2])
270 bases = []
271 if base_clause:
272 self.handle_tokens(base_clause)
273 self.handle_token(')')
274 self.handle_token(':')
275
276 body = nodes[6]
277 else:
278 body = nodes[3]
279 self.scopes.append(name)
280 self.handle(body)
281 self.scopes.pop()
282
283
284 def handle_name(self, content):
285
286 self.handle_token(content[0])
287
288
289 def handle_expr_stmt(self, nodes):
290
291 for n in nodes: self.handle_tokens(n)
292
293
294 def handle_dotted_name(self, dname, rest):
295
296 self.handle_token(dname[0])
297 for name in dname[1:]:
298 self.handle_token('.')
299 self.handle_token(name)
300 map(self.handle, rest)
301
302
303 def handle_op(self, nodes): pass
304
305
306 def handle_power(self, content):
307
308 def get_dotted_name(content):
309 if content[0][0] != symbol.atom or content[0][1][0] != token.NAME:
310 return None
311 dotted_name = [content[0][1][1]]
312 i = 1
313 for param in content[1:]:
314 if param[0] != symbol.trailer: break
315 if param[1][0] != token.DOT: break
316 if param[2][0] != token.NAME: break
317 dotted_name.append(param[2][1])
318 i += 1
319 if i < len(content): return dotted_name, content[i:]
320 else: return dotted_name, []
321
322 name = get_dotted_name(content)
323 if name: self.handle_dotted_name(*name)
324 else: map(self.handle, content)
325
326
327 def handle_encoding_decl(self, nodes):
328
329
330 for n in nodes[:-1]: self.handle(n)
331
332 def handle_import_as_names(self, nodes):
333
334 for n in nodes: self.handle(n)
335
336
337 def handle_dotted_as_names(self, nodes):
338
339 for n in nodes: self.handle(n)
340
341
342 def handle_import_from(self, nodes):
343
344 self.handle_token('from')
345 self.handle(nodes[1])
346 self.handle_token('import')
347 self.handle(nodes[3])
348
349
350 def handle_import_name(self, nodes):
351
352 self.handle_token('import')
353 self.handle_dotted_as_names(nodes[1][1:])
354
355
356 def handle_import(self, nodes):
357
358
359 for n in nodes: self.handle(n)
360
361
362 def handle_decorator(self, nodes): pass
363
364
365 def print_token(self, t):
366
367 kind, value, (srow, scol), (erow, ecol), line = t
368 if kind == token.NEWLINE:
369 self.print_newline()
370 else:
371 if self.col != scol:
372 self.sxr.write(' ' * (scol - self.col))
373 if keyword.iskeyword(value):
374 format = '<span class="py-keyword">%s</span>'
375 elif kind == token.STRING:
376 format = '<span class="py-string">%s</span>'
377 chunks = value.split('\n')
378 for c in chunks[:-1]:
379 self.sxr.write(format % escape(c))
380 self.print_newline()
381 value = chunks[-1]
382
383 elif kind == tokenize.COMMENT:
384 format = '<span class="py-comment">%s</span>'
385 if value[-1] == '\n': value = value[:-1]
386 else:
387 format = '%s'
388
389 self.sxr.write(format % escape(value))
390 self.col = ecol
391
392
393 def print_newline(self):
394
395 self.col = 0
396 self.lineno += 1
397 self.sxr.write('</line>\n')
398 self.sxr.write('<line>')
399
400
401
# Generated on Tue Jul 20 09:07:15 2010 by synopsis (version devel)