File: Synopsis/Parsers/Python/SXRGenerator.py
  1#
  2# Copyright (C) 2008 Stefan Seefeld
  3# All rights reserved.
  4# Licensed to the public under the terms of the GNU LGPL (>= 2),
  5# see the file COPYING for details.
  6#
  7
  8import parser
  9import token
 10import tokenize
 11import symbol
 12import keyword
 13
# Feature probes: these grammar symbols only exist from certain Python
# versions on, so test the 'symbol' module itself rather than comparing
# version numbers.
HAVE_ENCODING_DECL = hasattr(symbol, "encoding_decl") # python 2.3
HAVE_IMPORT_NAME = hasattr(symbol, "import_name") # python 2.4
HAVE_DECORATOR = hasattr(symbol,"decorator") # python 2.4
 17
 18def num_tokens(ptree):
 19    """Count the number of leaf tokens in the given ptree."""
 20
 21    if type(ptree) == str: return 1
 22    else: return sum([num_tokens(n) for n in ptree[1:]])
 23
 24
 25class LexerDebugger:
 26
 27    def __init__(self, lexer):
 28
 29        self.lexer = lexer
 30
 31    def next(self):
 32
 33        n = self.lexer.next()
 34        print 'next is "%s" (%s)'%(n[1], n[0])
 35        return n
 36
# Opening markup written before the first source line; the '%(filename)s'
# slot is filled with the name of the file being cross-referenced.  The
# output document is a single <sxr> root containing one <line> element
# per source line.
header="""<sxr filename="%(filename)s">
<line>"""

# Closing markup written after the last source line.
trailer="""</line>
</sxr>
"""
 43
 44def escape(text):
 45
 46    for p in [('&', '&amp;'), ('"', '&quot;'), ('<', '&lt;'), ('>', '&gt;'),]:
 47        text = text.replace(*p)
 48    return text
 49
 50
 51class SXRGenerator:
 52    """"""
 53
 54    def __init__(self, debug=False):
 55        """"""
 56
 57        self.debug = debug
 58        self.handlers = {}
 59        self.handlers[token.ENDMARKER] = self.handle_end_marker
 60        self.handlers[token.NEWLINE] = self.handle_newline
 61        self.handlers[token.INDENT] = self.handle_indent
 62        self.handlers[token.DEDENT] = self.handle_dedent
 63        self.handlers[token.STRING] = self.handle_string
 64        self.handlers[symbol.funcdef]= self.handle_function
 65        self.handlers[symbol.parameters] = self.handle_parameters
 66        self.handlers[symbol.classdef] = self.handle_class
 67        self.handlers[token.NAME] = self.handle_name
 68        self.handlers[symbol.expr_stmt] = self.handle_expr_stmt
 69        #self.handlers[token.OP] = self.handle_op
 70        self.handlers[symbol.power] = self.handle_power
 71        if HAVE_ENCODING_DECL:
 72            self.handlers[symbol.encoding_decl] = self.handle_encoding_decl
 73        if HAVE_IMPORT_NAME:
 74            self.handlers[symbol.import_as_names] = self.handle_import_as_names
 75            self.handlers[symbol.dotted_as_names] = self.handle_dotted_as_names
 76            self.handlers[symbol.import_from] = self.handle_import_from
 77            self.handlers[symbol.import_name] = self.handle_import_name
 78        else:
 79            self.handlers[symbol.import_stmt] = self.handle_import
 80        if HAVE_DECORATOR:
 81            self.handlers[symbol.decorator] = self.handle_decorator
 82
 83        self.col = 0
 84        self.lineno = 1
 85        self.parameters = []
 86        self.scopes = []
 87
 88    def process_file(self, scope, sourcefile, sxr):
 89
 90        self.scopes = list(scope)
 91        input = open(sourcefile.abs_name, 'r+')
 92        src = input.readlines()
 93        self.lines = len(`len(src) + 1`)
 94        ptree = parser.ast2tuple(parser.suite(''.join(src)))
 95        input.seek(0)
 96        #if self.debug:
 97        #    self.lexer = LexerDebugger(tokenize.generate_tokens(input.readline))
 98        #else:
 99        self.lexer = tokenize.generate_tokens(input.readline)
100        self.sxr = open(sxr, 'w+')
101        lineno_template = '%%%ds' % self.lines
102        lineno = lineno_template % self.lineno
103        self.sxr.write(header % {'filename': sourcefile.name})
104        try:
105            self.handle(ptree)
106        except StopIteration:
107            raise
108        self.sxr.write(trailer)
109        self.sxr.close()
110        self.scopes.pop()
111
112    def handle(self, ptree):
113
114        if type(ptree) == tuple:
115            kind = ptree[0]
116            value = ptree[1:]
117            handler = self.handlers.get(kind, self.default_handler)
118            if self.debug:
119                print handler.__name__#, value
120            handler(value)
121        else:
122            raise Exception("Process error: Type is not a tuple %s" % str(ptree))
123
124
125    def default_handler(self, ptree):
126
127        for node in ptree:
128            if type(node) == tuple: self.handle(node)
129            elif type(node) == str: self.handle_token(node)
130            else: raise Exception("Invalid ptree node")
131
132
133    def next_token(self):
134        """Return the next visible token.
135        Process tokens that are not part of the parse tree silently."""
136
137        t = self.lexer.next()
138        while t[0] in [tokenize.NL, tokenize.COMMENT]:
139            if t[0] is tokenize.NL:
140                self.print_newline()
141            elif t[0] is tokenize.COMMENT:
142                self.print_token(t)
143                if t[1][-1] == '\n': self.print_newline()
144            t = self.lexer.next()
145        return t
146
147
148    def handle_token(self, item = None):
149
150        t = self.next_token()
151        if item is not None and t[1] != item:
152            raise Exception('Internal error in line %d: expected "%s", got "%s" (%d)'%(self.lineno, item, t[1], t[0]))
153        else:
154            self.print_token(t)
155
156
157    def handle_name_as_xref(self, xref, name, from_ = None, type = None):
158
159        kind, value, (srow, scol), (erow, ecol), line = self.next_token()
160        if (kind, value) != (token.NAME, name):
161            raise Exception('Internal error in line %d: expected name "%s", got "%s" (%d)'%(name, self.lineno, item, t[1], t[0]))
162
163        if self.col != scol:
164            self.sxr.write(' ' * (scol - self.col))
165        attrs = []
166        if from_: attrs.append('from="%s"'%from_)
167        if type: attrs.append('type="%s"'%type)
168        a = '<a href="%s" %s>%s</a>'%('.'.join(xref), ' '.join(attrs), value)
169        self.sxr.write(a)
170        self.col = ecol
171
172
173    def handle_tokens(self, ptree):
174
175        tokens = num_tokens(ptree)
176        for i in xrange(tokens):
177            self.handle_token()
178
179
180    def handle_end_marker(self, nodes): pass
181    def handle_newline(self, nodes):
182
183        self.handle_token()
184
185
186    def handle_indent(self, indent):
187
188        self.handle_token()
189
190
191    def handle_dedent(self, dedent):
192
193        self.handle_token()
194
195
196    def handle_string(self, content):
197
198        self.handle_token()
199
200
201    def handle_function(self, nodes):
202
203        if HAVE_DECORATOR:
204            if nodes[0][0] == symbol.decorators:
205                offset = 1
206                # FIXME
207                self.handle(nodes[0])
208            else:
209                offset = 0
210        else:
211            offset = 0
212
213        def_token = nodes[0 + offset]
214        self.handle_token(def_token[1])
215        name = nodes[1 + offset][1]
216        qname = tuple(self.scopes + [name])
217        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
218        # Handle the parameters.
219        self.handle(nodes[2 + offset])
220
221        colon_token = nodes[3 + offset]
222        self.handle_token(colon_token[1])
223        body = nodes[4 + offset]
224        # Theoretically, we'd have to push the function scope here.
225        # Practically, however, we don't inject xrefs (yet) into function bodies.
226        self.handle_tokens(body)
227
228        # Don't traverse the function body, since the ASG doesn't handle
229        # local declarations anyways.
230
231
232    def handle_parameters(self, nodes):
233
234        self.handle_token(nodes[0][1])
235        if nodes[1][0] == symbol.varargslist:
236            args = list(nodes[1][1:])
237            while args:
238                if args[0][0] == token.COMMA:
239                    self.handle_token(args[0][1])
240                    pass
241                elif args[0][0] == symbol.fpdef:
242                    self.handle_tokens(args[0])
243                elif args[0][0] == token.EQUAL:
244                    self.handle_token(args[0][1])
245                    del args[0]
246                    self.handle_tokens(args[0])
247                elif args[0][0] == token.DOUBLESTAR:
248                    self.handle_token(args[0][1])
249                    del args[0]
250                    self.handle_token(args[0][1])
251                elif args[0][0] == token.STAR:
252                    self.handle_token(args[0][1])
253                    del args[0]
254                    self.handle_token(args[0][1])
255                else:
256                    print "Unknown symbol:",args[0]
257                del args[0]
258        self.handle_token(nodes[-1][1])
259
260
261    def handle_class(self, nodes):
262
263        class_token = nodes[0]
264        self.handle_token(class_token[1])
265        name = nodes[1][1]
266        qname = tuple(self.scopes + [name])
267        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
268        base_clause = nodes[2][0] == token.LPAR and nodes[3] or None
269        self.handle_tokens(nodes[2])
270        bases = []
271        if base_clause:
272            self.handle_tokens(base_clause)
273            self.handle_token(')')
274            self.handle_token(':')
275
276            body = nodes[6]
277        else:
278            body = nodes[3]
279        self.scopes.append(name)
280        self.handle(body)
281        self.scopes.pop()
282
283
284    def handle_name(self, content):
285
286        self.handle_token(content[0])
287
288
289    def handle_expr_stmt(self, nodes):
290
291        for n in nodes: self.handle_tokens(n)
292
293
294    def handle_dotted_name(self, dname, rest):
295
296        self.handle_token(dname[0])
297        for name in dname[1:]:
298            self.handle_token('.')
299            self.handle_token(name)
300        map(self.handle, rest)
301
302
303    def handle_op(self, nodes): pass
304
305
306    def handle_power(self, content):
307
308        def get_dotted_name(content):
309            if content[0][0] != symbol.atom or content[0][1][0] != token.NAME:
310                return None
311            dotted_name = [content[0][1][1]]
312            i = 1
313            for param in content[1:]:
314                if param[0] != symbol.trailer: break
315                if param[1][0] != token.DOT: break
316                if param[2][0] != token.NAME: break
317                dotted_name.append(param[2][1])
318                i += 1
319            if i < len(content): return dotted_name, content[i:]
320            else: return dotted_name, []
321
322        name = get_dotted_name(content)
323        if name: self.handle_dotted_name(*name)
324        else: map(self.handle, content)
325
326
327    def handle_encoding_decl(self, nodes):
328
329        # For some reason the encoding is the last tuple item
330        for n in nodes[:-1]: self.handle(n)
331
332    def handle_import_as_names(self, nodes):
333
334        for n in nodes: self.handle(n)
335
336
337    def handle_dotted_as_names(self, nodes):
338
339        for n in nodes: self.handle(n)
340
341
342    def handle_import_from(self, nodes):
343
344        self.handle_token('from')
345        self.handle(nodes[1])
346        self.handle_token('import')
347        self.handle(nodes[3])
348
349
350    def handle_import_name(self, nodes):
351
352        self.handle_token('import')
353        self.handle_dotted_as_names(nodes[1][1:])
354
355
356    def handle_import(self, nodes):
357
358        #self.handle_token('import')
359        for n in nodes: self.handle(n)
360
361
362    def handle_decorator(self, nodes): pass
363
364
365    def print_token(self, t):
366
367        kind, value, (srow, scol), (erow, ecol), line = t
368        if kind == token.NEWLINE:
369            self.print_newline()
370        else:
371            if self.col != scol:
372                self.sxr.write(' ' * (scol - self.col))
373            if keyword.iskeyword(value):
374                format = '<span class="py-keyword">%s</span>'
375            elif kind == token.STRING:
376                format = '<span class="py-string">%s</span>'
377                chunks = value.split('\n')
378                for c in chunks[:-1]:
379                    self.sxr.write(format % escape(c))
380                    self.print_newline()
381                value = chunks[-1]
382
383            elif kind == tokenize.COMMENT:
384                format = '<span class="py-comment">%s</span>'
385                if value[-1] == '\n': value = value[:-1]
386            else:
387                format = '%s'
388
389            self.sxr.write(format % escape(value))
390            self.col = ecol
391
392
393    def print_newline(self):
394
395        self.col = 0
396        self.lineno += 1
397        self.sxr.write('</line>\n')
398        self.sxr.write('<line>')
399
400
401