1   
  2   
  3   
  4  """ 
  5  This file is part of the web2py Web Framework 
  6  Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu> 
  7  License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) 
  8  """ 
  9   
 10  import re 
 11  import cgi 
 12   
 13  __all__ = ['highlight'] 
 14   
 15   
 17   
 18      """ 
 19      Do syntax highlighting. 
 20      """ 
 21   
 22 -    def __init__( 
 23          self, 
 24          mode, 
 25          link=None, 
 26          styles=None, 
 27          ): 
  28          """ 
 29          Initialise highlighter: 
 30              mode = language (PYTHON, WEB2PY,C, CPP, HTML, HTML_PLAIN) 
 31          """ 
 32          styles = styles or {} 
 33          mode = mode.upper() 
 34          if link and link[-1] != '/': 
 35              link = link + '/' 
 36          self.link = link 
 37          self.styles = styles 
 38          self.output = [] 
 39          self.span_style = None 
 40          if mode == 'WEB2PY': 
 41              (mode, self.suppress_tokens) = ('PYTHON', []) 
 42          elif mode == 'PYTHON': 
 43              self.suppress_tokens = ['GOTOHTML'] 
 44          elif mode == 'CPP': 
 45              (mode, self.suppress_tokens) = ('C', []) 
 46          elif mode == 'C': 
 47              self.suppress_tokens = ['CPPKEYWORD'] 
 48          elif mode == 'HTML_PLAIN': 
 49              (mode, self.suppress_tokens) = ('HTML', ['GOTOPYTHON']) 
 50          elif mode == 'HTML': 
 51              self.suppress_tokens = [] 
 52          else: 
 53              raise SyntaxError, 'Unknown mode: %s' % mode 
 54          self.mode = mode 
  55   
 56 -    def c_tokenizer( 
 57          self, 
 58          token, 
 59          match, 
 60          style, 
 61          ): 
  69   
 76          """ 
 77          Callback for python specific highlighting. 
 78          """ 
 79   
 80          value = cgi.escape(match.group()) 
 81          if token == 'MULTILINESTRING': 
 82              self.change_style(token, style) 
 83              self.output.append(value) 
 84              self.strMultilineString = match.group(1) 
 85              return 'PYTHONMultilineString' 
 86          elif token == 'ENDMULTILINESTRING': 
 87              if match.group(1) == self.strMultilineString: 
 88                  self.output.append(value) 
 89                  self.strMultilineString = '' 
 90                  return 'PYTHON' 
 91          if style and style[:5] == 'link:': 
 92              self.change_style(None, None) 
 93              (url, style) = style[5:].split(';', 1) 
 94              if url == 'None' or url == '': 
 95                  self.output.append('<span style="%s">%s</span>' 
 96                                      % (style, value)) 
 97              else: 
 98                  self.output.append('<a href="%s%s" style="%s">%s</a>' 
 99                                      % (url, value, style, value)) 
100          else: 
101              self.change_style(token, style) 
102              self.output.append(value) 
103          if token == 'GOTOHTML': 
104              return 'HTML' 
105          return None 
 106   
107 -    def html_tokenizer( 
108          self, 
109          token, 
110          match, 
111          style, 
112          ): 
 113          """ 
114          Callback for HTML specific highlighting. 
115          """ 
116   
117          value = cgi.escape(match.group()) 
118          self.change_style(token, style) 
119          self.output.append(value) 
120          if token == 'GOTOPYTHON': 
121              return 'PYTHON' 
122          return None 
 123   
124      all_styles = { 
125          'C': (c_tokenizer, ( 
126              ('COMMENT', re.compile(r'//.*\r?\n'), 
127               'color: green; font-style: italic'), 
128              ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL), 
129               'color: green; font-style: italic'), 
130              ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n', 
131               re.DOTALL), 'color: magenta; font-style: italic'), 
132              ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'), 
133               'font-weight: bold'), 
134              ('NUMBER', 
135               re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'), 
136               'color: red'), 
137              ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|' 
138                + r'signed|unsigned|float|double|' 
139                + r'goto|break|return|continue|asm|' 
140                + r'case|default|if|else|switch|while|for|do|' 
141                + r'struct|union|enum|typedef|' 
142                + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'), 
143               'color:#185369; font-weight: bold'), 
144              ('CPPKEYWORD', 
145               re.compile(r'(class|private|protected|public|template|new|delete|' 
146                + r'this|friend|using|inline|export|bool|throw|try|catch|' 
147                + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'), 
148               'color: blue; font-weight: bold'), 
149              ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'), 
150               'color: #FF9966'), 
151              ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'), 
152               None), 
153              ('WHITESPACE', re.compile(r'[   \r\n]+'), 'Keep'), 
154              )), 
155          'PYTHON': (python_tokenizer, ( 
156              ('GOTOHTML', re.compile(r'\}\}'), 'color: red'), 
157              ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'), 
158               'font-weight: bold'), 
159              ('NUMBER', 
160               re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+' 
161               ), 'color: red'), 
162              ('KEYWORD', 
163               re.compile(r'(def|class|break|continue|del|exec|finally|pass|' 
164                + r'print|raise|return|try|except|global|assert|lambda|' 
165                + r'yield|for|while|if|elif|else|and|in|is|not|or|import|' 
166                + r'from|True|False)(?![a-zA-Z0-9_])'), 
167               'color:#185369; font-weight: bold'), 
168              ('WEB2PY', 
169               re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])' 
170               ), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'), 
171              ('MAGIC', re.compile(r'self|None'), 
172               'color:#185369; font-weight: bold'), 
173              ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'), 
174               'color: #FF9966'), 
175              ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"' 
176               ), 'color: #FF9966'), 
177              ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'), 
178               None), 
179              ('COMMENT', re.compile(r'\#.*\r?\n'), 
180               'color: green; font-style: italic'), 
181              ('WHITESPACE', re.compile(r'[   \r\n]+'), 'Keep'), 
182              )), 
183          'PYTHONMultilineString': (python_tokenizer, 
184                                    (('ENDMULTILINESTRING', 
185                                    re.compile(r'.*?("""|\'\'\')', 
186                                    re.DOTALL), 'color: darkred'), )), 
187          'HTML': (html_tokenizer, ( 
188              ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'), 
189              ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'), 
190               'color: green; font-style: italic'), 
191              ('XMLCRAP', re.compile(r'<![^>]*>'), 
192               'color: blue; font-style: italic'), 
193              ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE 
194                + re.DOTALL), 'color: black'), 
195              ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'), 
196               'color: darkred; font-weight: bold'), 
197              ('ENDTAG', re.compile(r'/?>'), 
198               'color: darkred; font-weight: bold'), 
199              )), 
200          } 
201   
203          """ 
204          Syntax highlight some python code. 
205          Returns html version of code. 
206          """ 
207   
208          i = 0 
209          mode = self.mode 
210          while i < len(data): 
211              for (token, o_re, style) in Highlighter.all_styles[mode][1]: 
212                  if not token in self.suppress_tokens: 
213                      match = o_re.match(data, i) 
214                      if match: 
215                          if style: 
216                              new_mode = \ 
217                                  Highlighter.all_styles[mode][0](self, 
218                                      token, match, style 
219                                       % dict(link=self.link)) 
220                          else: 
221                              new_mode = \ 
222                                  Highlighter.all_styles[mode][0](self, 
223                                      token, match, style) 
224                          if not new_mode is None: 
225                              mode = new_mode 
226                          i += max(1, len(match.group())) 
227                          break 
228              else: 
229                  self.change_style(None, None) 
230                  self.output.append(data[i]) 
231                  i += 1 
232          self.change_style(None, None) 
233          return ''.join(self.output).expandtabs(4) 
 234   
236          """ 
237          Generate output to change from existing style to another style only. 
238          """ 
239   
240          if token in self.styles: 
241              style = self.styles[token] 
242          if self.span_style != style: 
243              if style != 'Keep': 
244                  if not self.span_style is None: 
245                      self.output.append('</span>') 
246                  if not style is None: 
247                      self.output.append('<span style="%s">' % style) 
248                  self.span_style = style 
  249   
250   
def _escape_html(text):
    """
    Escape &, < and > for HTML output.

    Same replacements as cgi.escape(text) without quote escaping; done
    locally because cgi.escape no longer exists on Python >= 3.8.
    """
    # '&' must be replaced first so the entities added below survive
    return text.replace('&', '&amp;').replace('<', '&lt;'
            ).replace('>', '&gt;')


def highlight(
    code,
    language,
    link='/examples/globals/vars/',
    counter=1,
    styles=None,
    highlight_line=None,
    context_lines=None,
    attributes=None,
    ):
    """
    Render code as an html table: line numbers in the first cell, the
    (optionally syntax highlighted) code in the second.

        code = the source text
        language = PYTHON, C, CPP, HTML or WEB2PY (case-insensitive) to get
                   syntax highlighting; anything else (or None) only
                   html-escapes the code
        link = base url handed to the Highlighter
        counter = None for no line numbers, a string to repeat in front of
                  every line, otherwise the number of the first line
        styles = dict of css overrides; the keys 'CODE', 'LINENUMBERS' and
                 'LINEHIGHLIGHT' are used here, the dict is also handed to
                 the Highlighter
        highlight_line = line to emphasise: a line number when counter is
                         numeric, otherwise a 0-based index; a line outside
                         the listing is ignored
        context_lines = when set together with highlight_line, trims the
                        listing around the emphasised line
        attributes = dict of '_name': value pairs turned into attributes of
                     the <table> tag (a value of None gives a bare name)
    """
    styles = styles or {}
    attributes = attributes or {}
    code_style = styles.get('CODE', """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        overflow: auto;
        white-space: pre !important;\n""")
    linenumbers_style = styles.get('LINENUMBERS', """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        color: #A0A0A0;\n""")
    linehighlight_style = styles.get('LINEHIGHLIGHT',
                                     'background-color: #EBDDE2;')

    if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
            'WEB2PY']:
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = _escape_html(code)
    lines = code.split('\n')

    if counter is None:
        linenumbers = [''] * len(lines)
    elif isinstance(counter, str):
        linenumbers = [_escape_html(counter)] * len(lines)
    else:
        linenumbers = [str(i + counter) + '.' for i in
                       range(len(lines))]

    if highlight_line:
        if counter and not isinstance(counter, str):
            lineno = highlight_line - counter
        else:
            lineno = highlight_line
        # 0 <= ...: a negative index would silently pick a line counted
        # from the end of the listing
        if 0 <= lineno < len(lines):
            lines[lineno] = '<div style="%s">%s</div>' \
                % (linehighlight_style, lines[lineno])
            linenumbers[lineno] = '<div style="%s">%s</div>' \
                % (linehighlight_style, linenumbers[lineno])

        if context_lines:
            # drop the tail first so the head indexes stay valid
            tail_start = lineno + context_lines
            if 0 <= tail_start < len(lines):
                del lines[tail_start:]
                del linenumbers[tail_start:]
            head_end = lineno - context_lines
            if head_end > 0:
                del lines[:head_end]
                del linenumbers[:head_end]

    code = '<br/>'.join(lines)
    numbers = '<br/>'.join(linenumbers)

    # only keys starting with '_' become attributes of the table tag
    items = [(key[1:].lower(), value) for (key, value) in
             attributes.items() if key[:1] == '_']
    bare = [name for (name, value) in items if value is None]
    # falsy values other than None are dropped; double quotes are swapped
    # for single quotes so the value cannot end the attribute
    valued = ['%s="%s"' % (name, str(value).replace('"', "'"))
              for (name, value) in items if value]
    fa = ' '.join(bare + valued)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr valign="top"><td style="width:40px; text-align: right;"><pre style="%s">%s</pre></td><td><pre style="%s">%s</pre></td></tr></table>'\
         % (fa, linenumbers_style, numbers, code_style, code)
 336   
337   
# Command line use: highlight the file named by the first argument as the
# language named by the second and print a minimal html page to stdout.
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 3:
        sys.stderr.write('usage: %s <source-file> <language>\n'
                         % sys.argv[0])
        sys.exit(1)
    argfp = open(sys.argv[1])
    try:
        data = argfp.read()
    finally:
        # close the file even when reading fails
        argfp.close()
    # single parenthesised argument: prints the same on Python 2 and 3
    print('<html><body>' + highlight(data, sys.argv[2])
          + '</body></html>')
345