#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
    Lexing error finder
    ~~~~~~~~~~~~~~~~~~~

    For the source files given on the command line, display
    the text where Error tokens are being generated, along
    with some context.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
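
# Command-line options (parsed in the __main__ block below):
#   -n NUM   show NUM tokens of context before the error token (default: 10)
#   -a       show all tokens instead of only the context before the error
#   -l NAME  force a lexer by name instead of guessing it from the filename
#   -O OPTS  set lexer options (parsed like pygments' command-line -O)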

import sys, os

# always prefer Pygments from source if it exists
srcpath = os.path.join(os.path.dirname(__file__), '..')
if os.path.isdir(os.path.join(srcpath, 'pygments')):
    sys.path.insert(0, srcpath)


from pygments.lexer import RegexLexer
from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
from pygments.token import Error, Text, _TokenType
from pygments.cmdline import _parse_options


class DebuggingRegexLexer(RegexLexer):
    """Make the state stack, position and current match instance attributes."""
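
    # Essentially RegexLexer.get_tokens_unprocessed, but the lexing state
    # (position, state stack, last match) is kept on ``self`` so that the
    # driver loop in main() can inspect it after every token.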
    def get_tokens_unprocessed(self, text, stack=('root',)):
        """
        Split ``text`` into (tokentype, text) pairs.

        ``stack`` is the initial stack (default: ``['root']``)
        """
        self.pos = 0
        tokendefs = self._tokens
        self.statestack = list(stack)
        statetokens = tokendefs[self.statestack[-1]]
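        # try every rule of the current state at the current position;
        # on a match, emit its tokens, advance, and apply the state change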
        while 1:
            for rexmatch, action, new_state in statetokens:
                self.m = m = rexmatch(text, self.pos)
                if m:
                    if type(action) is _TokenType:
                        yield self.pos, action, m.group()
                    else:
                        # action is a callback that yields the tokens itself
                        for item in action(self, m):
                            yield item
                    self.pos = m.end()
                    if new_state is not None:
                        # state transition
                        if isinstance(new_state, tuple):
                            for state in new_state:
                                if state == '#pop':
                                    self.statestack.pop()
                                elif state == '#push':
                                    self.statestack.append(self.statestack[-1])
                                else:
                                    self.statestack.append(state)
                        elif isinstance(new_state, int):
                            # pop states off the stack (new_state is negative)
                            del self.statestack[new_state:]
                        elif new_state == '#push':
                            self.statestack.append(self.statestack[-1])
                        else:
                            assert False, 'wrong state def: %r' % new_state
                        statetokens = tokendefs[self.statestack[-1]]
                    break
            else:
                # no rule of the current state matched at this position
                try:
                    if text[self.pos] == '\n':
                        # at EOL, reset state to 'root'
                        self.pos += 1
                        self.statestack = ['root']
                        statetokens = tokendefs['root']
                        yield self.pos, Text, u'\n'
                        continue
                    yield self.pos, Error, text[self.pos]
                    self.pos += 1
                except IndexError:
                    break
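

# Analyse one file; return 1 if an Error token was generated, 0 otherwise.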
def main(fn, lexer=None, options={}):
    if lexer is not None:
        lx = get_lexer_by_name(lexer)
    else:
        try:
            lx = get_lexer_for_filename(os.path.basename(fn), **options)
        except ValueError:
            try:
                # fall back: treat everything before the first underscore
                # in the file name as a lexer alias
                name, rest = fn.split('_', 1)
                lx = get_lexer_by_name(name, **options)
            except ValueError:
                raise AssertionError('no lexer found for file %r' % fn)
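    # Rebase the lexer class onto DebuggingRegexLexer so the token loop
    # below can snapshot lx.statestack after every token.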
    debug_lexer = False
    # does not work for e.g. ExtendedRegexLexers
    if lx.__class__.__bases__ == (RegexLexer,):
        lx.__class__.__bases__ = (DebuggingRegexLexer,)
        debug_lexer = True
    elif lx.__class__.__bases__ == (DebuggingRegexLexer,):
        # already debugged before
        debug_lexer = True
    lno = 1
    text = file(fn, 'U').read()
    text = text.strip('\n') + '\n'
    tokens = []
    states = []
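
    # print one token (and the recorded state stack, if any) in aligned
    # columns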
    def show_token(tok, state):
        reprs = map(repr, tok)
        print ' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0],
        if debug_lexer:
            print ' ' + ' ' * (29-len(reprs[0])) + repr(state),
        print

    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error:
            print 'Error parsing', fn, 'on line', lno
            print 'Previous tokens' + (debug_lexer and ' and states' or '') + ':'
            if showall:
                for tok, state in map(None, tokens, states):
                    show_token(tok, state)
            else:
                for i in range(max(len(tokens) - num, 0), len(tokens)):
                    show_token(tokens[i], states[i])
            print 'Error token:'
            l = len(repr(val))
            print ' ' + repr(val),
            if debug_lexer and hasattr(lx, 'statestack'):
                print ' ' * (60-l) + repr(lx.statestack),
            print
            print
            return 1
        tokens.append((type, val))
        if debug_lexer:
            if hasattr(lx, 'statestack'):
                # remember a snapshot of the state stack for this token
                states.append(lx.statestack[:])
            else:
                states.append(None)
    if showall:
        for tok, state in map(None, tokens, states):
            show_token(tok, state)
    return 0


# defaults, may be overridden on the command line
num = 10
showall = False
lexer = None
options = {}

if __name__ == '__main__':
    import getopt
    opts, args = getopt.getopt(sys.argv[1:], 'n:l:aO:')
    for opt, val in opts:
        if opt == '-n':
            num = int(val)
        elif opt == '-a':
            showall = True
        elif opt == '-l':
            lexer = val
        elif opt == '-O':
            options = _parse_options([val])
    ret = 0
    for f in args:
        ret += main(f, lexer, options)
    sys.exit(bool(ret))