
As of Go 1.16, the go command defaults to module mode, even when the
repository is checked out under GOPATH or into a one-off directory. Add
go.mod and go.sum so this repo stays buildable without opting out of
module mode:

> go mod init github.com/mmcgrana/gobyexample
> go mod tidy
> go mod vendor

In module mode the vendor directory is special: the go command actively
maintains its contents. pygments is not a dependency the go command knows
about, so it would delete pygments from the vendor directory. Move it to
the `third_party` directory instead, and vendor the blackfriday package.

Note: the tutorial contents are not affected by the Go 1.16 change,
because every example asks the user to run the go command with an explicit
list of files to compile (e.g. `go run hello-world.go` or
`go build command-line-arguments.go`). When the source file list is given
explicitly, the go command does not need to compute the build list, so it
is irrelevant whether it runs in GOPATH mode or module mode.
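For reference, the go.mod produced by the commands above should look
roughly like this (a sketch: the blackfriday import path is assumed to be
github.com/russross/blackfriday, and the version shown is illustrative):

    module github.com/mmcgrana/gobyexample

    go 1.16

    require github.com/russross/blackfriday v1.5.2  // version illustrative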
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
    Lexing error finder
    ~~~~~~~~~~~~~~~~~~~

    For the source files given on the command line, display
    the text where Error tokens are being generated, along
    with some context.

    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from __future__ import print_function

import os
import sys

# always prefer Pygments from source if it exists
srcpath = os.path.join(os.path.dirname(__file__), '..')
if os.path.isdir(os.path.join(srcpath, 'pygments')):
    sys.path.insert(0, srcpath)

from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
    ProfilingRegexLexer, ProfilingRegexLexerMeta
from pygments.lexers import get_lexer_by_name, find_lexer_class, \
    find_lexer_class_for_filename
from pygments.token import Error, Text, _TokenType
from pygments.cmdline import _parse_options


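# Variant of ExtendedRegexLexer that records the state stack, the current
# position and the last match on the instance (self.ctx, self.m), so that
# the driver code in main() can display lexer state next to each token.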
class DebuggingRegexLexer(ExtendedRegexLexer):
    """Make the state stack, position and current match instance attributes."""

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """
        Split ``text`` into (tokentype, text) pairs.

        ``stack`` is the initial stack (default: ``['root']``)
        """
        tokendefs = self._tokens
        self.ctx = ctx = LexerContext(text, 0)
        ctx.stack = list(stack)
        statetokens = tokendefs[ctx.stack[-1]]
        while 1:
            for rexmatch, action, new_state in statetokens:
                self.m = m = rexmatch(text, ctx.pos, ctx.end)
                if m:
                    if action is not None:
                        if type(action) is _TokenType:
                            yield ctx.pos, action, m.group()
                            ctx.pos = m.end()
                        else:
                            if not isinstance(self, ExtendedRegexLexer):
                                for item in action(self, m):
                                    yield item
                                ctx.pos = m.end()
                            else:
                                for item in action(self, m, ctx):
                                    yield item
                                if not new_state:
                                    # altered the state stack?
                                    statetokens = tokendefs[ctx.stack[-1]]
                    if new_state is not None:
                        # state transition
                        if isinstance(new_state, tuple):
                            for state in new_state:
                                if state == '#pop':
                                    ctx.stack.pop()
                                elif state == '#push':
                                    ctx.stack.append(ctx.stack[-1])
                                else:
                                    ctx.stack.append(state)
                        elif isinstance(new_state, int):
                            # pop
                            del ctx.stack[new_state:]
                        elif new_state == '#push':
                            ctx.stack.append(ctx.stack[-1])
                        else:
                            assert False, 'wrong state def: %r' % new_state
                        statetokens = tokendefs[ctx.stack[-1]]
                    break
            else:
                # no rule in the current state matched at ctx.pos
                try:
                    if ctx.pos >= ctx.end:
                        break
                    if text[ctx.pos] == '\n':
                        # at EOL, reset state to 'root'
                        ctx.stack = ['root']
                        statetokens = tokendefs['root']
                        yield ctx.pos, Text, u'\n'
                        ctx.pos += 1
                        continue
                    yield ctx.pos, Error, text[ctx.pos]
                    ctx.pos += 1
                except IndexError:
                    break


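# Lex one file and report the first Error token encountered (unless -e was
# given), together with the tokens -- and, when the debugging lexer is in
# use, the state stacks -- that preceded it.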
def main(fn, lexer=None, options={}):
    if lexer is not None:
        lxcls = get_lexer_by_name(lexer).__class__
    else:
        lxcls = find_lexer_class_for_filename(os.path.basename(fn))
        if lxcls is None:
            name, rest = fn.split('_', 1)
            lxcls = find_lexer_class(name)
            if lxcls is None:
                raise AssertionError('no lexer found for file %r' % fn)
    debug_lexer = False
    # if profile:
    #     # does not work for e.g. ExtendedRegexLexers
    #     if lxcls.__bases__ == (RegexLexer,):
    #         # yes we can! (change the metaclass)
    #         lxcls.__class__ = ProfilingRegexLexerMeta
    #         lxcls.__bases__ = (ProfilingRegexLexer,)
    #         lxcls._prof_sort_index = profsort
    # else:
    #     if lxcls.__bases__ == (RegexLexer,):
    #         lxcls.__bases__ = (DebuggingRegexLexer,)
    #         debug_lexer = True
    #     elif lxcls.__bases__ == (DebuggingRegexLexer,):
    #         # already debugged before
    #         debug_lexer = True
    #     else:
    #         # HACK: ExtendedRegexLexer subclasses will only partially work here.
    #         lxcls.__bases__ = (DebuggingRegexLexer,)
    #         debug_lexer = True

    lx = lxcls(**options)
    lno = 1
    if fn == '-':
        text = sys.stdin.read()
    else:
        with open(fn, 'rb') as fp:
            text = fp.read().decode('utf-8')
    text = text.strip('\n') + '\n'
    tokens = []
    states = []

    def show_token(tok, state):
        # print one (tokentype, text) pair, plus the recorded state stack
        # when the debugging lexer is active
        reprs = list(map(repr, tok))
        print(' ' + reprs[1] + ' ' + ' ' * (29 - len(reprs[1])) + reprs[0], end=' ')
        if debug_lexer:
            print(' ' + ' ' * (29 - len(reprs[0])) + ' : '.join(state) if state else '', end=' ')
        print()

    # num, showall and ignerror are module-level options set in __main__ below
    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error and not ignerror:
            print('Error parsing', fn, 'on line', lno)
            if not showall:
                print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
                for i in range(max(len(tokens) - num, 0), len(tokens)):
                    if debug_lexer:
                        show_token(tokens[i], states[i])
                    else:
                        show_token(tokens[i], None)
            print('Error token:')
            l = len(repr(val))
            print(' ' + repr(val), end=' ')
            if debug_lexer and hasattr(lx, 'ctx'):
                print(' ' * (60 - l) + ' : '.join(lx.ctx.stack), end=' ')
            print()
            print()
            return 1
        tokens.append((type, val))
        if debug_lexer:
            if hasattr(lx, 'ctx'):
                states.append(lx.ctx.stack[:])
            else:
                states.append(None)
        if showall:
            show_token((type, val), states[-1] if debug_lexer else None)
    return 0


def print_help():
    print('''\
Pygments development helper to quickly debug lexers.

    scripts/debug_lexer.py [options] file ...

Give one or more filenames to lex them and display possible error tokens
and/or profiling info. Files are assumed to be encoded in UTF-8.

Selecting lexer and options:

    -l NAME         use lexer named NAME (default is to guess from
                    the given filenames)
    -O OPTIONSTR    use lexer options parsed from OPTIONSTR

Debugging lexing errors:

    -n N            show the last N tokens on error
    -a              always show all lexed tokens (default is only
                    to show them when an error occurs)
    -e              do not stop on error tokens

Profiling:

    -p              use the ProfilingRegexLexer to profile regexes
                    instead of the debugging lexer
    -s N            sort profiling output by column N (default is
                    column 4, the time per call)
''')


# option defaults; overridden by the command-line flags parsed in __main__
num = 10
showall = False
ignerror = False
lexer = None
options = {}
profile = False
profsort = 4


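# Command-line entry point: parse the options documented in print_help()
# and lex each file given on the command line.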
if __name__ == '__main__':
    import getopt
    opts, args = getopt.getopt(sys.argv[1:], 'n:l:aepO:s:h')
    for opt, val in opts:
        if opt == '-n':
            num = int(val)
        elif opt == '-a':
            showall = True
        elif opt == '-e':
            ignerror = True
        elif opt == '-l':
            lexer = val
        elif opt == '-p':
            profile = True
        elif opt == '-s':
            profsort = int(val)
        elif opt == '-O':
            options = _parse_options([val])
        elif opt == '-h':
            print_help()
            sys.exit(0)
    ret = 0
    if not args:
        print_help()
    for f in args:
        ret += main(f, lexer, options)
    sys.exit(bool(ret))