You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

575 lines
19 KiB

4 years ago
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.cmdline
  4. ~~~~~~~~~~~~~~~~
  5. Command line interface.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import os
  10. import sys
  11. import getopt
  12. from textwrap import dedent
  13. from pygments import __version__, highlight
  14. from pygments.util import ClassNotFound, OptionError, docstring_headline, \
  15. guess_decode, guess_decode_from_terminal, terminal_encoding, \
  16. UnclosingTextIOWrapper
  17. from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
  18. load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
  19. from pygments.lexers.special import TextLexer
  20. from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
  21. from pygments.formatters import get_all_formatters, get_formatter_by_name, \
  22. load_formatter_from_file, get_formatter_for_filename, find_formatter_class
  23. from pygments.formatters.terminal import TerminalFormatter
  24. from pygments.formatters.terminal256 import Terminal256Formatter
  25. from pygments.filters import get_all_filters, find_filter_class
  26. from pygments.styles import get_all_styles, get_style_by_name
  27. USAGE = """\
  28. Usage: %s [-l <lexer> | -g] [-F <filter>[:<options>]] [-f <formatter>]
  29. [-O <options>] [-P <option=value>] [-s] [-v] [-x] [-o <outfile>] [<infile>]
  30. %s -S <style> -f <formatter> [-a <arg>] [-O <options>] [-P <option=value>]
  31. %s -L [<which> ...]
  32. %s -N <filename>
  33. %s -H <type> <name>
  34. %s -h | -V
  35. Highlight the input file and write the result to <outfile>.
  36. If no input file is given, use stdin, if -o is not given, use stdout.
  37. If -s is passed, lexing will be done in "streaming" mode, reading and
  38. highlighting one line at a time. This will only work properly with
  39. lexers that have no constructs spanning multiple lines!
  40. <lexer> is a lexer name (query all lexer names with -L). If -l is not
  41. given, the lexer is guessed from the extension of the input file name
  42. (this obviously doesn't work if the input is stdin). If -g is passed,
  43. attempt to guess the lexer from the file contents, or pass through as
  44. plain text if this fails (this can work for stdin).
  45. Likewise, <formatter> is a formatter name, and will be guessed from
  46. the extension of the output file name. If no output file is given,
  47. the terminal formatter will be used by default.
  48. The additional option -x allows custom lexers and formatters to be
  49. loaded from a .py file relative to the current working directory. For
  50. example, ``-l ./customlexer.py -x``. By default, this option expects a
  51. file with a class named CustomLexer or CustomFormatter; you can also
  52. specify your own class name with a colon (``-l ./lexer.py:MyLexer``).
  53. Users should be very careful not to use this option with untrusted files,
  54. because it will import and run them.
  55. With the -O option, you can give the lexer and formatter a comma-
  56. separated list of options, e.g. ``-O bg=light,python=cool``.
  57. The -P option adds lexer and formatter options like the -O option, but
  58. you can only give one option per -P. That way, the option value may
  59. contain commas and equals signs, which it can't with -O, e.g.
  60. ``-P "heading=Pygments, the Python highlighter".
  61. With the -F option, you can add filters to the token stream, you can
  62. give options in the same way as for -O after a colon (note: there must
  63. not be spaces around the colon).
  64. The -O, -P and -F options can be given multiple times.
  65. With the -S option, print out style definitions for style <style>
  66. for formatter <formatter>. The argument given by -a is formatter
  67. dependent.
  68. The -L option lists lexers, formatters, styles or filters -- set
  69. `which` to the thing you want to list (e.g. "styles"), or omit it to
  70. list everything.
  71. The -N option guesses and prints out a lexer name based solely on
  72. the given filename. It does not take input or highlight anything.
  73. If no specific lexer can be determined "text" is returned.
  74. The -H option prints detailed help for the object <name> of type <type>,
  75. where <type> is one of "lexer", "formatter" or "filter".
  76. The -s option processes lines one at a time until EOF, rather than
  77. waiting to process the entire file. This only works for stdin, and
  78. is intended for streaming input such as you get from 'tail -f'.
  79. Example usage: "tail -f sql.log | pygmentize -s -l sql"
  80. The -v option prints a detailed traceback on unhandled exceptions,
  81. which is useful for debugging and bug reports.
  82. The -h option prints this help.
  83. The -V option prints the package version.
  84. """
  85. def _parse_options(o_strs):
  86. opts = {}
  87. if not o_strs:
  88. return opts
  89. for o_str in o_strs:
  90. if not o_str.strip():
  91. continue
  92. o_args = o_str.split(',')
  93. for o_arg in o_args:
  94. o_arg = o_arg.strip()
  95. try:
  96. o_key, o_val = o_arg.split('=', 1)
  97. o_key = o_key.strip()
  98. o_val = o_val.strip()
  99. except ValueError:
  100. opts[o_arg] = True
  101. else:
  102. opts[o_key] = o_val
  103. return opts
  104. def _parse_filters(f_strs):
  105. filters = []
  106. if not f_strs:
  107. return filters
  108. for f_str in f_strs:
  109. if ':' in f_str:
  110. fname, fopts = f_str.split(':', 1)
  111. filters.append((fname, _parse_options([fopts])))
  112. else:
  113. filters.append((f_str, {}))
  114. return filters
  115. def _print_help(what, name):
  116. try:
  117. if what == 'lexer':
  118. cls = get_lexer_by_name(name)
  119. print("Help on the %s lexer:" % cls.name)
  120. print(dedent(cls.__doc__))
  121. elif what == 'formatter':
  122. cls = find_formatter_class(name)
  123. print("Help on the %s formatter:" % cls.name)
  124. print(dedent(cls.__doc__))
  125. elif what == 'filter':
  126. cls = find_filter_class(name)
  127. print("Help on the %s filter:" % name)
  128. print(dedent(cls.__doc__))
  129. return 0
  130. except (AttributeError, ValueError):
  131. print("%s not found!" % what, file=sys.stderr)
  132. return 1
  133. def _print_list(what):
  134. if what == 'lexer':
  135. print()
  136. print("Lexers:")
  137. print("~~~~~~~")
  138. info = []
  139. for fullname, names, exts, _ in get_all_lexers():
  140. tup = (', '.join(names)+':', fullname,
  141. exts and '(filenames ' + ', '.join(exts) + ')' or '')
  142. info.append(tup)
  143. info.sort()
  144. for i in info:
  145. print(('* %s\n %s %s') % i)
  146. elif what == 'formatter':
  147. print()
  148. print("Formatters:")
  149. print("~~~~~~~~~~~")
  150. info = []
  151. for cls in get_all_formatters():
  152. doc = docstring_headline(cls)
  153. tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
  154. '(filenames ' + ', '.join(cls.filenames) + ')' or '')
  155. info.append(tup)
  156. info.sort()
  157. for i in info:
  158. print(('* %s\n %s %s') % i)
  159. elif what == 'filter':
  160. print()
  161. print("Filters:")
  162. print("~~~~~~~~")
  163. for name in get_all_filters():
  164. cls = find_filter_class(name)
  165. print("* " + name + ':')
  166. print(" %s" % docstring_headline(cls))
  167. elif what == 'style':
  168. print()
  169. print("Styles:")
  170. print("~~~~~~~")
  171. for name in get_all_styles():
  172. cls = get_style_by_name(name)
  173. print("* " + name + ':')
  174. print(" %s" % docstring_headline(cls))
  175. def main_inner(popts, args, usage):
  176. opts = {}
  177. O_opts = []
  178. P_opts = []
  179. F_opts = []
  180. for opt, arg in popts:
  181. if opt == '-O':
  182. O_opts.append(arg)
  183. elif opt == '-P':
  184. P_opts.append(arg)
  185. elif opt == '-F':
  186. F_opts.append(arg)
  187. opts[opt] = arg
  188. if opts.pop('-h', None) is not None:
  189. print(usage)
  190. return 0
  191. if opts.pop('-V', None) is not None:
  192. print('Pygments version %s, (c) 2006-2019 by Georg Brandl.' % __version__)
  193. return 0
  194. # handle ``pygmentize -L``
  195. L_opt = opts.pop('-L', None)
  196. if L_opt is not None:
  197. if opts:
  198. print(usage, file=sys.stderr)
  199. return 2
  200. # print version
  201. main(['', '-V'])
  202. if not args:
  203. args = ['lexer', 'formatter', 'filter', 'style']
  204. for arg in args:
  205. _print_list(arg.rstrip('s'))
  206. return 0
  207. # handle ``pygmentize -H``
  208. H_opt = opts.pop('-H', None)
  209. if H_opt is not None:
  210. if opts or len(args) != 2:
  211. print(usage, file=sys.stderr)
  212. return 2
  213. what, name = args # pylint: disable=unbalanced-tuple-unpacking
  214. if what not in ('lexer', 'formatter', 'filter'):
  215. print(usage, file=sys.stderr)
  216. return 2
  217. return _print_help(what, name)
  218. # parse -O options
  219. parsed_opts = _parse_options(O_opts)
  220. opts.pop('-O', None)
  221. # parse -P options
  222. for p_opt in P_opts:
  223. try:
  224. name, value = p_opt.split('=', 1)
  225. except ValueError:
  226. parsed_opts[p_opt] = True
  227. else:
  228. parsed_opts[name] = value
  229. opts.pop('-P', None)
  230. # encodings
  231. inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
  232. outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
  233. # handle ``pygmentize -N``
  234. infn = opts.pop('-N', None)
  235. if infn is not None:
  236. lexer = find_lexer_class_for_filename(infn)
  237. if lexer is None:
  238. lexer = TextLexer
  239. print(lexer.aliases[0])
  240. return 0
  241. # handle ``pygmentize -S``
  242. S_opt = opts.pop('-S', None)
  243. a_opt = opts.pop('-a', None)
  244. if S_opt is not None:
  245. f_opt = opts.pop('-f', None)
  246. if not f_opt:
  247. print(usage, file=sys.stderr)
  248. return 2
  249. if opts or args:
  250. print(usage, file=sys.stderr)
  251. return 2
  252. try:
  253. parsed_opts['style'] = S_opt
  254. fmter = get_formatter_by_name(f_opt, **parsed_opts)
  255. except ClassNotFound as err:
  256. print(err, file=sys.stderr)
  257. return 1
  258. print(fmter.get_style_defs(a_opt or ''))
  259. return 0
  260. # if no -S is given, -a is not allowed
  261. if a_opt is not None:
  262. print(usage, file=sys.stderr)
  263. return 2
  264. # parse -F options
  265. F_opts = _parse_filters(F_opts)
  266. opts.pop('-F', None)
  267. allow_custom_lexer_formatter = False
  268. # -x: allow custom (eXternal) lexers and formatters
  269. if opts.pop('-x', None) is not None:
  270. allow_custom_lexer_formatter = True
  271. # select lexer
  272. lexer = None
  273. # given by name?
  274. lexername = opts.pop('-l', None)
  275. if lexername:
  276. # custom lexer, located relative to user's cwd
  277. if allow_custom_lexer_formatter and '.py' in lexername:
  278. try:
  279. filename = None
  280. name = None
  281. if ':' in lexername:
  282. filename, name = lexername.rsplit(':', 1)
  283. if '.py' in name:
  284. # This can happen on Windows: If the lexername is
  285. # C:\lexer.py -- return to normal load path in that case
  286. name = None
  287. if filename and name:
  288. lexer = load_lexer_from_file(filename, name,
  289. **parsed_opts)
  290. else:
  291. lexer = load_lexer_from_file(lexername, **parsed_opts)
  292. except ClassNotFound as err:
  293. print('Error:', err, file=sys.stderr)
  294. return 1
  295. else:
  296. try:
  297. lexer = get_lexer_by_name(lexername, **parsed_opts)
  298. except (OptionError, ClassNotFound) as err:
  299. print('Error:', err, file=sys.stderr)
  300. return 1
  301. # read input code
  302. code = None
  303. if args:
  304. if len(args) > 1:
  305. print(usage, file=sys.stderr)
  306. return 2
  307. if '-s' in opts:
  308. print('Error: -s option not usable when input file specified',
  309. file=sys.stderr)
  310. return 2
  311. infn = args[0]
  312. try:
  313. with open(infn, 'rb') as infp:
  314. code = infp.read()
  315. except Exception as err:
  316. print('Error: cannot read infile:', err, file=sys.stderr)
  317. return 1
  318. if not inencoding:
  319. code, inencoding = guess_decode(code)
  320. # do we have to guess the lexer?
  321. if not lexer:
  322. try:
  323. lexer = get_lexer_for_filename(infn, code, **parsed_opts)
  324. except ClassNotFound as err:
  325. if '-g' in opts:
  326. try:
  327. lexer = guess_lexer(code, **parsed_opts)
  328. except ClassNotFound:
  329. lexer = TextLexer(**parsed_opts)
  330. else:
  331. print('Error:', err, file=sys.stderr)
  332. return 1
  333. except OptionError as err:
  334. print('Error:', err, file=sys.stderr)
  335. return 1
  336. elif '-s' not in opts: # treat stdin as full file (-s support is later)
  337. # read code from terminal, always in binary mode since we want to
  338. # decode ourselves and be tolerant with it
  339. code = sys.stdin.buffer.read() # use .buffer to get a binary stream
  340. if not inencoding:
  341. code, inencoding = guess_decode_from_terminal(code, sys.stdin)
  342. # else the lexer will do the decoding
  343. if not lexer:
  344. try:
  345. lexer = guess_lexer(code, **parsed_opts)
  346. except ClassNotFound:
  347. lexer = TextLexer(**parsed_opts)
  348. else: # -s option needs a lexer with -l
  349. if not lexer:
  350. print('Error: when using -s a lexer has to be selected with -l',
  351. file=sys.stderr)
  352. return 2
  353. # process filters
  354. for fname, fopts in F_opts:
  355. try:
  356. lexer.add_filter(fname, **fopts)
  357. except ClassNotFound as err:
  358. print('Error:', err, file=sys.stderr)
  359. return 1
  360. # select formatter
  361. outfn = opts.pop('-o', None)
  362. fmter = opts.pop('-f', None)
  363. if fmter:
  364. # custom formatter, located relative to user's cwd
  365. if allow_custom_lexer_formatter and '.py' in fmter:
  366. try:
  367. filename = None
  368. name = None
  369. if ':' in fmter:
  370. # Same logic as above for custom lexer
  371. filename, name = fmter.rsplit(':', 1)
  372. if '.py' in name:
  373. name = None
  374. if filename and name:
  375. fmter = load_formatter_from_file(filename, name,
  376. **parsed_opts)
  377. else:
  378. fmter = load_formatter_from_file(fmter, **parsed_opts)
  379. except ClassNotFound as err:
  380. print('Error:', err, file=sys.stderr)
  381. return 1
  382. else:
  383. try:
  384. fmter = get_formatter_by_name(fmter, **parsed_opts)
  385. except (OptionError, ClassNotFound) as err:
  386. print('Error:', err, file=sys.stderr)
  387. return 1
  388. if outfn:
  389. if not fmter:
  390. try:
  391. fmter = get_formatter_for_filename(outfn, **parsed_opts)
  392. except (OptionError, ClassNotFound) as err:
  393. print('Error:', err, file=sys.stderr)
  394. return 1
  395. try:
  396. outfile = open(outfn, 'wb')
  397. except Exception as err:
  398. print('Error: cannot open outfile:', err, file=sys.stderr)
  399. return 1
  400. else:
  401. if not fmter:
  402. if '256' in os.environ.get('TERM', ''):
  403. fmter = Terminal256Formatter(**parsed_opts)
  404. else:
  405. fmter = TerminalFormatter(**parsed_opts)
  406. outfile = sys.stdout.buffer
  407. # determine output encoding if not explicitly selected
  408. if not outencoding:
  409. if outfn:
  410. # output file? use lexer encoding for now (can still be None)
  411. fmter.encoding = inencoding
  412. else:
  413. # else use terminal encoding
  414. fmter.encoding = terminal_encoding(sys.stdout)
  415. # provide coloring under Windows, if possible
  416. if not outfn and sys.platform in ('win32', 'cygwin') and \
  417. fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover
  418. # unfortunately colorama doesn't support binary streams on Py3
  419. outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
  420. fmter.encoding = None
  421. try:
  422. import colorama.initialise
  423. except ImportError:
  424. pass
  425. else:
  426. outfile = colorama.initialise.wrap_stream(
  427. outfile, convert=None, strip=None, autoreset=False, wrap=True)
  428. # When using the LaTeX formatter and the option `escapeinside` is
  429. # specified, we need a special lexer which collects escaped text
  430. # before running the chosen language lexer.
  431. escapeinside = parsed_opts.get('escapeinside', '')
  432. if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
  433. left = escapeinside[0]
  434. right = escapeinside[1]
  435. lexer = LatexEmbeddedLexer(left, right, lexer)
  436. # ... and do it!
  437. if '-s' not in opts:
  438. # process whole input as per normal...
  439. highlight(code, lexer, fmter, outfile)
  440. return 0
  441. else:
  442. # line by line processing of stdin (eg: for 'tail -f')...
  443. try:
  444. while 1:
  445. line = sys.stdin.buffer.readline()
  446. if not line:
  447. break
  448. if not inencoding:
  449. line = guess_decode_from_terminal(line, sys.stdin)[0]
  450. highlight(line, lexer, fmter, outfile)
  451. if hasattr(outfile, 'flush'):
  452. outfile.flush()
  453. return 0
  454. except KeyboardInterrupt: # pragma: no cover
  455. return 0
  456. def main(args=sys.argv):
  457. """
  458. Main command line entry point.
  459. """
  460. usage = USAGE % ((args[0],) * 6)
  461. try:
  462. popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:vhVHgsx")
  463. except getopt.GetoptError:
  464. print(usage, file=sys.stderr)
  465. return 2
  466. try:
  467. return main_inner(popts, args, usage)
  468. except Exception:
  469. if '-v' in dict(popts):
  470. print(file=sys.stderr)
  471. print('*' * 65, file=sys.stderr)
  472. print('An unhandled exception occurred while highlighting.',
  473. file=sys.stderr)
  474. print('Please report the whole traceback to the issue tracker at',
  475. file=sys.stderr)
  476. print('<https://github.com/pygments/pygments/issues>.',
  477. file=sys.stderr)
  478. print('*' * 65, file=sys.stderr)
  479. print(file=sys.stderr)
  480. raise
  481. import traceback
  482. info = traceback.format_exception(*sys.exc_info())
  483. msg = info[-1].strip()
  484. if len(info) >= 3:
  485. # extract relevant file and position info
  486. msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:]
  487. print(file=sys.stderr)
  488. print('*** Error while highlighting:', file=sys.stderr)
  489. print(msg, file=sys.stderr)
  490. print('*** If this is a bug you want to report, please rerun with -v.',
  491. file=sys.stderr)
  492. return 1