You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

564 lines
21 KiB

4 years ago
  1. #!/usr/bin/env python
  2. """NbConvert is a utility for conversion of .ipynb files.
  3. Command-line interface for the NbConvert conversion utility.
  4. """
  5. # Copyright (c) IPython Development Team.
  6. # Distributed under the terms of the Modified BSD License.
  7. from __future__ import print_function
  8. import logging
  9. import sys
  10. import os
  11. import glob
  12. from textwrap import fill, dedent
  13. from ipython_genutils.text import indent
  14. from jupyter_core.application import JupyterApp, base_aliases, base_flags
  15. from traitlets.config import catch_config_error, Configurable
  16. from traitlets import (
  17. Unicode, List, Instance, DottedObjectName, Type, Bool,
  18. default, observe,
  19. )
  20. from traitlets.utils.importstring import import_item
  21. from .exporters.base import get_export_names, get_exporter
  22. from nbconvert import exporters, preprocessors, writers, postprocessors, __version__
  23. from .utils.base import NbConvertBase
  24. from .utils.exceptions import ConversionException
  25. from .utils.io import unicode_stdin_stream
  26. #-----------------------------------------------------------------------------
  27. #Classes and functions
  28. #-----------------------------------------------------------------------------
  29. class DottedOrNone(DottedObjectName):
  30. """A string holding a valid dotted object name in Python, such as A.b3._c
  31. Also allows for None type.
  32. """
  33. default_value = u''
  34. def validate(self, obj, value):
  35. if value is not None and len(value) > 0:
  36. return super(DottedOrNone, self).validate(obj, value)
  37. else:
  38. return value
  39. nbconvert_aliases = {}
  40. nbconvert_aliases.update(base_aliases)
  41. nbconvert_aliases.update({
  42. 'to' : 'NbConvertApp.export_format',
  43. 'template' : 'TemplateExporter.template_file',
  44. 'writer' : 'NbConvertApp.writer_class',
  45. 'post': 'NbConvertApp.postprocessor_class',
  46. 'output': 'NbConvertApp.output_base',
  47. 'output-dir': 'FilesWriter.build_directory',
  48. 'reveal-prefix': 'SlidesExporter.reveal_url_prefix',
  49. 'nbformat': 'NotebookExporter.nbformat_version',
  50. })
  51. nbconvert_flags = {}
  52. nbconvert_flags.update(base_flags)
  53. nbconvert_flags.update({
  54. 'execute' : (
  55. {'ExecutePreprocessor' : {'enabled' : True}},
  56. "Execute the notebook prior to export."
  57. ),
  58. 'allow-errors' : (
  59. {'ExecutePreprocessor' : {'allow_errors' : True}},
  60. ("Continue notebook execution even if one of the cells throws "
  61. "an error and include the error message in the cell output "
  62. "(the default behaviour is to abort conversion). This flag "
  63. "is only relevant if '--execute' was specified, too.")
  64. ),
  65. 'stdin' : (
  66. {'NbConvertApp' : {
  67. 'from_stdin' : True,
  68. }
  69. },
  70. "read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'"
  71. ),
  72. 'stdout' : (
  73. {'NbConvertApp' : {'writer_class' : "StdoutWriter"}},
  74. "Write notebook output to stdout instead of files."
  75. ),
  76. 'inplace' : (
  77. {
  78. 'NbConvertApp' : {
  79. 'use_output_suffix' : False,
  80. 'export_format' : 'notebook',
  81. },
  82. 'FilesWriter' : {'build_directory': ''},
  83. },
  84. """Run nbconvert in place, overwriting the existing notebook (only
  85. relevant when converting to notebook format)"""
  86. ),
  87. 'clear-output' : (
  88. {
  89. 'NbConvertApp' : {
  90. 'use_output_suffix' : False,
  91. 'export_format' : 'notebook',
  92. },
  93. 'FilesWriter' : {'build_directory': ''},
  94. 'ClearOutputPreprocessor' : {'enabled' : True},
  95. },
  96. """Clear output of current file and save in place,
  97. overwriting the existing notebook. """
  98. ),
  99. 'no-prompt' : (
  100. {'TemplateExporter' : {
  101. 'exclude_input_prompt' : True,
  102. 'exclude_output_prompt' : True,
  103. }
  104. },
  105. "Exclude input and output prompts from converted document."
  106. ),
  107. 'no-input' : (
  108. {'TemplateExporter' : {
  109. 'exclude_output_prompt' : True,
  110. 'exclude_input': True,
  111. }
  112. },
  113. """Exclude input cells and output prompts from converted document.
  114. This mode is ideal for generating code-free reports."""
  115. ),
  116. })
  117. class NbConvertApp(JupyterApp):
  118. """Application used to convert from notebook file type (``*.ipynb``)"""
  119. version = __version__
  120. name = 'jupyter-nbconvert'
  121. aliases = nbconvert_aliases
  122. flags = nbconvert_flags
  123. @default('log_level')
  124. def _log_level_default(self):
  125. return logging.INFO
  126. classes = List()
  127. @default('classes')
  128. def _classes_default(self):
  129. classes = [NbConvertBase]
  130. for pkg in (exporters, preprocessors, writers, postprocessors):
  131. for name in dir(pkg):
  132. cls = getattr(pkg, name)
  133. if isinstance(cls, type) and issubclass(cls, Configurable):
  134. classes.append(cls)
  135. return classes
  136. description = Unicode(
  137. u"""This application is used to convert notebook files (*.ipynb)
  138. to various other formats.
  139. WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.""")
  140. output_base = Unicode('', help='''overwrite base name use for output files.
  141. can only be used when converting one notebook at a time.
  142. ''').tag(config=True)
  143. use_output_suffix = Bool(
  144. True,
  145. help="""Whether to apply a suffix prior to the extension (only relevant
  146. when converting to notebook format). The suffix is determined by
  147. the exporter, and is usually '.nbconvert'."""
  148. ).tag(config=True)
  149. output_files_dir = Unicode('{notebook_name}_files',
  150. help='''Directory to copy extra files (figures) to.
  151. '{notebook_name}' in the string will be converted to notebook
  152. basename.'''
  153. ).tag(config=True)
  154. examples = Unicode(u"""
  155. The simplest way to use nbconvert is
  156. > jupyter nbconvert mynotebook.ipynb
  157. which will convert mynotebook.ipynb to the default format (probably HTML).
  158. You can specify the export format with `--to`.
  159. Options include {formats}.
  160. > jupyter nbconvert --to latex mynotebook.ipynb
  161. Both HTML and LaTeX support multiple output templates. LaTeX includes
  162. 'base', 'article' and 'report'. HTML includes 'basic' and 'full'. You
  163. can specify the flavor of the format used.
  164. > jupyter nbconvert --to html --template basic mynotebook.ipynb
  165. You can also pipe the output to stdout, rather than a file
  166. > jupyter nbconvert mynotebook.ipynb --stdout
  167. PDF is generated via latex
  168. > jupyter nbconvert mynotebook.ipynb --to pdf
  169. You can get (and serve) a Reveal.js-powered slideshow
  170. > jupyter nbconvert myslides.ipynb --to slides --post serve
  171. Multiple notebooks can be given at the command line in a couple of
  172. different ways:
  173. > jupyter nbconvert notebook*.ipynb
  174. > jupyter nbconvert notebook1.ipynb notebook2.ipynb
  175. or you can specify the notebooks list in a config file, containing::
  176. c.NbConvertApp.notebooks = ["my_notebook.ipynb"]
  177. > jupyter nbconvert --config mycfg.py
  178. """.format(formats=get_export_names()))
  179. # Writer specific variables
  180. writer = Instance('nbconvert.writers.base.WriterBase',
  181. help="""Instance of the writer class used to write the
  182. results of the conversion.""", allow_none=True)
  183. writer_class = DottedObjectName('FilesWriter',
  184. help="""Writer class used to write the
  185. results of the conversion""").tag(config=True)
  186. writer_aliases = {'fileswriter': 'nbconvert.writers.files.FilesWriter',
  187. 'debugwriter': 'nbconvert.writers.debug.DebugWriter',
  188. 'stdoutwriter': 'nbconvert.writers.stdout.StdoutWriter'}
  189. writer_factory = Type(allow_none=True)
  190. @observe('writer_class')
  191. def _writer_class_changed(self, change):
  192. new = change['new']
  193. if new.lower() in self.writer_aliases:
  194. new = self.writer_aliases[new.lower()]
  195. self.writer_factory = import_item(new)
  196. # Post-processor specific variables
  197. postprocessor = Instance('nbconvert.postprocessors.base.PostProcessorBase',
  198. help="""Instance of the PostProcessor class used to write the
  199. results of the conversion.""", allow_none=True)
  200. postprocessor_class = DottedOrNone(
  201. help="""PostProcessor class used to write the
  202. results of the conversion"""
  203. ).tag(config=True)
  204. postprocessor_aliases = {'serve': 'nbconvert.postprocessors.serve.ServePostProcessor'}
  205. postprocessor_factory = Type(None, allow_none=True)
  206. @observe('postprocessor_class')
  207. def _postprocessor_class_changed(self, change):
  208. new = change['new']
  209. if new.lower() in self.postprocessor_aliases:
  210. new = self.postprocessor_aliases[new.lower()]
  211. if new:
  212. self.postprocessor_factory = import_item(new)
  213. ipywidgets_base_url = Unicode("https://unpkg.com/",
  214. help="URL base for ipywidgets package").tag(config=True)
  215. export_format = Unicode(
  216. 'html',
  217. allow_none=False,
  218. help="""The export format to be used, either one of the built-in formats
  219. {formats}
  220. or a dotted object name that represents the import path for an
  221. `Exporter` class""".format(formats=get_export_names())
  222. ).tag(config=True)
  223. notebooks = List([], help="""List of notebooks to convert.
  224. Wildcards are supported.
  225. Filenames passed positionally will be added to the list.
  226. """
  227. ).tag(config=True)
  228. from_stdin = Bool(False, help="read a single notebook from stdin.").tag(config=True)
  229. @catch_config_error
  230. def initialize(self, argv=None):
  231. """Initialize application, notebooks, writer, and postprocessor"""
  232. self.init_syspath()
  233. super(NbConvertApp, self).initialize(argv)
  234. self.init_notebooks()
  235. self.init_writer()
  236. self.init_postprocessor()
  237. def init_syspath(self):
  238. """Add the cwd to the sys.path ($PYTHONPATH)"""
  239. sys.path.insert(0, os.getcwd())
  240. def init_notebooks(self):
  241. """Construct the list of notebooks.
  242. If notebooks are passed on the command-line,
  243. they override (rather than add) notebooks specified in config files.
  244. Glob each notebook to replace notebook patterns with filenames.
  245. """
  246. # Specifying notebooks on the command-line overrides (rather than
  247. # adds) the notebook list
  248. if self.extra_args:
  249. patterns = self.extra_args
  250. else:
  251. patterns = self.notebooks
  252. # Use glob to replace all the notebook patterns with filenames.
  253. filenames = []
  254. for pattern in patterns:
  255. # Use glob to find matching filenames. Allow the user to convert
  256. # notebooks without having to type the extension.
  257. globbed_files = glob.glob(pattern)
  258. globbed_files.extend(glob.glob(pattern + '.ipynb'))
  259. if not globbed_files:
  260. self.log.warning("pattern %r matched no files", pattern)
  261. for filename in globbed_files:
  262. if not filename in filenames:
  263. filenames.append(filename)
  264. self.notebooks = filenames
  265. def init_writer(self):
  266. """Initialize the writer (which is stateless)"""
  267. self._writer_class_changed({ 'new': self.writer_class })
  268. self.writer = self.writer_factory(parent=self)
  269. if hasattr(self.writer, 'build_directory') and self.writer.build_directory != '':
  270. self.use_output_suffix = False
  271. def init_postprocessor(self):
  272. """Initialize the postprocessor (which is stateless)"""
  273. self._postprocessor_class_changed({'new': self.postprocessor_class})
  274. if self.postprocessor_factory:
  275. self.postprocessor = self.postprocessor_factory(parent=self)
    def start(self):
        """Run the parent class's ``start`` and then convert the notebooks.

        Meant to be called once initialization has completed.
        """
        super(NbConvertApp, self).start()
        self.convert_notebooks()
  280. def init_single_notebook_resources(self, notebook_filename):
  281. """Step 1: Initialize resources
  282. This initializes the resources dictionary for a single notebook.
  283. Returns
  284. -------
  285. dict
  286. resources dictionary for a single notebook that MUST include the following keys:
  287. - config_dir: the location of the Jupyter config directory
  288. - unique_key: the notebook name
  289. - output_files_dir: a directory where output files (not
  290. including the notebook itself) should be saved
  291. """
  292. basename = os.path.basename(notebook_filename)
  293. notebook_name = basename[:basename.rfind('.')]
  294. if self.output_base:
  295. # strip duplicate extension from output_base, to avoid Basename.ext.ext
  296. if getattr(self.exporter, 'file_extension', False):
  297. base, ext = os.path.splitext(self.output_base)
  298. if ext == self.exporter.file_extension:
  299. self.output_base = base
  300. notebook_name = self.output_base
  301. self.log.debug("Notebook name is '%s'", notebook_name)
  302. # first initialize the resources we want to use
  303. resources = {}
  304. resources['config_dir'] = self.config_dir
  305. resources['unique_key'] = notebook_name
  306. output_files_dir = (self.output_files_dir
  307. .format(notebook_name=notebook_name))
  308. resources['output_files_dir'] = output_files_dir
  309. resources['ipywidgets_base_url'] = self.ipywidgets_base_url
  310. return resources
  311. def export_single_notebook(self, notebook_filename, resources, input_buffer=None):
  312. """Step 2: Export the notebook
  313. Exports the notebook to a particular format according to the specified
  314. exporter. This function returns the output and (possibly modified)
  315. resources from the exporter.
  316. Parameters
  317. ----------
  318. notebook_filename : str
  319. name of notebook file.
  320. resources : dict
  321. input_buffer :
  322. readable file-like object returning unicode.
  323. if not None, notebook_filename is ignored
  324. Returns
  325. -------
  326. output
  327. dict
  328. resources (possibly modified)
  329. """
  330. try:
  331. if input_buffer is not None:
  332. output, resources = self.exporter.from_file(input_buffer, resources=resources)
  333. else:
  334. output, resources = self.exporter.from_filename(notebook_filename, resources=resources)
  335. except ConversionException:
  336. self.log.error("Error while converting '%s'", notebook_filename, exc_info=True)
  337. self.exit(1)
  338. return output, resources
  339. def write_single_notebook(self, output, resources):
  340. """Step 3: Write the notebook to file
  341. This writes output from the exporter to file using the specified writer.
  342. It returns the results from the writer.
  343. Parameters
  344. ----------
  345. output :
  346. resources : dict
  347. resources for a single notebook including name, config directory
  348. and directory to save output
  349. Returns
  350. -------
  351. file
  352. results from the specified writer output of exporter
  353. """
  354. if 'unique_key' not in resources:
  355. raise KeyError("unique_key MUST be specified in the resources, but it is not")
  356. notebook_name = resources['unique_key']
  357. if self.use_output_suffix and not self.output_base:
  358. notebook_name += resources.get('output_suffix', '')
  359. write_results = self.writer.write(
  360. output, resources, notebook_name=notebook_name)
  361. return write_results
  362. def postprocess_single_notebook(self, write_results):
  363. """Step 4: Post-process the written file
  364. Only used if a postprocessor has been specified. After the
  365. converted notebook is written to a file in Step 3, this post-processes
  366. the notebook.
  367. """
  368. # Post-process if post processor has been defined.
  369. if hasattr(self, 'postprocessor') and self.postprocessor:
  370. self.postprocessor(write_results)
  371. def convert_single_notebook(self, notebook_filename, input_buffer=None):
  372. """Convert a single notebook.
  373. Performs the following steps:
  374. 1. Initialize notebook resources
  375. 2. Export the notebook to a particular format
  376. 3. Write the exported notebook to file
  377. 4. (Maybe) postprocess the written file
  378. Parameters
  379. ----------
  380. notebook_filename : str
  381. input_buffer :
  382. If input_buffer is not None, conversion is done and the buffer is
  383. used as source into a file basenamed by the notebook_filename
  384. argument.
  385. """
  386. if input_buffer is None:
  387. self.log.info("Converting notebook %s to %s", notebook_filename, self.export_format)
  388. else:
  389. self.log.info("Converting notebook into %s", self.export_format)
  390. resources = self.init_single_notebook_resources(notebook_filename)
  391. output, resources = self.export_single_notebook(notebook_filename, resources, input_buffer=input_buffer)
  392. write_results = self.write_single_notebook(output, resources)
  393. self.postprocess_single_notebook(write_results)
  394. def convert_notebooks(self):
  395. """Convert the notebooks in the self.notebook traitlet """
  396. # check that the output base isn't specified if there is more than
  397. # one notebook to convert
  398. if self.output_base != '' and len(self.notebooks) > 1:
  399. self.log.error(
  400. """
  401. UsageError: --output flag or `NbConvertApp.output_base` config option
  402. cannot be used when converting multiple notebooks.
  403. """
  404. )
  405. self.exit(1)
  406. # initialize the exporter
  407. cls = get_exporter(self.export_format)
  408. self.exporter = cls(config=self.config)
  409. # no notebooks to convert!
  410. if len(self.notebooks) == 0 and not self.from_stdin:
  411. self.print_help()
  412. sys.exit(-1)
  413. # convert each notebook
  414. if not self.from_stdin:
  415. for notebook_filename in self.notebooks:
  416. self.convert_single_notebook(notebook_filename)
  417. else:
  418. input_buffer = unicode_stdin_stream()
  419. # default name when conversion from stdin
  420. self.convert_single_notebook("notebook.ipynb", input_buffer=input_buffer)
  421. def document_flag_help(self):
  422. """
  423. Return a string containing descriptions of all the flags.
  424. """
  425. flags = "The following flags are defined:\n\n"
  426. for flag, (cfg, fhelp) in self.flags.items():
  427. flags += "{}\n".format(flag)
  428. flags += indent(fill(fhelp, 80)) + '\n\n'
  429. flags += indent(fill("Long Form: "+str(cfg), 80)) + '\n\n'
  430. return flags
  431. def document_alias_help(self):
  432. """Return a string containing all of the aliases"""
  433. aliases = "The folowing aliases are defined:\n\n"
  434. for alias, longname in self.aliases.items():
  435. aliases += "\t**{}** ({})\n\n".format(alias, longname)
  436. return aliases
  437. def document_config_options(self):
  438. """
  439. Provides a much improves version of the configuration documentation by
  440. breaking the configuration options into app, exporter, writer,
  441. preprocessor, postprocessor, and other sections.
  442. """
  443. categories = {category: [c for c in self._classes_inc_parents() if category in c.__name__.lower()]
  444. for category in ['app', 'exporter', 'writer', 'preprocessor', 'postprocessor']}
  445. accounted_for = {c for category in categories.values() for c in category}
  446. categories['other']= [c for c in self._classes_inc_parents() if c not in accounted_for]
  447. header = dedent("""
  448. {section} Options
  449. -----------------------
  450. """)
  451. sections = ""
  452. for category in categories:
  453. sections += header.format(section=category.title())
  454. if category in ['exporter','preprocessor','writer']:
  455. sections += ".. image:: _static/{image}_inheritance.png\n\n".format(image=category)
  456. sections += '\n'.join(c.class_config_rst_doc() for c in categories[category])
  457. return sections.replace(' : ',r' \: ')
  458. #-----------------------------------------------------------------------------
  459. # Main entry point
  460. #-----------------------------------------------------------------------------
  461. main = launch_new_instance = NbConvertApp.launch_instance