You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
1.6 KiB

4 years ago
  1. # Natural Language Toolkit (NLTK) Help
  2. #
  3. # Copyright (C) 2001-2019 NLTK Project
  4. # Authors: Steven Bird <stevenbird1@gmail.com>
  5. # URL: <http://nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. """
  8. Provide structured access to documentation.
  9. """
  10. from __future__ import print_function
  11. import re
  12. from textwrap import wrap
  13. from nltk.data import load
  14. def brown_tagset(tagpattern=None):
  15. _format_tagset("brown_tagset", tagpattern)
  16. def claws5_tagset(tagpattern=None):
  17. _format_tagset("claws5_tagset", tagpattern)
  18. def upenn_tagset(tagpattern=None):
  19. _format_tagset("upenn_tagset", tagpattern)
  20. #####################################################################
  21. # UTILITIES
  22. #####################################################################
  23. def _print_entries(tags, tagdict):
  24. for tag in tags:
  25. entry = tagdict[tag]
  26. defn = [tag + ": " + entry[0]]
  27. examples = wrap(
  28. entry[1], width=75, initial_indent=' ', subsequent_indent=' '
  29. )
  30. print("\n".join(defn + examples))
  31. def _format_tagset(tagset, tagpattern=None):
  32. tagdict = load("help/tagsets/" + tagset + ".pickle")
  33. if not tagpattern:
  34. _print_entries(sorted(tagdict), tagdict)
  35. elif tagpattern in tagdict:
  36. _print_entries([tagpattern], tagdict)
  37. else:
  38. tagpattern = re.compile(tagpattern)
  39. tags = [tag for tag in sorted(tagdict) if tagpattern.match(tag)]
  40. if tags:
  41. _print_entries(tags, tagdict)
  42. else:
  43. print("No matching tags found.")
  44. if __name__ == '__main__':
  45. brown_tagset(r'NN.*')
  46. upenn_tagset(r'.*\$')
  47. claws5_tagset('UNDEFINED')
  48. brown_tagset(r'NN')