|
|
- # coding: utf8
- from __future__ import unicode_literals
-
-
def explain(term):
    """Get a description for a given POS tag, dependency label or entity type.

    term (unicode): The term to explain.
    RETURNS (unicode): The explanation, or `None` if not found in the glossary.

    EXAMPLE:
        >>> spacy.explain(u'NORP')
        >>> doc = nlp(u'Hello world')
        >>> print([(w.text, w.tag_, spacy.explain(w.tag_)) for w in doc])
    """
    # One dictionary lookup instead of a membership test followed by indexing;
    # `dict.get` returns `None` for terms that are not in the glossary.
    return GLOSSARY.get(term)
-
-
# Lookup table mapping tag / label strings to short human-readable
# descriptions. `explain` reads from this dict. Keys are case-sensitive, so
# e.g. 'CC' (POS tag) and 'cc' (dependency label) are distinct entries.
GLOSSARY = {
    # POS tags
    # Universal POS Tags
    # http://universaldependencies.org/u/pos/

    'ADJ': 'adjective',
    'ADP': 'adposition',
    'ADV': 'adverb',
    'AUX': 'auxiliary',
    'CONJ': 'conjunction',
    'CCONJ': 'coordinating conjunction',
    'DET': 'determiner',
    'INTJ': 'interjection',
    'NOUN': 'noun',
    'NUM': 'numeral',
    'PART': 'particle',
    'PRON': 'pronoun',
    'PROPN': 'proper noun',
    'PUNCT': 'punctuation',
    'SCONJ': 'subordinating conjunction',
    'SYM': 'symbol',
    'VERB': 'verb',
    'X': 'other',
    'EOL': 'end of line',
    'SPACE': 'space',


    # POS tags (English)
    # OntoNotes 5 / Penn Treebank
    # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

    '.': 'punctuation mark, sentence closer',
    ',': 'punctuation mark, comma',
    '-LRB-': 'left round bracket',
    '-RRB-': 'right round bracket',
    '``': 'opening quotation mark',
    '""': 'closing quotation mark',
    "''": 'closing quotation mark',
    ':': 'punctuation mark, colon or ellipsis',
    '$': 'symbol, currency',
    '#': 'symbol, number sign',
    'AFX': 'affix',
    'CC': 'conjunction, coordinating',
    'CD': 'cardinal number',
    'DT': 'determiner',
    'EX': 'existential there',
    'FW': 'foreign word',
    'HYPH': 'punctuation mark, hyphen',
    'IN': 'conjunction, subordinating or preposition',
    'JJ': 'adjective',
    'JJR': 'adjective, comparative',
    'JJS': 'adjective, superlative',
    'LS': 'list item marker',
    'MD': 'verb, modal auxiliary',
    'NIL': 'missing tag',
    'NN': 'noun, singular or mass',
    'NNP': 'noun, proper singular',
    'NNPS': 'noun, proper plural',
    'NNS': 'noun, plural',
    'PDT': 'predeterminer',
    'POS': 'possessive ending',
    'PRP': 'pronoun, personal',
    'PRP$': 'pronoun, possessive',
    'RB': 'adverb',
    'RBR': 'adverb, comparative',
    'RBS': 'adverb, superlative',
    'RP': 'adverb, particle',
    'TO': 'infinitival to',
    'UH': 'interjection',
    'VB': 'verb, base form',
    'VBD': 'verb, past tense',
    'VBG': 'verb, gerund or present participle',
    'VBN': 'verb, past participle',
    'VBP': 'verb, non-3rd person singular present',
    'VBZ': 'verb, 3rd person singular present',
    'WDT': 'wh-determiner',
    'WP': 'wh-pronoun, personal',
    'WP$': 'wh-pronoun, possessive',
    'WRB': 'wh-adverb',
    'SP': 'space',
    'ADD': 'email',
    'NFP': 'superfluous punctuation',
    'GW': 'additional word in multi-word expression',
    'XX': 'unknown',
    'BES': 'auxiliary "be"',
    'HVS': 'forms of "have"',


    # POS Tags (German)
    # TIGER Treebank
    # http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/tiger_introduction.pdf

    '$(': 'other sentence-internal punctuation mark',
    '$,': 'comma',
    '$.': 'sentence-final punctuation mark',
    'ADJA': 'adjective, attributive',
    'ADJD': 'adjective, adverbial or predicative',
    'APPO': 'postposition',
    'APPR': 'preposition; circumposition left',
    'APPRART': 'preposition with article',
    'APZR': 'circumposition right',
    'ART': 'definite or indefinite article',
    'CARD': 'cardinal number',
    'FM': 'foreign language material',
    'ITJ': 'interjection',
    'KOKOM': 'comparative conjunction',
    'KON': 'coordinate conjunction',
    'KOUI': 'subordinate conjunction with "zu" and infinitive',
    'KOUS': 'subordinate conjunction with sentence',
    'NE': 'proper noun',
    'NNE': 'proper noun',
    'PAV': 'pronominal adverb',
    'PROAV': 'pronominal adverb',
    'PDAT': 'attributive demonstrative pronoun',
    'PDS': 'substituting demonstrative pronoun',
    'PIAT': 'attributive indefinite pronoun without determiner',
    'PIDAT': 'attributive indefinite pronoun with determiner',
    'PIS': 'substituting indefinite pronoun',
    'PPER': 'non-reflexive personal pronoun',
    'PPOSAT': 'attributive possessive pronoun',
    'PPOSS': 'substituting possessive pronoun',
    'PRELAT': 'attributive relative pronoun',
    'PRELS': 'substituting relative pronoun',
    'PRF': 'reflexive personal pronoun',
    'PTKA': 'particle with adjective or adverb',
    'PTKANT': 'answer particle',
    'PTKNEG': 'negative particle',
    'PTKVZ': 'separable verbal particle',
    'PTKZU': '"zu" before infinitive',
    'PWAT': 'attributive interrogative pronoun',
    'PWAV': 'adverbial interrogative or relative pronoun',
    'PWS': 'substituting interrogative pronoun',
    'TRUNC': 'word remnant',
    'VAFIN': 'finite verb, auxiliary',
    'VAIMP': 'imperative, auxiliary',
    'VAINF': 'infinitive, auxiliary',
    'VAPP': 'perfect participle, auxiliary',
    'VMFIN': 'finite verb, modal',
    'VMINF': 'infinitive, modal',
    'VMPP': 'perfect participle, modal',
    'VVFIN': 'finite verb, full',
    'VVIMP': 'imperative, full',
    'VVINF': 'infinitive, full',
    'VVIZU': 'infinitive with "zu", full',
    'VVPP': 'perfect participle, full',
    'XY': 'non-word containing non-letter',


    # Noun chunks

    'NP': 'noun phrase',
    'PP': 'prepositional phrase',
    'VP': 'verb phrase',
    'ADVP': 'adverb phrase',
    'ADJP': 'adjective phrase',
    'SBAR': 'subordinating conjunction',
    'PRT': 'particle',
    'PNP': 'prepositional noun phrase',


    # Dependency Labels (English)
    # ClearNLP / Universal Dependencies
    # https://github.com/clir/clearnlp-guidelines/blob/master/md/specifications/dependency_labels.md

    'acomp': 'adjectival complement',
    'advcl': 'adverbial clause modifier',
    'advmod': 'adverbial modifier',
    'agent': 'agent',
    'amod': 'adjectival modifier',
    'appos': 'appositional modifier',
    'attr': 'attribute',
    'aux': 'auxiliary',
    'auxpass': 'auxiliary (passive)',
    'cc': 'coordinating conjunction',
    'ccomp': 'clausal complement',
    'complm': 'complementizer',
    'conj': 'conjunct',
    'cop': 'copula',
    'csubj': 'clausal subject',
    'csubjpass': 'clausal subject (passive)',
    'dep': 'unclassified dependent',
    'det': 'determiner',
    'dobj': 'direct object',
    'expl': 'expletive',
    'hmod': 'modifier in hyphenation',
    'hyph': 'hyphen',
    'infmod': 'infinitival modifier',
    'intj': 'interjection',
    'iobj': 'indirect object',
    'mark': 'marker',
    'meta': 'meta modifier',
    'neg': 'negation modifier',
    'nmod': 'modifier of nominal',
    'nn': 'noun compound modifier',
    'npadvmod': 'noun phrase as adverbial modifier',
    'nsubj': 'nominal subject',
    'nsubjpass': 'nominal subject (passive)',
    'num': 'number modifier',
    'number': 'number compound modifier',
    'oprd': 'object predicate',
    'obj': 'object',
    'obl': 'oblique nominal',
    'parataxis': 'parataxis',
    # Spelling fixed: previously read 'participal modifier'.
    'partmod': 'participial modifier',
    'pcomp': 'complement of preposition',
    'pobj': 'object of preposition',
    'poss': 'possession modifier',
    'possessive': 'possessive modifier',
    'preconj': 'pre-correlative conjunction',
    'prep': 'prepositional modifier',
    'prt': 'particle',
    'punct': 'punctuation',
    'quantmod': 'modifier of quantifier',
    'rcmod': 'relative clause modifier',
    'root': 'root',
    'xcomp': 'open clausal complement',


    # Dependency labels (German)
    # TIGER Treebank
    # http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/tiger_introduction.pdf
    # Currently missing: 'cc' (comparative complement), because the key would
    # collide with the English 'cc' dependency label defined above.

    'ac': 'adpositional case marker',
    'adc': 'adjective component',
    'ag': 'genitive attribute',
    'ams': 'measure argument of adjective',
    'app': 'apposition',
    'avc': 'adverbial phrase component',
    'cd': 'coordinating conjunction',
    'cj': 'conjunct',
    'cm': 'comparative conjunction',
    'cp': 'complementizer',
    'cvc': 'collocational verb construction',
    'da': 'dative',
    'dh': 'discourse-level head',
    'dm': 'discourse marker',
    'ep': 'expletive es',
    'hd': 'head',
    'ju': 'junctor',
    'mnr': 'postnominal modifier',
    'mo': 'modifier',
    'ng': 'negation',
    'nk': 'noun kernel element',
    'nmc': 'numerical component',
    'oa': 'accusative object',
    'oc': 'clausal object',
    'og': 'genitive object',
    'op': 'prepositional object',
    'par': 'parenthetical element',
    'pd': 'predicate',
    'pg': 'phrasal genitive',
    'ph': 'placeholder',
    'pm': 'morphological particle',
    'pnc': 'proper noun component',
    'rc': 'relative clause',
    're': 'repeated element',
    'rs': 'reported speech',
    'sb': 'subject',


    # Named Entity Recognition
    # OntoNotes 5
    # https://catalog.ldc.upenn.edu/docs/LDC2013T19/OntoNotes-Release-5.0.pdf

    'PERSON': 'People, including fictional',
    'NORP': 'Nationalities or religious or political groups',
    'FACILITY': 'Buildings, airports, highways, bridges, etc.',
    'FAC': 'Buildings, airports, highways, bridges, etc.',
    'ORG': 'Companies, agencies, institutions, etc.',
    'GPE': 'Countries, cities, states',
    'LOC': 'Non-GPE locations, mountain ranges, bodies of water',
    'PRODUCT': 'Objects, vehicles, foods, etc. (not services)',
    'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
    'WORK_OF_ART': 'Titles of books, songs, etc.',
    'LAW': 'Named documents made into laws.',
    'LANGUAGE': 'Any named language',
    'DATE': 'Absolute or relative dates or periods',
    'TIME': 'Times smaller than a day',
    'PERCENT': 'Percentage, including "%"',
    'MONEY': 'Monetary values, including unit',
    'QUANTITY': 'Measurements, as of weight or distance',
    'ORDINAL': '"first", "second", etc.',
    'CARDINAL': 'Numerals that do not fall under another type',


    # Named Entity Recognition
    # Wikipedia
    # http://www.sciencedirect.com/science/article/pii/S0004370212000276
    # https://pdfs.semanticscholar.org/5744/578cc243d92287f47448870bb426c66cc941.pdf

    'PER': 'Named person or family.',
    'MISC': ('Miscellaneous entities, e.g. events, nationalities, '
             'products or works of art'),
}
|