laywerrobot/lib/python3.6/site-packages/nltk/sem/lfg.py

211 lines
6.6 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# Natural Language Toolkit: Lexical Functional Grammar
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2018 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
from __future__ import print_function, division, unicode_literals
from itertools import chain
from nltk.internals import Counter
from nltk.compat import python_2_unicode_compatible
@python_2_unicode_compatible
class FStructure(dict):
def safeappend(self, key, item):
"""
Append 'item' to the list at 'key'. If no list exists for 'key', then
construct one.
"""
if key not in self:
self[key] = []
self[key].append(item)
def __setitem__(self, key, value):
dict.__setitem__(self, key.lower(), value)
def __getitem__(self, key):
return dict.__getitem__(self, key.lower())
def __contains__(self, key):
return dict.__contains__(self, key.lower())
def to_glueformula_list(self, glue_dict):
depgraph = self.to_depgraph()
return glue_dict.to_glueformula_list(depgraph)
def to_depgraph(self, rel=None):
from nltk.parse.dependencygraph import DependencyGraph
depgraph = DependencyGraph()
nodes = depgraph.nodes
self._to_depgraph(nodes, 0, 'ROOT')
# Add all the dependencies for all the nodes
for address, node in nodes.items():
for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'):
if n2['head'] == address:
relation = n2['rel']
node['deps'].setdefault(relation,[])
node['deps'][relation].append(n2['address'])
depgraph.root = nodes[1]
return depgraph
def _to_depgraph(self, nodes, head, rel):
index = len(nodes)
nodes[index].update(
{
'address': index,
'word': self.pred[0],
'tag': self.pred[1],
'head': head,
'rel': rel,
}
)
for feature in sorted(self):
for item in sorted(self[feature]):
if isinstance(item, FStructure):
item._to_depgraph(nodes, index, feature)
elif isinstance(item, tuple):
new_index = len(nodes)
nodes[new_index].update(
{
'address': new_index,
'word': item[0],
'tag': item[1],
'head': index,
'rel': feature,
}
)
elif isinstance(item, list):
for n in item:
n._to_depgraph(nodes, index, feature)
else:
raise Exception('feature %s is not an FStruct, a list, or a tuple' % feature)
@staticmethod
def read_depgraph(depgraph):
return FStructure._read_depgraph(depgraph.root, depgraph)
@staticmethod
def _read_depgraph(node, depgraph, label_counter=None, parent=None):
if not label_counter:
label_counter = Counter()
if node['rel'].lower() in ['spec', 'punct']:
# the value of a 'spec' entry is a word, not an FStructure
return (node['word'], node['tag'])
else:
fstruct = FStructure()
fstruct.pred = None
fstruct.label = FStructure._make_label(label_counter.get())
fstruct.parent = parent
word, tag = node['word'], node['tag']
if tag[:2] == 'VB':
if tag[2:3] == 'D':
fstruct.safeappend('tense', ('PAST', 'tense'))
fstruct.pred = (word, tag[:2])
if not fstruct.pred:
fstruct.pred = (word, tag)
children = [depgraph.nodes[idx] for idx in chain(*node['deps'].values())]
for child in children:
fstruct.safeappend(child['rel'], FStructure._read_depgraph(child, depgraph, label_counter, fstruct))
return fstruct
@staticmethod
def _make_label(value):
"""
Pick an alphabetic character as identifier for an entity in the model.
:param value: where to index into the list of characters
:type value: int
"""
letter = ['f','g','h','i','j','k','l','m','n','o','p','q','r','s',
't','u','v','w','x','y','z','a','b','c','d','e'][value-1]
num = int(value) // 26
if num > 0:
return letter + str(num)
else:
return letter
def __repr__(self):
return self.__unicode__().replace('\n', '')
def __str__(self):
return self.pretty_format()
def pretty_format(self, indent=3):
try:
accum = '%s:[' % self.label
except NameError:
accum = '['
try:
accum += 'pred \'%s\'' % (self.pred[0])
except NameError:
pass
for feature in sorted(self):
for item in self[feature]:
if isinstance(item, FStructure):
next_indent = indent+len(feature)+3+len(self.label)
accum += '\n%s%s %s' % (' '*(indent), feature, item.pretty_format(next_indent))
elif isinstance(item, tuple):
accum += '\n%s%s \'%s\'' % (' '*(indent), feature, item[0])
elif isinstance(item, list):
accum += '\n%s%s {%s}' % (' '*(indent), feature, ('\n%s' % (' '*(indent+len(feature)+2))).join(item))
else: # ERROR
raise Exception('feature %s is not an FStruct, a list, or a tuple' % feature)
return accum+']'
def demo_read_depgraph():
from nltk.parse.dependencygraph import DependencyGraph
dg1 = DependencyGraph("""\
Esso NNP 2 SUB
said VBD 0 ROOT
the DT 5 NMOD
Whiting NNP 5 NMOD
field NN 6 SUB
started VBD 2 VMOD
production NN 6 OBJ
Tuesday NNP 6 VMOD
""")
dg2 = DependencyGraph("""\
John NNP 2 SUB
sees VBP 0 ROOT
Mary NNP 2 OBJ
""")
dg3 = DependencyGraph("""\
a DT 2 SPEC
man NN 3 SUBJ
walks VB 0 ROOT
""")
dg4 = DependencyGraph("""\
every DT 2 SPEC
girl NN 3 SUBJ
chases VB 0 ROOT
a DT 5 SPEC
dog NN 3 OBJ
""")
depgraphs = [dg1,dg2,dg3,dg4]
for dg in depgraphs:
print(FStructure.read_depgraph(dg))
if __name__ == '__main__':
demo_read_depgraph()