581 lines
22 KiB
Python
581 lines
22 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# Natural Language Toolkit: Chart Parser for Feature-Based Grammars
|
||
|
#
|
||
|
# Copyright (C) 2001-2018 NLTK Project
|
||
|
# Author: Rob Speer <rspeer@mit.edu>
|
||
|
# Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
|
||
|
# URL: <http://nltk.org/>
|
||
|
# For license information, see LICENSE.TXT
|
||
|
|
||
|
"""
|
||
|
Extension of chart parsing implementation to handle grammars with
|
||
|
feature structures as nodes.
|
||
|
"""
|
||
|
from __future__ import print_function, unicode_literals
|
||
|
|
||
|
from six.moves import range
|
||
|
|
||
|
from nltk.compat import python_2_unicode_compatible
|
||
|
from nltk.featstruct import FeatStruct, unify, TYPE, find_variables
|
||
|
from nltk.sem import logic
|
||
|
from nltk.tree import Tree
|
||
|
from nltk.grammar import (Nonterminal, Production, CFG,
|
||
|
FeatStructNonterminal, is_nonterminal,
|
||
|
is_terminal)
|
||
|
from nltk.parse.chart import (TreeEdge, Chart, ChartParser, EdgeI,
|
||
|
FundamentalRule, LeafInitRule,
|
||
|
EmptyPredictRule, BottomUpPredictRule,
|
||
|
SingleEdgeFundamentalRule,
|
||
|
BottomUpPredictCombineRule,
|
||
|
CachedTopDownPredictRule,
|
||
|
TopDownInitRule)
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Tree Edge
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
@python_2_unicode_compatible
|
||
|
class FeatureTreeEdge(TreeEdge):
|
||
|
"""
|
||
|
A specialized tree edge that allows shared variable bindings
|
||
|
between nonterminals on the left-hand side and right-hand side.
|
||
|
|
||
|
Each ``FeatureTreeEdge`` contains a set of ``bindings``, i.e., a
|
||
|
dictionary mapping from variables to values. If the edge is not
|
||
|
complete, then these bindings are simply stored. However, if the
|
||
|
edge is complete, then the constructor applies these bindings to
|
||
|
every nonterminal in the edge whose symbol implements the
|
||
|
interface ``SubstituteBindingsI``.
|
||
|
"""
|
||
|
def __init__(self, span, lhs, rhs, dot=0, bindings=None):
|
||
|
"""
|
||
|
Construct a new edge. If the edge is incomplete (i.e., if
|
||
|
``dot<len(rhs)``), then store the bindings as-is. If the edge
|
||
|
is complete (i.e., if ``dot==len(rhs)``), then apply the
|
||
|
bindings to all nonterminals in ``lhs`` and ``rhs``, and then
|
||
|
clear the bindings. See ``TreeEdge`` for a description of
|
||
|
the other arguments.
|
||
|
"""
|
||
|
if bindings is None: bindings = {}
|
||
|
|
||
|
# If the edge is complete, then substitute in the bindings,
|
||
|
# and then throw them away. (If we didn't throw them away, we
|
||
|
# might think that 2 complete edges are different just because
|
||
|
# they have different bindings, even though all bindings have
|
||
|
# already been applied.)
|
||
|
if dot == len(rhs) and bindings:
|
||
|
lhs = self._bind(lhs, bindings)
|
||
|
rhs = [self._bind(elt, bindings) for elt in rhs]
|
||
|
bindings = {}
|
||
|
|
||
|
# Initialize the edge.
|
||
|
TreeEdge.__init__(self, span, lhs, rhs, dot)
|
||
|
self._bindings = bindings
|
||
|
self._comparison_key = (self._comparison_key, tuple(sorted(bindings.items())))
|
||
|
|
||
|
@staticmethod
|
||
|
def from_production(production, index):
|
||
|
"""
|
||
|
:return: A new ``TreeEdge`` formed from the given production.
|
||
|
The new edge's left-hand side and right-hand side will
|
||
|
be taken from ``production``; its span will be
|
||
|
``(index,index)``; and its dot position will be ``0``.
|
||
|
:rtype: TreeEdge
|
||
|
"""
|
||
|
return FeatureTreeEdge(span=(index, index), lhs=production.lhs(),
|
||
|
rhs=production.rhs(), dot=0)
|
||
|
|
||
|
def move_dot_forward(self, new_end, bindings=None):
|
||
|
"""
|
||
|
:return: A new ``FeatureTreeEdge`` formed from this edge.
|
||
|
The new edge's dot position is increased by ``1``,
|
||
|
and its end index will be replaced by ``new_end``.
|
||
|
:rtype: FeatureTreeEdge
|
||
|
:param new_end: The new end index.
|
||
|
:type new_end: int
|
||
|
:param bindings: Bindings for the new edge.
|
||
|
:type bindings: dict
|
||
|
"""
|
||
|
return FeatureTreeEdge(span=(self._span[0], new_end),
|
||
|
lhs=self._lhs, rhs=self._rhs,
|
||
|
dot=self._dot+1, bindings=bindings)
|
||
|
|
||
|
def _bind(self, nt, bindings):
|
||
|
if not isinstance(nt, FeatStructNonterminal): return nt
|
||
|
return nt.substitute_bindings(bindings)
|
||
|
|
||
|
def next_with_bindings(self):
|
||
|
return self._bind(self.nextsym(), self._bindings)
|
||
|
|
||
|
def bindings(self):
|
||
|
"""
|
||
|
Return a copy of this edge's bindings dictionary.
|
||
|
"""
|
||
|
return self._bindings.copy()
|
||
|
|
||
|
def variables(self):
|
||
|
"""
|
||
|
:return: The set of variables used by this edge.
|
||
|
:rtype: set(Variable)
|
||
|
"""
|
||
|
return find_variables([self._lhs] + list(self._rhs) +
|
||
|
list(self._bindings.keys()) +
|
||
|
list(self._bindings.values()),
|
||
|
fs_class=FeatStruct)
|
||
|
|
||
|
def __str__(self):
|
||
|
if self.is_complete():
|
||
|
return TreeEdge.__unicode__(self)
|
||
|
else:
|
||
|
bindings = '{%s}' % ', '.join('%s: %r' % item for item in
|
||
|
sorted(self._bindings.items()))
|
||
|
return '%s %s' % (TreeEdge.__unicode__(self), bindings)
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# A specialized Chart for feature grammars
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
# TODO: subsumes check when adding new edges
|
||
|
|
||
|
class FeatureChart(Chart):
|
||
|
"""
|
||
|
A Chart for feature grammars.
|
||
|
:see: ``Chart`` for more information.
|
||
|
"""
|
||
|
|
||
|
def select(self, **restrictions):
|
||
|
"""
|
||
|
Returns an iterator over the edges in this chart.
|
||
|
See ``Chart.select`` for more information about the
|
||
|
``restrictions`` on the edges.
|
||
|
"""
|
||
|
# If there are no restrictions, then return all edges.
|
||
|
if restrictions=={}: return iter(self._edges)
|
||
|
|
||
|
# Find the index corresponding to the given restrictions.
|
||
|
restr_keys = sorted(restrictions.keys())
|
||
|
restr_keys = tuple(restr_keys)
|
||
|
|
||
|
# If it doesn't exist, then create it.
|
||
|
if restr_keys not in self._indexes:
|
||
|
self._add_index(restr_keys)
|
||
|
|
||
|
vals = tuple(self._get_type_if_possible(restrictions[key])
|
||
|
for key in restr_keys)
|
||
|
return iter(self._indexes[restr_keys].get(vals, []))
|
||
|
|
||
|
def _add_index(self, restr_keys):
|
||
|
"""
|
||
|
A helper function for ``select``, which creates a new index for
|
||
|
a given set of attributes (aka restriction keys).
|
||
|
"""
|
||
|
# Make sure it's a valid index.
|
||
|
for key in restr_keys:
|
||
|
if not hasattr(EdgeI, key):
|
||
|
raise ValueError('Bad restriction: %s' % key)
|
||
|
|
||
|
# Create the index.
|
||
|
index = self._indexes[restr_keys] = {}
|
||
|
|
||
|
# Add all existing edges to the index.
|
||
|
for edge in self._edges:
|
||
|
vals = tuple(self._get_type_if_possible(getattr(edge, key)())
|
||
|
for key in restr_keys)
|
||
|
index.setdefault(vals, []).append(edge)
|
||
|
|
||
|
def _register_with_indexes(self, edge):
|
||
|
"""
|
||
|
A helper function for ``insert``, which registers the new
|
||
|
edge with all existing indexes.
|
||
|
"""
|
||
|
for (restr_keys, index) in self._indexes.items():
|
||
|
vals = tuple(self._get_type_if_possible(getattr(edge, key)())
|
||
|
for key in restr_keys)
|
||
|
index.setdefault(vals, []).append(edge)
|
||
|
|
||
|
def _get_type_if_possible(self, item):
|
||
|
"""
|
||
|
Helper function which returns the ``TYPE`` feature of the ``item``,
|
||
|
if it exists, otherwise it returns the ``item`` itself
|
||
|
"""
|
||
|
if isinstance(item, dict) and TYPE in item:
|
||
|
return item[TYPE]
|
||
|
else:
|
||
|
return item
|
||
|
|
||
|
def parses(self, start, tree_class=Tree):
|
||
|
for edge in self.select(start=0, end=self._num_leaves):
|
||
|
if ((isinstance(edge, FeatureTreeEdge)) and
|
||
|
(edge.lhs()[TYPE] == start[TYPE]) and
|
||
|
(unify(edge.lhs(), start, rename_vars=True))
|
||
|
):
|
||
|
for tree in self.trees(edge, complete=True, tree_class=tree_class):
|
||
|
yield tree
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Fundamental Rule
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
class FeatureFundamentalRule(FundamentalRule):
|
||
|
"""
|
||
|
A specialized version of the fundamental rule that operates on
|
||
|
nonterminals whose symbols are ``FeatStructNonterminal``s. Rather
|
||
|
tha simply comparing the nonterminals for equality, they are
|
||
|
unified. Variable bindings from these unifications are collected
|
||
|
and stored in the chart using a ``FeatureTreeEdge``. When a
|
||
|
complete edge is generated, these bindings are applied to all
|
||
|
nonterminals in the edge.
|
||
|
|
||
|
The fundamental rule states that:
|
||
|
|
||
|
- ``[A -> alpha \* B1 beta][i:j]``
|
||
|
- ``[B2 -> gamma \*][j:k]``
|
||
|
|
||
|
licenses the edge:
|
||
|
|
||
|
- ``[A -> alpha B3 \* beta][i:j]``
|
||
|
|
||
|
assuming that B1 and B2 can be unified to generate B3.
|
||
|
"""
|
||
|
def apply(self, chart, grammar, left_edge, right_edge):
|
||
|
# Make sure the rule is applicable.
|
||
|
if not (left_edge.end() == right_edge.start() and
|
||
|
left_edge.is_incomplete() and
|
||
|
right_edge.is_complete() and
|
||
|
isinstance(left_edge, FeatureTreeEdge)):
|
||
|
return
|
||
|
found = right_edge.lhs()
|
||
|
nextsym = left_edge.nextsym()
|
||
|
if isinstance(right_edge, FeatureTreeEdge):
|
||
|
if not is_nonterminal(nextsym): return
|
||
|
if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]: return
|
||
|
# Create a copy of the bindings.
|
||
|
bindings = left_edge.bindings()
|
||
|
# We rename vars here, because we don't want variables
|
||
|
# from the two different productions to match.
|
||
|
found = found.rename_variables(used_vars=left_edge.variables())
|
||
|
# Unify B1 (left_edge.nextsym) with B2 (right_edge.lhs) to
|
||
|
# generate B3 (result).
|
||
|
result = unify(nextsym, found, bindings, rename_vars=False)
|
||
|
if result is None: return
|
||
|
else:
|
||
|
if nextsym != found: return
|
||
|
# Create a copy of the bindings.
|
||
|
bindings = left_edge.bindings()
|
||
|
|
||
|
# Construct the new edge.
|
||
|
new_edge = left_edge.move_dot_forward(right_edge.end(), bindings)
|
||
|
|
||
|
# Add it to the chart, with appropriate child pointers.
|
||
|
if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
|
||
|
yield new_edge
|
||
|
|
||
|
class FeatureSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
|
||
|
"""
|
||
|
A specialized version of the completer / single edge fundamental rule
|
||
|
that operates on nonterminals whose symbols are ``FeatStructNonterminal``s.
|
||
|
Rather than simply comparing the nonterminals for equality, they are
|
||
|
unified.
|
||
|
"""
|
||
|
_fundamental_rule = FeatureFundamentalRule()
|
||
|
|
||
|
def _apply_complete(self, chart, grammar, right_edge):
|
||
|
fr = self._fundamental_rule
|
||
|
for left_edge in chart.select(end=right_edge.start(),
|
||
|
is_complete=False,
|
||
|
nextsym=right_edge.lhs()):
|
||
|
for new_edge in fr.apply(chart, grammar, left_edge, right_edge):
|
||
|
yield new_edge
|
||
|
|
||
|
def _apply_incomplete(self, chart, grammar, left_edge):
|
||
|
fr = self._fundamental_rule
|
||
|
for right_edge in chart.select(start=left_edge.end(),
|
||
|
is_complete=True,
|
||
|
lhs=left_edge.nextsym()):
|
||
|
for new_edge in fr.apply(chart, grammar, left_edge, right_edge):
|
||
|
yield new_edge
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Top-Down Prediction
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
class FeatureTopDownInitRule(TopDownInitRule):
|
||
|
def apply(self, chart, grammar):
|
||
|
for prod in grammar.productions(lhs=grammar.start()):
|
||
|
new_edge = FeatureTreeEdge.from_production(prod, 0)
|
||
|
if chart.insert(new_edge, ()):
|
||
|
yield new_edge
|
||
|
|
||
|
class FeatureTopDownPredictRule(CachedTopDownPredictRule):
|
||
|
"""
|
||
|
A specialized version of the (cached) top down predict rule that operates
|
||
|
on nonterminals whose symbols are ``FeatStructNonterminal``s. Rather
|
||
|
than simply comparing the nonterminals for equality, they are
|
||
|
unified.
|
||
|
|
||
|
The top down expand rule states that:
|
||
|
|
||
|
- ``[A -> alpha \* B1 beta][i:j]``
|
||
|
|
||
|
licenses the edge:
|
||
|
|
||
|
- ``[B2 -> \* gamma][j:j]``
|
||
|
|
||
|
for each grammar production ``B2 -> gamma``, assuming that B1
|
||
|
and B2 can be unified.
|
||
|
"""
|
||
|
def apply(self, chart, grammar, edge):
|
||
|
if edge.is_complete(): return
|
||
|
nextsym, index = edge.nextsym(), edge.end()
|
||
|
if not is_nonterminal(nextsym): return
|
||
|
|
||
|
# If we've already applied this rule to an edge with the same
|
||
|
# next & end, and the chart & grammar have not changed, then
|
||
|
# just return (no new edges to add).
|
||
|
nextsym_with_bindings = edge.next_with_bindings()
|
||
|
done = self._done.get((nextsym_with_bindings, index), (None, None))
|
||
|
if done[0] is chart and done[1] is grammar:
|
||
|
return
|
||
|
|
||
|
for prod in grammar.productions(lhs=nextsym):
|
||
|
# If the left corner in the predicted production is
|
||
|
# leaf, it must match with the input.
|
||
|
if prod.rhs():
|
||
|
first = prod.rhs()[0]
|
||
|
if is_terminal(first):
|
||
|
if index >= chart.num_leaves(): continue
|
||
|
if first != chart.leaf(index): continue
|
||
|
|
||
|
# We rename vars here, because we don't want variables
|
||
|
# from the two different productions to match.
|
||
|
if unify(prod.lhs(), nextsym_with_bindings, rename_vars=True):
|
||
|
new_edge = FeatureTreeEdge.from_production(prod, edge.end())
|
||
|
if chart.insert(new_edge, ()):
|
||
|
yield new_edge
|
||
|
|
||
|
# Record the fact that we've applied this rule.
|
||
|
self._done[nextsym_with_bindings, index] = (chart, grammar)
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Bottom-Up Prediction
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
class FeatureBottomUpPredictRule(BottomUpPredictRule):
|
||
|
def apply(self, chart, grammar, edge):
|
||
|
if edge.is_incomplete(): return
|
||
|
for prod in grammar.productions(rhs=edge.lhs()):
|
||
|
if isinstance(edge, FeatureTreeEdge):
|
||
|
_next = prod.rhs()[0]
|
||
|
if not is_nonterminal(_next): continue
|
||
|
|
||
|
new_edge = FeatureTreeEdge.from_production(prod, edge.start())
|
||
|
if chart.insert(new_edge, ()):
|
||
|
yield new_edge
|
||
|
|
||
|
class FeatureBottomUpPredictCombineRule(BottomUpPredictCombineRule):
|
||
|
def apply(self, chart, grammar, edge):
|
||
|
if edge.is_incomplete(): return
|
||
|
found = edge.lhs()
|
||
|
for prod in grammar.productions(rhs=found):
|
||
|
bindings = {}
|
||
|
if isinstance(edge, FeatureTreeEdge):
|
||
|
_next = prod.rhs()[0]
|
||
|
if not is_nonterminal(_next): continue
|
||
|
|
||
|
# We rename vars here, because we don't want variables
|
||
|
# from the two different productions to match.
|
||
|
used_vars = find_variables((prod.lhs(),) + prod.rhs(),
|
||
|
fs_class=FeatStruct)
|
||
|
found = found.rename_variables(used_vars=used_vars)
|
||
|
|
||
|
result = unify(_next, found, bindings, rename_vars=False)
|
||
|
if result is None: continue
|
||
|
|
||
|
new_edge = (FeatureTreeEdge.from_production(prod, edge.start())
|
||
|
.move_dot_forward(edge.end(), bindings))
|
||
|
if chart.insert(new_edge, (edge,)):
|
||
|
yield new_edge
|
||
|
|
||
|
class FeatureEmptyPredictRule(EmptyPredictRule):
|
||
|
def apply(self, chart, grammar):
|
||
|
for prod in grammar.productions(empty=True):
|
||
|
for index in range(chart.num_leaves() + 1):
|
||
|
new_edge = FeatureTreeEdge.from_production(prod, index)
|
||
|
if chart.insert(new_edge, ()):
|
||
|
yield new_edge
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Feature Chart Parser
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
TD_FEATURE_STRATEGY = [LeafInitRule(),
|
||
|
FeatureTopDownInitRule(),
|
||
|
FeatureTopDownPredictRule(),
|
||
|
FeatureSingleEdgeFundamentalRule()]
|
||
|
BU_FEATURE_STRATEGY = [LeafInitRule(),
|
||
|
FeatureEmptyPredictRule(),
|
||
|
FeatureBottomUpPredictRule(),
|
||
|
FeatureSingleEdgeFundamentalRule()]
|
||
|
BU_LC_FEATURE_STRATEGY = [LeafInitRule(),
|
||
|
FeatureEmptyPredictRule(),
|
||
|
FeatureBottomUpPredictCombineRule(),
|
||
|
FeatureSingleEdgeFundamentalRule()]
|
||
|
|
||
|
class FeatureChartParser(ChartParser):
|
||
|
def __init__(self, grammar,
|
||
|
strategy=BU_LC_FEATURE_STRATEGY,
|
||
|
trace_chart_width=20,
|
||
|
chart_class=FeatureChart,
|
||
|
**parser_args):
|
||
|
ChartParser.__init__(self, grammar,
|
||
|
strategy=strategy,
|
||
|
trace_chart_width=trace_chart_width,
|
||
|
chart_class=chart_class,
|
||
|
**parser_args)
|
||
|
|
||
|
class FeatureTopDownChartParser(FeatureChartParser):
|
||
|
def __init__(self, grammar, **parser_args):
|
||
|
FeatureChartParser.__init__(self, grammar, TD_FEATURE_STRATEGY, **parser_args)
|
||
|
|
||
|
class FeatureBottomUpChartParser(FeatureChartParser):
|
||
|
def __init__(self, grammar, **parser_args):
|
||
|
FeatureChartParser.__init__(self, grammar, BU_FEATURE_STRATEGY, **parser_args)
|
||
|
|
||
|
class FeatureBottomUpLeftCornerChartParser(FeatureChartParser):
|
||
|
def __init__(self, grammar, **parser_args):
|
||
|
FeatureChartParser.__init__(self, grammar, BU_LC_FEATURE_STRATEGY, **parser_args)
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Instantiate Variable Chart
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
class InstantiateVarsChart(FeatureChart):
|
||
|
"""
|
||
|
A specialized chart that 'instantiates' variables whose names
|
||
|
start with '@', by replacing them with unique new variables.
|
||
|
In particular, whenever a complete edge is added to the chart, any
|
||
|
variables in the edge's ``lhs`` whose names start with '@' will be
|
||
|
replaced by unique new ``Variable``s.
|
||
|
"""
|
||
|
def __init__(self, tokens):
|
||
|
FeatureChart.__init__(self, tokens)
|
||
|
|
||
|
def initialize(self):
|
||
|
self._instantiated = set()
|
||
|
FeatureChart.initialize(self)
|
||
|
|
||
|
def insert(self, edge, child_pointer_list):
|
||
|
if edge in self._instantiated: return False
|
||
|
self.instantiate_edge(edge)
|
||
|
return FeatureChart.insert(self, edge, child_pointer_list)
|
||
|
|
||
|
def instantiate_edge(self, edge):
|
||
|
"""
|
||
|
If the edge is a ``FeatureTreeEdge``, and it is complete,
|
||
|
then instantiate all variables whose names start with '@',
|
||
|
by replacing them with unique new variables.
|
||
|
|
||
|
Note that instantiation is done in-place, since the
|
||
|
parsing algorithms might already hold a reference to
|
||
|
the edge for future use.
|
||
|
"""
|
||
|
# If the edge is a leaf, or is not complete, or is
|
||
|
# already in the chart, then just return it as-is.
|
||
|
if not isinstance(edge, FeatureTreeEdge): return
|
||
|
if not edge.is_complete(): return
|
||
|
if edge in self._edge_to_cpls: return
|
||
|
|
||
|
# Get a list of variables that need to be instantiated.
|
||
|
# If there are none, then return as-is.
|
||
|
inst_vars = self.inst_vars(edge)
|
||
|
if not inst_vars: return
|
||
|
|
||
|
# Instantiate the edge!
|
||
|
self._instantiated.add(edge)
|
||
|
edge._lhs = edge.lhs().substitute_bindings(inst_vars)
|
||
|
|
||
|
def inst_vars(self, edge):
|
||
|
return dict((var, logic.unique_variable())
|
||
|
for var in edge.lhs().variables()
|
||
|
if var.name.startswith('@'))
|
||
|
|
||
|
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
# Demo
|
||
|
#////////////////////////////////////////////////////////////
|
||
|
|
||
|
def demo_grammar():
|
||
|
from nltk.grammar import FeatureGrammar
|
||
|
return FeatureGrammar.fromstring("""
|
||
|
S -> NP VP
|
||
|
PP -> Prep NP
|
||
|
NP -> NP PP
|
||
|
VP -> VP PP
|
||
|
VP -> Verb NP
|
||
|
VP -> Verb
|
||
|
NP -> Det[pl=?x] Noun[pl=?x]
|
||
|
NP -> "John"
|
||
|
NP -> "I"
|
||
|
Det -> "the"
|
||
|
Det -> "my"
|
||
|
Det[-pl] -> "a"
|
||
|
Noun[-pl] -> "dog"
|
||
|
Noun[-pl] -> "cookie"
|
||
|
Verb -> "ate"
|
||
|
Verb -> "saw"
|
||
|
Prep -> "with"
|
||
|
Prep -> "under"
|
||
|
""")
|
||
|
|
||
|
def demo(print_times=True, print_grammar=True,
|
||
|
print_trees=True, print_sentence=True,
|
||
|
trace=1,
|
||
|
parser=FeatureChartParser,
|
||
|
sent='I saw John with a dog with my cookie'):
|
||
|
import sys, time
|
||
|
print()
|
||
|
grammar = demo_grammar()
|
||
|
if print_grammar:
|
||
|
print(grammar)
|
||
|
print()
|
||
|
print("*", parser.__name__)
|
||
|
if print_sentence:
|
||
|
print("Sentence:", sent)
|
||
|
tokens = sent.split()
|
||
|
t = time.clock()
|
||
|
cp = parser(grammar, trace=trace)
|
||
|
chart = cp.chart_parse(tokens)
|
||
|
trees = list(chart.parses(grammar.start()))
|
||
|
if print_times:
|
||
|
print("Time: %s" % (time.clock() - t))
|
||
|
if print_trees:
|
||
|
for tree in trees: print(tree)
|
||
|
else:
|
||
|
print("Nr trees:", len(trees))
|
||
|
|
||
|
def run_profile():
|
||
|
import profile
|
||
|
profile.run('for i in range(1): demo()', '/tmp/profile.out')
|
||
|
import pstats
|
||
|
p = pstats.Stats('/tmp/profile.out')
|
||
|
p.strip_dirs().sort_stats('time', 'cum').print_stats(60)
|
||
|
p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
from nltk.data import load
|
||
|
demo()
|
||
|
print()
|
||
|
grammar = load('grammars/book_grammars/feat0.fcfg')
|
||
|
cp = FeatureChartParser(grammar, trace=2)
|
||
|
sent = 'Kim likes children'
|
||
|
tokens = sent.split()
|
||
|
trees = cp.parse(tokens)
|
||
|
for tree in trees:
|
||
|
print(tree)
|