88 lines
2.4 KiB
Python
88 lines
2.4 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# Natural Language Toolkit: Generating from a CFG
|
||
|
#
|
||
|
# Copyright (C) 2001-2018 NLTK Project
|
||
|
# Author: Steven Bird <stevenbird1@gmail.com>
|
||
|
# Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
|
||
|
# URL: <http://nltk.org/>
|
||
|
# For license information, see LICENSE.TXT
|
||
|
#
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import itertools
|
||
|
import sys
|
||
|
from nltk.grammar import Nonterminal
|
||
|
|
||
|
|
||
|
def generate(grammar, start=None, depth=None, n=None):
|
||
|
"""
|
||
|
Generates an iterator of all sentences from a CFG.
|
||
|
|
||
|
:param grammar: The Grammar used to generate sentences.
|
||
|
:param start: The Nonterminal from which to start generate sentences.
|
||
|
:param depth: The maximal depth of the generated tree.
|
||
|
:param n: The maximum number of sentences to return.
|
||
|
:return: An iterator of lists of terminal tokens.
|
||
|
"""
|
||
|
if not start:
|
||
|
start = grammar.start()
|
||
|
if depth is None:
|
||
|
depth = sys.maxsize
|
||
|
|
||
|
iter = _generate_all(grammar, [start], depth)
|
||
|
|
||
|
if n:
|
||
|
iter = itertools.islice(iter, n)
|
||
|
|
||
|
return iter
|
||
|
|
||
|
|
||
|
def _generate_all(grammar, items, depth):
|
||
|
if items:
|
||
|
try:
|
||
|
for frag1 in _generate_one(grammar, items[0], depth):
|
||
|
for frag2 in _generate_all(grammar, items[1:], depth):
|
||
|
yield frag1 + frag2
|
||
|
except RuntimeError as _error:
|
||
|
if _error.message == "maximum recursion depth exceeded":
|
||
|
# Helpful error message while still showing the recursion stack.
|
||
|
raise RuntimeError("The grammar has rule(s) that yield infinite recursion!!")
|
||
|
else:
|
||
|
raise
|
||
|
else:
|
||
|
yield []
|
||
|
|
||
|
|
||
|
def _generate_one(grammar, item, depth):
|
||
|
if depth > 0:
|
||
|
if isinstance(item, Nonterminal):
|
||
|
for prod in grammar.productions(lhs=item):
|
||
|
for frag in _generate_all(grammar, prod.rhs(), depth-1):
|
||
|
yield frag
|
||
|
else:
|
||
|
yield [item]
|
||
|
|
||
|
demo_grammar = """
|
||
|
S -> NP VP
|
||
|
NP -> Det N
|
||
|
PP -> P NP
|
||
|
VP -> 'slept' | 'saw' NP | 'walked' PP
|
||
|
Det -> 'the' | 'a'
|
||
|
N -> 'man' | 'park' | 'dog'
|
||
|
P -> 'in' | 'with'
|
||
|
"""
|
||
|
|
||
|
|
||
|
def demo(N=23):
|
||
|
from nltk.grammar import CFG
|
||
|
|
||
|
print('Generating the first %d sentences for demo grammar:' % (N,))
|
||
|
print(demo_grammar)
|
||
|
grammar = CFG.fromstring(demo_grammar)
|
||
|
for n, sent in enumerate(generate(grammar, n=N), 1):
|
||
|
print('%3d. %s' % (n, ' '.join(sent)))
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
demo()
|