884 lines
31 KiB
Text
884 lines
31 KiB
Text
.. Copyright (C) 2001-2018 NLTK Project
|
|
.. For license information, see LICENSE.TXT
|
|
|
|
=========
|
|
Parsing
|
|
=========
|
|
|
|
Unit tests for the Context Free Grammar class
|
|
---------------------------------------------
|
|
|
|
>>> from nltk import Nonterminal, nonterminals, Production, CFG
|
|
|
|
>>> nt1 = Nonterminal('NP')
|
|
>>> nt2 = Nonterminal('VP')
|
|
|
|
>>> nt1.symbol()
|
|
'NP'
|
|
|
|
>>> nt1 == Nonterminal('NP')
|
|
True
|
|
|
|
>>> nt1 == nt2
|
|
False
|
|
|
|
>>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
|
|
>>> N, V, P, DT = nonterminals('N, V, P, DT')
|
|
|
|
>>> prod1 = Production(S, [NP, VP])
|
|
>>> prod2 = Production(NP, [DT, NP])
|
|
|
|
>>> prod1.lhs()
|
|
S
|
|
|
|
>>> prod1.rhs()
|
|
(NP, VP)
|
|
|
|
>>> prod1 == Production(S, [NP, VP])
|
|
True
|
|
|
|
>>> prod1 == prod2
|
|
False
|
|
|
|
>>> grammar = CFG.fromstring("""
|
|
... S -> NP VP
|
|
... PP -> P NP
|
|
... NP -> 'the' N | N PP | 'the' N PP
|
|
... VP -> V NP | V PP | V NP PP
|
|
... N -> 'cat'
|
|
... N -> 'dog'
|
|
... N -> 'rug'
|
|
... V -> 'chased'
|
|
... V -> 'sat'
|
|
... P -> 'in'
|
|
... P -> 'on'
|
|
... """)
|
|
|
|
Unit tests for the rd (Recursive Descent Parser) class
|
|
------------------------------------------------------
|
|
|
|
Create and run a recursive descent parser over both a syntactically ambiguous
|
|
and an unambiguous sentence.
|
|
|
|
>>> from nltk.parse import RecursiveDescentParser
|
|
>>> rd = RecursiveDescentParser(grammar)
|
|
|
|
>>> sentence1 = 'the cat chased the dog'.split()
|
|
>>> sentence2 = 'the cat chased the dog on the rug'.split()
|
|
|
|
>>> for t in rd.parse(sentence1):
|
|
... print(t)
|
|
(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
|
|
|
|
>>> for t in rd.parse(sentence2):
|
|
... print(t)
|
|
(S
|
|
(NP the (N cat))
|
|
(VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
|
|
(S
|
|
(NP the (N cat))
|
|
(VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
|
|
|
|
|
|
(dolist (expr doctest-font-lock-keywords)
|
|
(add-to-list 'font-lock-keywords expr))
|
|
|
|
font-lock-keywords
|
|
(add-to-list 'font-lock-keywords
|
|
(car doctest-font-lock-keywords))
|
|
|
|
|
|
Unit tests for the sr (Shift Reduce Parser) class
|
|
-------------------------------------------------
|
|
|
|
Create and run a shift reduce parser over both a syntactically ambiguous
|
|
and an unambiguous sentence. Note that, unlike the recursive descent parser, at
|
|
most one parse is ever returned.
|
|
|
|
>>> from nltk.parse import ShiftReduceParser
|
|
>>> sr = ShiftReduceParser(grammar)
|
|
|
|
>>> sentence1 = 'the cat chased the dog'.split()
|
|
>>> sentence2 = 'the cat chased the dog on the rug'.split()
|
|
|
|
>>> for t in sr.parse(sentence1):
|
|
... print(t)
|
|
(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
|
|
|
|
|
|
The shift reduce parser uses heuristics to decide what to do when there are
|
|
multiple possible shift or reduce operations available. For the supplied
|
|
grammar the wrong operation is selected here, so no parse is found and nothing is printed.
|
|
|
|
>>> for t in sr.parse(sentence2):
|
|
... print(t)
|
|
|
|
|
|
Unit tests for the Chart Parser class
|
|
-------------------------------------
|
|
|
|
We use the demo() function for testing.
|
|
We must turn off the printing of timings so that the output is reproducible.
|
|
|
|
>>> import nltk
|
|
|
|
First we test tracing with a short sentence
|
|
|
|
>>> nltk.parse.chart.demo(2, print_times=False, trace=1,
|
|
... sent='I saw a dog', numparses=1)
|
|
* Sentence:
|
|
I saw a dog
|
|
['I', 'saw', 'a', 'dog']
|
|
<BLANKLINE>
|
|
* Strategy: Bottom-up
|
|
<BLANKLINE>
|
|
|. I . saw . a . dog .|
|
|
|[---------] . . .| [0:1] 'I'
|
|
|. [---------] . .| [1:2] 'saw'
|
|
|. . [---------] .| [2:3] 'a'
|
|
|. . . [---------]| [3:4] 'dog'
|
|
|> . . . .| [0:0] NP -> * 'I'
|
|
|[---------] . . .| [0:1] NP -> 'I' *
|
|
|> . . . .| [0:0] S -> * NP VP
|
|
|> . . . .| [0:0] NP -> * NP PP
|
|
|[---------> . . .| [0:1] S -> NP * VP
|
|
|[---------> . . .| [0:1] NP -> NP * PP
|
|
|. > . . .| [1:1] Verb -> * 'saw'
|
|
|. [---------] . .| [1:2] Verb -> 'saw' *
|
|
|. > . . .| [1:1] VP -> * Verb NP
|
|
|. > . . .| [1:1] VP -> * Verb
|
|
|. [---------> . .| [1:2] VP -> Verb * NP
|
|
|. [---------] . .| [1:2] VP -> Verb *
|
|
|. > . . .| [1:1] VP -> * VP PP
|
|
|[-------------------] . .| [0:2] S -> NP VP *
|
|
|. [---------> . .| [1:2] VP -> VP * PP
|
|
|. . > . .| [2:2] Det -> * 'a'
|
|
|. . [---------] .| [2:3] Det -> 'a' *
|
|
|. . > . .| [2:2] NP -> * Det Noun
|
|
|. . [---------> .| [2:3] NP -> Det * Noun
|
|
|. . . > .| [3:3] Noun -> * 'dog'
|
|
|. . . [---------]| [3:4] Noun -> 'dog' *
|
|
|. . [-------------------]| [2:4] NP -> Det Noun *
|
|
|. . > . .| [2:2] S -> * NP VP
|
|
|. . > . .| [2:2] NP -> * NP PP
|
|
|. [-----------------------------]| [1:4] VP -> Verb NP *
|
|
|. . [------------------->| [2:4] S -> NP * VP
|
|
|. . [------------------->| [2:4] NP -> NP * PP
|
|
|[=======================================]| [0:4] S -> NP VP *
|
|
|. [----------------------------->| [1:4] VP -> VP * PP
|
|
Nr edges in chart: 33
|
|
(S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
|
|
<BLANKLINE>
|
|
|
|
Then we test the different parsing Strategies.
|
|
Note that the number of edges differs between the strategies.
|
|
|
|
Top-down
|
|
|
|
>>> nltk.parse.chart.demo(1, print_times=False, trace=0,
|
|
... sent='I saw John with a dog', numparses=2)
|
|
* Sentence:
|
|
I saw John with a dog
|
|
['I', 'saw', 'John', 'with', 'a', 'dog']
|
|
<BLANKLINE>
|
|
* Strategy: Top-down
|
|
<BLANKLINE>
|
|
Nr edges in chart: 48
|
|
(S
|
|
(NP I)
|
|
(VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
|
|
(S
|
|
(NP I)
|
|
(VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
|
|
<BLANKLINE>
|
|
|
|
Bottom-up
|
|
|
|
>>> nltk.parse.chart.demo(2, print_times=False, trace=0,
|
|
... sent='I saw John with a dog', numparses=2)
|
|
* Sentence:
|
|
I saw John with a dog
|
|
['I', 'saw', 'John', 'with', 'a', 'dog']
|
|
<BLANKLINE>
|
|
* Strategy: Bottom-up
|
|
<BLANKLINE>
|
|
Nr edges in chart: 53
|
|
(S
|
|
(NP I)
|
|
(VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
|
|
(S
|
|
(NP I)
|
|
(VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
|
|
<BLANKLINE>
|
|
|
|
Bottom-up Left-Corner
|
|
|
|
>>> nltk.parse.chart.demo(3, print_times=False, trace=0,
|
|
... sent='I saw John with a dog', numparses=2)
|
|
* Sentence:
|
|
I saw John with a dog
|
|
['I', 'saw', 'John', 'with', 'a', 'dog']
|
|
<BLANKLINE>
|
|
* Strategy: Bottom-up left-corner
|
|
<BLANKLINE>
|
|
Nr edges in chart: 36
|
|
(S
|
|
(NP I)
|
|
(VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
|
|
(S
|
|
(NP I)
|
|
(VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
|
|
<BLANKLINE>
|
|
|
|
Left-Corner with Bottom-Up Filter
|
|
|
|
>>> nltk.parse.chart.demo(4, print_times=False, trace=0,
|
|
... sent='I saw John with a dog', numparses=2)
|
|
* Sentence:
|
|
I saw John with a dog
|
|
['I', 'saw', 'John', 'with', 'a', 'dog']
|
|
<BLANKLINE>
|
|
* Strategy: Filtered left-corner
|
|
<BLANKLINE>
|
|
Nr edges in chart: 28
|
|
(S
|
|
(NP I)
|
|
(VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
|
|
(S
|
|
(NP I)
|
|
(VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
|
|
<BLANKLINE>
|
|
|
|
The stepping chart parser
|
|
|
|
>>> nltk.parse.chart.demo(5, print_times=False, trace=1,
|
|
... sent='I saw John with a dog', numparses=2)
|
|
* Sentence:
|
|
I saw John with a dog
|
|
['I', 'saw', 'John', 'with', 'a', 'dog']
|
|
<BLANKLINE>
|
|
* Strategy: Stepping (top-down vs bottom-up)
|
|
<BLANKLINE>
|
|
*** SWITCH TO TOP DOWN
|
|
|[------] . . . . .| [0:1] 'I'
|
|
|. [------] . . . .| [1:2] 'saw'
|
|
|. . [------] . . .| [2:3] 'John'
|
|
|. . . [------] . .| [3:4] 'with'
|
|
|. . . . [------] .| [4:5] 'a'
|
|
|. . . . . [------]| [5:6] 'dog'
|
|
|> . . . . . .| [0:0] S -> * NP VP
|
|
|> . . . . . .| [0:0] NP -> * NP PP
|
|
|> . . . . . .| [0:0] NP -> * Det Noun
|
|
|> . . . . . .| [0:0] NP -> * 'I'
|
|
|[------] . . . . .| [0:1] NP -> 'I' *
|
|
|[------> . . . . .| [0:1] S -> NP * VP
|
|
|[------> . . . . .| [0:1] NP -> NP * PP
|
|
|. > . . . . .| [1:1] VP -> * VP PP
|
|
|. > . . . . .| [1:1] VP -> * Verb NP
|
|
|. > . . . . .| [1:1] VP -> * Verb
|
|
|. > . . . . .| [1:1] Verb -> * 'saw'
|
|
|. [------] . . . .| [1:2] Verb -> 'saw' *
|
|
|. [------> . . . .| [1:2] VP -> Verb * NP
|
|
|. [------] . . . .| [1:2] VP -> Verb *
|
|
|[-------------] . . . .| [0:2] S -> NP VP *
|
|
|. [------> . . . .| [1:2] VP -> VP * PP
|
|
*** SWITCH TO BOTTOM UP
|
|
|. . > . . . .| [2:2] NP -> * 'John'
|
|
|. . . > . . .| [3:3] PP -> * 'with' NP
|
|
|. . . > . . .| [3:3] Prep -> * 'with'
|
|
|. . . . > . .| [4:4] Det -> * 'a'
|
|
|. . . . . > .| [5:5] Noun -> * 'dog'
|
|
|. . [------] . . .| [2:3] NP -> 'John' *
|
|
|. . . [------> . .| [3:4] PP -> 'with' * NP
|
|
|. . . [------] . .| [3:4] Prep -> 'with' *
|
|
|. . . . [------] .| [4:5] Det -> 'a' *
|
|
|. . . . . [------]| [5:6] Noun -> 'dog' *
|
|
|. [-------------] . . .| [1:3] VP -> Verb NP *
|
|
|[--------------------] . . .| [0:3] S -> NP VP *
|
|
|. [-------------> . . .| [1:3] VP -> VP * PP
|
|
|. . > . . . .| [2:2] S -> * NP VP
|
|
|. . > . . . .| [2:2] NP -> * NP PP
|
|
|. . . . > . .| [4:4] NP -> * Det Noun
|
|
|. . [------> . . .| [2:3] S -> NP * VP
|
|
|. . [------> . . .| [2:3] NP -> NP * PP
|
|
|. . . . [------> .| [4:5] NP -> Det * Noun
|
|
|. . . . [-------------]| [4:6] NP -> Det Noun *
|
|
|. . . [--------------------]| [3:6] PP -> 'with' NP *
|
|
|. [----------------------------------]| [1:6] VP -> VP PP *
|
|
*** SWITCH TO TOP DOWN
|
|
|. . > . . . .| [2:2] NP -> * Det Noun
|
|
|. . . . > . .| [4:4] NP -> * NP PP
|
|
|. . . > . . .| [3:3] VP -> * VP PP
|
|
|. . . > . . .| [3:3] VP -> * Verb NP
|
|
|. . . > . . .| [3:3] VP -> * Verb
|
|
|[=========================================]| [0:6] S -> NP VP *
|
|
|. [---------------------------------->| [1:6] VP -> VP * PP
|
|
|. . [---------------------------]| [2:6] NP -> NP PP *
|
|
|. . . . [------------->| [4:6] NP -> NP * PP
|
|
|. [----------------------------------]| [1:6] VP -> Verb NP *
|
|
|. . [--------------------------->| [2:6] S -> NP * VP
|
|
|. . [--------------------------->| [2:6] NP -> NP * PP
|
|
|[=========================================]| [0:6] S -> NP VP *
|
|
|. [---------------------------------->| [1:6] VP -> VP * PP
|
|
|. . . . . . >| [6:6] VP -> * VP PP
|
|
|. . . . . . >| [6:6] VP -> * Verb NP
|
|
|. . . . . . >| [6:6] VP -> * Verb
|
|
*** SWITCH TO BOTTOM UP
|
|
|. . . . > . .| [4:4] S -> * NP VP
|
|
|. . . . [------------->| [4:6] S -> NP * VP
|
|
*** SWITCH TO TOP DOWN
|
|
*** SWITCH TO BOTTOM UP
|
|
*** SWITCH TO TOP DOWN
|
|
*** SWITCH TO BOTTOM UP
|
|
*** SWITCH TO TOP DOWN
|
|
*** SWITCH TO BOTTOM UP
|
|
Nr edges in chart: 61
|
|
(S
|
|
(NP I)
|
|
(VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
|
|
(S
|
|
(NP I)
|
|
(VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
|
|
<BLANKLINE>
|
|
|
|
|
|
Unit tests for the Incremental Chart Parser class
|
|
-------------------------------------------------
|
|
|
|
The incremental chart parsers are defined in earleychart.py.
|
|
We use the demo() function for testing. We must turn off the printing of timings so that the output is reproducible.
|
|
|
|
>>> import nltk
|
|
|
|
Earley Chart Parser
|
|
|
|
>>> nltk.parse.earleychart.demo(print_times=False, trace=1,
|
|
... sent='I saw John with a dog', numparses=2)
|
|
* Sentence:
|
|
I saw John with a dog
|
|
['I', 'saw', 'John', 'with', 'a', 'dog']
|
|
<BLANKLINE>
|
|
|. I . saw . John . with . a . dog .|
|
|
|[------] . . . . .| [0:1] 'I'
|
|
|. [------] . . . .| [1:2] 'saw'
|
|
|. . [------] . . .| [2:3] 'John'
|
|
|. . . [------] . .| [3:4] 'with'
|
|
|. . . . [------] .| [4:5] 'a'
|
|
|. . . . . [------]| [5:6] 'dog'
|
|
|> . . . . . .| [0:0] S -> * NP VP
|
|
|> . . . . . .| [0:0] NP -> * NP PP
|
|
|> . . . . . .| [0:0] NP -> * Det Noun
|
|
|> . . . . . .| [0:0] NP -> * 'I'
|
|
|[------] . . . . .| [0:1] NP -> 'I' *
|
|
|[------> . . . . .| [0:1] S -> NP * VP
|
|
|[------> . . . . .| [0:1] NP -> NP * PP
|
|
|. > . . . . .| [1:1] VP -> * VP PP
|
|
|. > . . . . .| [1:1] VP -> * Verb NP
|
|
|. > . . . . .| [1:1] VP -> * Verb
|
|
|. > . . . . .| [1:1] Verb -> * 'saw'
|
|
|. [------] . . . .| [1:2] Verb -> 'saw' *
|
|
|. [------> . . . .| [1:2] VP -> Verb * NP
|
|
|. [------] . . . .| [1:2] VP -> Verb *
|
|
|[-------------] . . . .| [0:2] S -> NP VP *
|
|
|. [------> . . . .| [1:2] VP -> VP * PP
|
|
|. . > . . . .| [2:2] NP -> * NP PP
|
|
|. . > . . . .| [2:2] NP -> * Det Noun
|
|
|. . > . . . .| [2:2] NP -> * 'John'
|
|
|. . [------] . . .| [2:3] NP -> 'John' *
|
|
|. [-------------] . . .| [1:3] VP -> Verb NP *
|
|
|. . [------> . . .| [2:3] NP -> NP * PP
|
|
|. . . > . . .| [3:3] PP -> * 'with' NP
|
|
|[--------------------] . . .| [0:3] S -> NP VP *
|
|
|. [-------------> . . .| [1:3] VP -> VP * PP
|
|
|. . . [------> . .| [3:4] PP -> 'with' * NP
|
|
|. . . . > . .| [4:4] NP -> * NP PP
|
|
|. . . . > . .| [4:4] NP -> * Det Noun
|
|
|. . . . > . .| [4:4] Det -> * 'a'
|
|
|. . . . [------] .| [4:5] Det -> 'a' *
|
|
|. . . . [------> .| [4:5] NP -> Det * Noun
|
|
|. . . . . > .| [5:5] Noun -> * 'dog'
|
|
|. . . . . [------]| [5:6] Noun -> 'dog' *
|
|
|. . . . [-------------]| [4:6] NP -> Det Noun *
|
|
|. . . [--------------------]| [3:6] PP -> 'with' NP *
|
|
|. . . . [------------->| [4:6] NP -> NP * PP
|
|
|. . [---------------------------]| [2:6] NP -> NP PP *
|
|
|. [----------------------------------]| [1:6] VP -> VP PP *
|
|
|[=========================================]| [0:6] S -> NP VP *
|
|
|. [---------------------------------->| [1:6] VP -> VP * PP
|
|
|. [----------------------------------]| [1:6] VP -> Verb NP *
|
|
|. . [--------------------------->| [2:6] NP -> NP * PP
|
|
|[=========================================]| [0:6] S -> NP VP *
|
|
|. [---------------------------------->| [1:6] VP -> VP * PP
|
|
(S
|
|
(NP I)
|
|
(VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
|
|
(S
|
|
(NP I)
|
|
(VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
|
|
|
|
|
|
Unit tests for LARGE context-free grammars
|
|
------------------------------------------
|
|
|
|
Reading the ATIS grammar.
|
|
|
|
>>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
|
|
>>> grammar
|
|
<Grammar with 5517 productions>
|
|
|
|
Reading the test sentences.
|
|
|
|
>>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
|
|
>>> sentences = nltk.parse.util.extract_test_sentences(sentences)
|
|
>>> len(sentences)
|
|
98
|
|
>>> testsentence = sentences[22]
|
|
>>> testsentence[0]
|
|
['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
|
|
>>> testsentence[1]
|
|
17
|
|
>>> sentence = testsentence[0]
|
|
|
|
Now we test all different parsing strategies.
|
|
Note that the number of edges differs between the strategies.
|
|
|
|
Bottom-up parsing.
|
|
|
|
>>> parser = nltk.parse.BottomUpChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
7661
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Bottom-up Left-corner parsing.
|
|
|
|
>>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
4986
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Left-corner parsing with bottom-up filter.
|
|
|
|
>>> parser = nltk.parse.LeftCornerChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
1342
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Top-down parsing.
|
|
|
|
>>> parser = nltk.parse.TopDownChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
28352
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Incremental Bottom-up parsing.
|
|
|
|
>>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
7661
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Incremental Bottom-up Left-corner parsing.
|
|
|
|
>>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
4986
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Incremental Left-corner parsing with bottom-up filter.
|
|
|
|
>>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
1342
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Incremental Top-down parsing.
|
|
|
|
>>> parser = nltk.parse.IncrementalTopDownChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
28352
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
Earley parsing. This is similar to the incremental top-down algorithm.
|
|
|
|
>>> parser = nltk.parse.EarleyChartParser(grammar)
|
|
>>> chart = parser.chart_parse(sentence)
|
|
>>> print((chart.num_edges()))
|
|
28352
|
|
>>> print((len(list(chart.parses(grammar.start())))))
|
|
17
|
|
|
|
|
|
Unit tests for the Probabilistic CFG class
|
|
------------------------------------------
|
|
|
|
>>> from nltk.corpus import treebank
|
|
>>> from itertools import islice
|
|
>>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2
|
|
|
|
Create a set of PCFG productions.
|
|
|
|
>>> grammar = PCFG.fromstring("""
|
|
... A -> B B [.3] | C B C [.7]
|
|
... B -> B D [.5] | C [.5]
|
|
... C -> 'a' [.1] | 'b' [0.9]
|
|
... D -> 'b' [1.0]
|
|
... """)
|
|
>>> prod = grammar.productions()[0]
|
|
>>> prod
|
|
A -> B B [0.3]
|
|
|
|
>>> prod.lhs()
|
|
A
|
|
|
|
>>> prod.rhs()
|
|
(B, B)
|
|
|
|
>>> print((prod.prob()))
|
|
0.3
|
|
|
|
>>> grammar.start()
|
|
A
|
|
|
|
>>> grammar.productions()
|
|
[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
|
|
|
|
Induce some productions using parsed Treebank data.
|
|
|
|
>>> productions = []
|
|
>>> for fileid in treebank.fileids()[:2]:
|
|
... for t in treebank.parsed_sents(fileid):
|
|
... productions += t.productions()
|
|
|
|
>>> grammar = induce_pcfg(S, productions)
|
|
>>> grammar
|
|
<Grammar with 71 productions>
|
|
|
|
>>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
|
|
[PP -> IN NP [1.0]]
|
|
>>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
|
|
[NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
|
|
>>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
|
|
[JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]
|
|
>>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]
|
|
[NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]
|
|
|
|
Unit tests for the Probabilistic Chart Parse classes
|
|
----------------------------------------------------
|
|
|
|
>>> tokens = "Jack saw Bob with my cookie".split()
|
|
>>> grammar = toy_pcfg2
|
|
>>> print(grammar)
|
|
Grammar with 23 productions (start state = S)
|
|
S -> NP VP [1.0]
|
|
VP -> V NP [0.59]
|
|
VP -> V [0.4]
|
|
VP -> VP PP [0.01]
|
|
NP -> Det N [0.41]
|
|
NP -> Name [0.28]
|
|
NP -> NP PP [0.31]
|
|
PP -> P NP [1.0]
|
|
V -> 'saw' [0.21]
|
|
V -> 'ate' [0.51]
|
|
V -> 'ran' [0.28]
|
|
N -> 'boy' [0.11]
|
|
N -> 'cookie' [0.12]
|
|
N -> 'table' [0.13]
|
|
N -> 'telescope' [0.14]
|
|
N -> 'hill' [0.5]
|
|
Name -> 'Jack' [0.52]
|
|
Name -> 'Bob' [0.48]
|
|
P -> 'with' [0.61]
|
|
P -> 'under' [0.39]
|
|
Det -> 'the' [0.41]
|
|
Det -> 'a' [0.31]
|
|
Det -> 'my' [0.28]
|
|
|
|
Create several parsers using different queuing strategies and show the
|
|
resulting parses.
|
|
|
|
>>> from nltk.parse import pchart
|
|
|
|
>>> parser = pchart.InsideChartParser(grammar)
|
|
>>> for t in parser.parse(tokens):
|
|
... print(t)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(V saw)
|
|
(NP
|
|
(NP (Name Bob))
|
|
(PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(VP (V saw) (NP (Name Bob)))
|
|
(PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
|
|
|
|
>>> parser = pchart.RandomChartParser(grammar)
|
|
>>> for t in parser.parse(tokens):
|
|
... print(t)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(V saw)
|
|
(NP
|
|
(NP (Name Bob))
|
|
(PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(VP (V saw) (NP (Name Bob)))
|
|
(PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
|
|
|
|
>>> parser = pchart.UnsortedChartParser(grammar)
|
|
>>> for t in parser.parse(tokens):
|
|
... print(t)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(V saw)
|
|
(NP
|
|
(NP (Name Bob))
|
|
(PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(VP (V saw) (NP (Name Bob)))
|
|
(PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
|
|
|
|
>>> parser = pchart.LongestChartParser(grammar)
|
|
>>> for t in parser.parse(tokens):
|
|
... print(t)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(V saw)
|
|
(NP
|
|
(NP (Name Bob))
|
|
(PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(VP (V saw) (NP (Name Bob)))
|
|
(PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
|
|
|
|
>>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)
|
|
>>> for t in parser.parse(tokens):
|
|
... print(t)
|
|
|
|
|
|
Unit tests for the Viterbi Parse classes
|
|
----------------------------------------
|
|
|
|
>>> from nltk.parse import ViterbiParser
|
|
>>> tokens = "Jack saw Bob with my cookie".split()
|
|
>>> grammar = toy_pcfg2
|
|
|
|
Parse the tokenized sentence.
|
|
|
|
>>> parser = ViterbiParser(grammar)
|
|
>>> for t in parser.parse(tokens):
|
|
... print(t)
|
|
(S
|
|
(NP (Name Jack))
|
|
(VP
|
|
(V saw)
|
|
(NP
|
|
(NP (Name Bob))
|
|
(PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
|
|
|
|
|
|
Unit tests for the FeatStructNonterminal class
|
|
----------------------------------------------
|
|
|
|
>>> from nltk.grammar import FeatStructNonterminal
|
|
>>> FeatStructNonterminal(
|
|
... pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
|
|
[agr=[gender='f', number='pl'], pos='n']
|
|
|
|
>>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
|
|
VP[+fin]/NP[+pl]
|
|
|
|
|
|
Tracing the Feature Chart Parser
|
|
--------------------------------
|
|
|
|
We use the featurechart.demo() function for tracing the Feature Chart Parser.
|
|
|
|
>>> nltk.parse.featurechart.demo(print_times=False,
|
|
... print_grammar=True,
|
|
... parser=nltk.parse.featurechart.FeatureChartParser,
|
|
... sent='I saw John with a dog')
|
|
<BLANKLINE>
|
|
Grammar with 18 productions (start state = S[])
|
|
S[] -> NP[] VP[]
|
|
PP[] -> Prep[] NP[]
|
|
NP[] -> NP[] PP[]
|
|
VP[] -> VP[] PP[]
|
|
VP[] -> Verb[] NP[]
|
|
VP[] -> Verb[]
|
|
NP[] -> Det[pl=?x] Noun[pl=?x]
|
|
NP[] -> 'John'
|
|
NP[] -> 'I'
|
|
Det[] -> 'the'
|
|
Det[] -> 'my'
|
|
Det[-pl] -> 'a'
|
|
Noun[-pl] -> 'dog'
|
|
Noun[-pl] -> 'cookie'
|
|
Verb[] -> 'ate'
|
|
Verb[] -> 'saw'
|
|
Prep[] -> 'with'
|
|
Prep[] -> 'under'
|
|
<BLANKLINE>
|
|
* FeatureChartParser
|
|
Sentence: I saw John with a dog
|
|
|.I.s.J.w.a.d.|
|
|
|[-] . . . . .| [0:1] 'I'
|
|
|. [-] . . . .| [1:2] 'saw'
|
|
|. . [-] . . .| [2:3] 'John'
|
|
|. . . [-] . .| [3:4] 'with'
|
|
|. . . . [-] .| [4:5] 'a'
|
|
|. . . . . [-]| [5:6] 'dog'
|
|
|[-] . . . . .| [0:1] NP[] -> 'I' *
|
|
|[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
|
|
|[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
|
|
|. [-] . . . .| [1:2] Verb[] -> 'saw' *
|
|
|. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
|
|
|. [-] . . . .| [1:2] VP[] -> Verb[] *
|
|
|. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
|
|
|[---] . . . .| [0:2] S[] -> NP[] VP[] *
|
|
|. . [-] . . .| [2:3] NP[] -> 'John' *
|
|
|. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
|
|
|. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
|
|
|. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
|
|
|. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
|
|
|[-----] . . .| [0:3] S[] -> NP[] VP[] *
|
|
|. . . [-] . .| [3:4] Prep[] -> 'with' *
|
|
|. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
|
|
|. . . . [-] .| [4:5] Det[-pl] -> 'a' *
|
|
|. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
|
|
|. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
|
|
|. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
|
|
|. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
|
|
|. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
|
|
|. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
|
|
|. . [-------]| [2:6] NP[] -> NP[] PP[] *
|
|
|. [---------]| [1:6] VP[] -> VP[] PP[] *
|
|
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|
|
|[===========]| [0:6] S[] -> NP[] VP[] *
|
|
|. . [------->| [2:6] S[] -> NP[] * VP[] {}
|
|
|. . [------->| [2:6] NP[] -> NP[] * PP[] {}
|
|
|. [---------]| [1:6] VP[] -> Verb[] NP[] *
|
|
|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
|
|
|[===========]| [0:6] S[] -> NP[] VP[] *
|
|
(S[]
|
|
(NP[] I)
|
|
(VP[]
|
|
(VP[] (Verb[] saw) (NP[] John))
|
|
(PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))
|
|
(S[]
|
|
(NP[] I)
|
|
(VP[]
|
|
(Verb[] saw)
|
|
(NP[]
|
|
(NP[] John)
|
|
(PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
|
|
|
|
|
|
Unit tests for the Feature Chart Parser classes
|
|
-----------------------------------------------
|
|
|
|
The list of parsers we want to test.
|
|
|
|
>>> parsers = [nltk.parse.featurechart.FeatureChartParser,
|
|
... nltk.parse.featurechart.FeatureTopDownChartParser,
|
|
... nltk.parse.featurechart.FeatureBottomUpChartParser,
|
|
... nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser,
|
|
... nltk.parse.earleychart.FeatureIncrementalChartParser,
|
|
... nltk.parse.earleychart.FeatureEarleyChartParser,
|
|
... nltk.parse.earleychart.FeatureIncrementalTopDownChartParser,
|
|
... nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser,
|
|
... nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser,
|
|
... ]
|
|
|
|
A helper function that tests each parser on the given grammar and sentence.
|
|
We check that the number of trees is correct, and that all parsers
|
|
return the same trees. Otherwise an error is printed.
|
|
|
|
>>> def unittest(grammar, sentence, nr_trees):
|
|
... sentence = sentence.split()
|
|
... trees = None
|
|
... for P in parsers:
|
|
... result = P(grammar).parse(sentence)
|
|
... result = set(tree.freeze() for tree in result)
|
|
... if len(result) != nr_trees:
|
|
... print("Wrong nr of trees:", len(result))
|
|
... elif trees is None:
|
|
... trees = result
|
|
... elif result != trees:
|
|
... print("Trees differ for parser:", P.__name__)
|
|
|
|
The demo grammar from before, with an ambiguous sentence.
|
|
|
|
>>> isawjohn = nltk.parse.featurechart.demo_grammar()
|
|
>>> unittest(isawjohn, "I saw John with a dog with my cookie", 5)
|
|
|
|
This grammar tests that variables in different grammar rules are renamed
|
|
before unification. (The problematic variable is in this case ?X).
|
|
|
|
>>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring('''
|
|
... S[] -> NP[num=?N] VP[num=?N, slash=?X]
|
|
... NP[num=?X] -> "what"
|
|
... NP[num=?X] -> "that"
|
|
... VP[num=?P, slash=none] -> V[num=?P] NP[]
|
|
... V[num=sg] -> "was"
|
|
... ''')
|
|
>>> unittest(whatwasthat, "what was that", 1)
|
|
|
|
This grammar tests that the same rule can be used in different places
|
|
in another rule, and that the variables are properly renamed.
|
|
|
|
>>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring('''
|
|
... S[] -> NP[case=nom] V[] NP[case=acc]
|
|
... NP[case=?X] -> Pron[case=?X]
|
|
... Pron[] -> "this"
|
|
... Pron[] -> "that"
|
|
... V[] -> "loves"
|
|
... ''')
|
|
>>> unittest(thislovesthat, "this loves that", 1)
|
|
|
|
|
|
Tests for loading feature grammar files
|
|
---------------------------------------
|
|
|
|
Alternative 1: first load the grammar, then create the parser.
|
|
|
|
>>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg')
|
|
>>> fcp1 = nltk.parse.FeatureChartParser(fcfg)
|
|
>>> print((type(fcp1)))
|
|
<class 'nltk.parse.featurechart.FeatureChartParser'>
|
|
|
|
Alternative 2: directly load the parser.
|
|
|
|
>>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg')
|
|
>>> print((type(fcp2)))
|
|
<class 'nltk.parse.featurechart.FeatureChartParser'>
|
|
|
|
|
|
|