laywerrobot/lib/python3.6/site-packages/nltk/test/unit/test_senna.py

75 lines
3.1 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# -*- coding: utf-8 -*-
"""
Unit tests for Senna
"""
from __future__ import unicode_literals
from os import environ, path, sep
import logging
import unittest
from nltk.classify import Senna
from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger
# Resolve the Senna installation directory used by the tests in this module.
# A non-empty SENNA environment variable takes precedence: it is normalised
# and given a trailing path separator.  Otherwise the hard-coded default
# location is used.
#
# An empty SENNA value is deliberately treated as "not set": normpath('')
# returns '.', so the resulting './' would always exist and the tests would
# be run (and fail) on machines without Senna instead of being skipped.
if environ.get('SENNA'):
    SENNA_EXECUTABLE_PATH = path.normpath(environ['SENNA']) + sep
else:
    SENNA_EXECUTABLE_PATH = '/usr/share/senna-v3.0'

# True when the resolved path exists on disk; used as the skip condition
# for the Senna test cases.
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaPipeline(unittest.TestCase):
    """Tests for the Senna pipeline wrapper imported from nltk.classify."""

    def test_senna_pipeline(self):
        """Run the 'pos', 'chk' and 'ner' operations on a single sentence."""
        senna = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
        words = 'Dusseldorf is an international business center'.split()

        # Flatten every annotated token into a (word, chk, ner, pos) tuple so
        # the whole sentence can be compared in one assertion.
        observed = []
        for annotated in senna.tag(words):
            observed.append(
                (
                    annotated['word'],
                    annotated['chk'],
                    annotated['ner'],
                    annotated['pos'],
                )
            )

        wanted = [
            ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
            ('is', 'B-VP', 'O', 'VBZ'),
            ('an', 'B-NP', 'O', 'DT'),
            ('international', 'I-NP', 'O', 'JJ'),
            ('business', 'I-NP', 'O', 'NN'),
            ('center', 'I-NP', 'O', 'NN'),
        ]
        self.assertEqual(observed, wanted)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaTagger(unittest.TestCase):
    """Tests for the Senna-backed taggers imported from nltk.tag."""

    def test_senna_tagger(self):
        """SennaTagger.tag should pair every token with the expected tag."""
        words = 'What is the airspeed of an unladen swallow ?'.split()
        tags = ['WP', 'VBZ', 'DT', 'NN', 'IN', 'DT', 'NN', 'NN', '.']

        observed = SennaTagger(SENNA_EXECUTABLE_PATH).tag(words)

        # zip() yields the same list of (word, tag) tuples as a literal would.
        self.assertEqual(observed, list(zip(words, tags)))

    def test_senna_chunk_tagger(self):
        """Check the chunk tags and the NP chunks derived from them."""
        chunker = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
        words = 'What is the airspeed of an unladen swallow ?'.split()

        tagged = chunker.tag(words)
        # The NP chunks are derived from the tagger's own output before any
        # assertion is made.
        np_chunks = list(chunker.bio_to_chunks(tagged, chunk_type='NP'))

        wanted_tagged = [
            ('What', 'B-NP'),
            ('is', 'B-VP'),
            ('the', 'B-NP'),
            ('airspeed', 'I-NP'),
            ('of', 'B-PP'),
            ('an', 'B-NP'),
            ('unladen', 'I-NP'),
            ('swallow', 'I-NP'),
            ('?', 'O'),
        ]
        # Each expected chunk is the phrase text plus its dash-joined token
        # positions in the sentence.
        wanted_chunks = [
            ('What', '0'),
            ('the airspeed', '2-3'),
            ('an unladen swallow', '5-6-7'),
        ]

        self.assertEqual(tagged, wanted_tagged)
        self.assertEqual(np_chunks, wanted_chunks)

    def test_senna_ner_tagger(self):
        """SennaNERTagger.tag should give the expected tags for two sentences."""
        ner = SennaNERTagger(SENNA_EXECUTABLE_PATH)

        first = ner.tag('Shakespeare theatre was in London .'.split())
        second = ner.tag('UN headquarters are in NY , USA .'.split())

        wanted_first = [
            ('Shakespeare', 'B-PER'),
            ('theatre', 'O'),
            ('was', 'O'),
            ('in', 'O'),
            ('London', 'B-LOC'),
            ('.', 'O'),
        ]
        wanted_second = [
            ('UN', 'B-ORG'),
            ('headquarters', 'O'),
            ('are', 'O'),
            ('in', 'O'),
            ('NY', 'B-LOC'),
            (',', 'O'),
            ('USA', 'B-LOC'),
            ('.', 'O'),
        ]

        self.assertEqual(first, wanted_first)
        self.assertEqual(second, wanted_second)