# laywerrobot/lib/python3.6/site-packages/nltk/tbl/erroranalysis.py
# 2020-08-27 21:55:39 +02:00
#
# 39 lines
# 1.4 KiB
# Python

# -*- coding: utf-8 -*-
# Natural Language Toolkit: Transformation-based learning
#
# Copyright (C) 2001-2018 NLTK Project
# Author: Marcus Uneson <marcus.uneson@gmail.com>
# based on previous (nltk2) version by
# Christopher Maloof, Edward Loper, Steven Bird
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
from __future__ import print_function
# returns a list of errors in string format
def error_list(train_sents, test_sents):
    """
    Return a list of human-readable strings indicating the errors in the
    given tagging of the corpus.

    The first element of the result is a two-line table header.  Every
    following element describes one token whose tag in ``test_sents``
    differs from its tag in ``train_sents``, rendered as
    ``word/test->gold`` between at most 25 characters of left context and
    25 characters of right context.  Context is built from the ``word/tag``
    pairs of the gold (``train_sents``) sentence.

    Sentences are paired positionally with ``zip``, so surplus sentences in
    the longer corpus are ignored.  Within a pair, tokens are compared by
    position; surplus tokens in the test sentence are ignored.

    :param train_sents: The correct tagging of the corpus
    :type train_sents: list(list(tuple))
    :param test_sents: The tagged corpus
    :type test_sents: list(list(tuple))
    :return: the header followed by one formatted line per mismatch
    :rtype: list(str)
    :raises IndexError: if a test sentence has fewer tokens than the
        corresponding gold sentence
    """
    # One row layout shared by the header and every error line.
    row_fmt = '%25s | %s | %s'
    rule = '-' * 26 + '+' + '-' * 24 + '+' + '-' * 26
    header = (row_fmt + '\n' + rule) % (
        'left context', 'word/test->gold'.center(22), 'right context')

    errors = [header]
    for gold_sent, test_sent in zip(train_sents, test_sents):
        for wordnum, (word, gold_tag) in enumerate(gold_sent):
            # Only the tag of the test token is consulted; the word itself
            # is always taken from the gold sentence.
            test_tag = test_sent[wordnum][1]
            if gold_tag != test_tag:
                left = ' '.join(
                    '%s/%s' % tok for tok in gold_sent[:wordnum])
                right = ' '.join(
                    '%s/%s' % tok for tok in gold_sent[wordnum + 1:])
                mid = '%s/%s->%s' % (word, test_tag, gold_tag)
                # Keep the 25 characters nearest to the erroneous token on
                # each side so the columns stay aligned.
                errors.append(
                    row_fmt % (left[-25:], mid.center(22), right[:25]))
    return errors