laywerrobot/lib/python3.6/site-packages/nltk/draw/dispersion.py

59 lines
1.7 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2018 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
"""
A utility for displaying lexical dispersion.
"""
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every target word gets its own row on the y axis, and a marker is
    drawn at each offset in ``text`` where that word occurs.  Rows are
    numbered in reverse order of ``words``, so the first target word
    receives the highest row index.

    :param text: The source text
    :type text: list(str) or enum(str)
    :param words: The target words; the caller's sequence is left unmodified
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: The title displayed on the plot
    :type title: str
    :raise ValueError: if matplotlib cannot be imported
    """
    try:
        from matplotlib import pylab
    except ImportError as err:
        # ValueError is kept (rather than re-raising ImportError) so that
        # existing callers catching ValueError keep working.
        raise ValueError(
            'The plot function requires matplotlib to be installed. '
            'See http://matplotlib.org/'
        ) from err

    # Reverse a *copy*: reversing in place would flip the caller's list on
    # every call and would reject tuples or other non-list iterables.
    labels = list(words)[::-1]
    keys = [word.lower() for word in labels] if ignore_case else labels

    # Map each (possibly lower-cased) target word to the row(s) it occupies.
    # A word listed more than once keeps all of its rows, in ascending order.
    rows_by_word = {}
    for row, key in enumerate(keys):
        rows_by_word.setdefault(key, []).append(row)

    # One pass over the text; points come out ordered by offset, then row.
    points = []
    for offset, token in enumerate(text):
        if ignore_case:
            token = token.lower()
        for row in rows_by_word.get(token, ()):
            points.append((offset, row))

    if points:
        x, y = zip(*points)
    else:
        # No occurrences at all: still draw the (empty) labelled axes.
        x = y = ()

    pylab.plot(x, y, "b|", scalex=.1)
    # Tick labels use the original spelling of the words, even when the
    # matching itself ignored case.
    pylab.yticks(list(range(len(labels))), labels, color="b")
    pylab.ylim(-1, len(labels))
    pylab.title(title)
    pylab.xlabel("Word Offset")
    pylab.show()
if __name__ == '__main__':
    # Demo: plot where four names occur across the 'austen-sense.txt'
    # text of the NLTK Gutenberg corpus.
    import nltk.compat
    from nltk.corpus import gutenberg

    demo_targets = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
    demo_tokens = gutenberg.words('austen-sense.txt')
    dispersion_plot(demo_tokens, demo_targets)