laywerrobot/lib/python3.6/site-packages/gensim/summarization/syntactic_unit.py

54 lines
1.3 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
"""This module contains the implementation of the SyntacticUnit class. It is generally used during text cleaning.
:class:`~gensim.summarization.syntactic_unit.SyntacticUnit` represents printable version of provided text.
"""
class SyntacticUnit(object):
    """Pair an original piece of text with its processed (tokenized) form.

    Attributes
    ----------
    text : str
        Input text.
    token : str or None
        Tokenized text; None when no token was supplied.
    tag : str or None
        First two characters of the tag given to the constructor, or None
        when the tag was omitted or empty.
    index : int
        Index of syntactic unit in corpus, optional. Initialised to -1.
    score : float
        Score of syntactic unit, optional. Initialised to -1.

    """

    def __init__(self, text, token=None, tag=None):
        """

        Parameters
        ----------
        text : str
            Input text.
        token : str
            Tokenized text, optional.
        tag : str
            Tag of unit, optional. Only its first two characters are kept.

        """
        self.text = text
        self.token = token
        self.tag = tag[:2] if tag else None  # Just first two letters of tag
        # -1 marks "not assigned yet" for both fields.
        self.index = -1
        self.score = -1

    def __str__(self):
        """Return a printable summary showing the original and processed text.

        Returns
        -------
        str
            ``"Original unit: '<text>' *-*-*-* Processed unit: '<token>'"``.

        """
        # %-formatting rather than "+" concatenation: `token` defaults to None,
        # and concatenating None to a str raises TypeError.
        return "Original unit: '%s' *-*-*-* Processed unit: '%s'" % (self.text, self.token)

    def __repr__(self):
        """Return the same representation as :meth:`__str__`."""
        return str(self)