laywerrobot/lib/python3.6/site-packages/gensim/test/test_indirect_confirmation.py
2020-08-27 21:55:39 +02:00

74 lines
2.8 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011 Radim Rehurek <radimrehurek@seznam.cz>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
"""
Automated tests for indirect confirmation measures in the indirect_confirmation_measure module.
"""
import logging
import unittest
import numpy as np
from gensim.corpora.dictionary import Dictionary
from gensim.topic_coherence import indirect_confirmation_measure
from gensim.topic_coherence import text_analysis
class TestIndirectConfirmation(unittest.TestCase):
    """Exercise the indirect confirmation measures on a tiny hand-built fixture."""

    def setUp(self):
        """Prepare a toy topic, its segmentation and a two-token dictionary.

        The example is deliberately small so that the inputs handed to the
        measures can be followed by hand; see the tested modules for the
        mathematical formulas.
        """
        # Only the id -> token mapping is filled in by hand.
        toy_dictionary = Dictionary()
        toy_dictionary.id2token = {1: 'fake', 2: 'tokens'}
        self.dictionary = toy_dictionary

        self.measure = 'nlr'
        self.gamma = 1

        self.topics = [np.array([1, 2])]
        # Result from s_one_set segmentation:
        self.segmentation = [[(1, np.array([1, 2])), (2, np.array([1, 2]))]]

    def testCosineSimilarity(self):
        """Test cosine_similarity()"""
        index_accumulator = text_analysis.InvertedIndexAccumulator({1, 2}, self.dictionary)
        # Inject the accumulator state directly instead of accumulating texts.
        index_accumulator._inverted_index = {0: {2, 3, 4}, 1: {3, 5}}
        index_accumulator._num_docs = 5

        # Both calls below share the same positional arguments.
        call_args = (
            self.segmentation,
            index_accumulator,
            self.topics,
            self.measure,
            self.gamma,
        )
        plain_scores = indirect_confirmation_measure.cosine_similarity(*call_args)

        # How the expected value is derived:
        # 1. Take the segment (1, array([1, 2])); its w' is 1.
        # 2. The first vector is nlr(1, 1), nlr(1, 2).
        # 3. Its w* is array([1, 2]).
        # 4. The second vector comes from nlr(1, 1) + nlr(2, 1) and from
        #    nlr(1, 2), nlr(2, 2).
        # 5. The score is the cosine similarity between these two vectors.
        # 6. The second segment is handled the same way.
        per_segment = (0.6230, 0.6230)  # To account for EPSILON approximation
        expected = sum(per_segment) / len(per_segment)
        self.assertAlmostEqual(expected, plain_scores[0], places=4)

        scores_with_std = indirect_confirmation_measure.cosine_similarity(
            *call_args, with_std=True)
        mean, std = scores_with_std[0]
        self.assertAlmostEqual(expected, mean, places=4)
        self.assertAlmostEqual(0.0, std, places=1)

    def testWord2VecSimilarity(self):
        """Sanity check word2vec_similarity."""
        vector_accumulator = text_analysis.WordVectorsAccumulator({1, 2}, self.dictionary)
        texts = [
            ['fake', 'tokens'],
            ['tokens', 'fake'],
        ]
        vector_accumulator.accumulate(texts, 5)

        scores_with_std = indirect_confirmation_measure.word2vec_similarity(
            self.segmentation, vector_accumulator, with_std=True)
        mean, std = scores_with_std[0]
        # Only a sanity check: neither statistic may be exactly zero.
        self.assertNotEqual(0.0, mean)
        self.assertNotEqual(0.0, std)
if __name__ == '__main__':
    # Limit log output to WARNING and above on the root logger, then hand
    # control to the unittest command-line runner.
    logging.getLogger().setLevel(logging.WARNING)
    unittest.main()