laywerrobot/lib/python3.6/site-packages/gensim/test/test_normmodel.py
2020-08-27 21:55:39 +02:00

155 lines
5.6 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2010 Radim Rehurek <radimrehurek@seznam.cz>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
"""
Automated tests for the NormModel transformation (gensim.models.normmodel).
"""
import logging
import unittest
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import issparse
from gensim.corpora import mmcorpus
from gensim.models import normmodel
from gensim.test.utils import datapath, get_tmpfile
class TestNormModel(unittest.TestCase):
    """Check ``NormModel`` l1/l2 normalization for each input type the tests cover.

    Covered inputs are a bag-of-words list of ``(id, value)`` tuples, a scipy
    CSR matrix and a numpy ndarray. The suite also checks that an unsupported
    norm name is rejected and that a saved model round-trips through
    ``save``/``load``.
    """

    # Expected result for the shared 3x3 fixture [[1, 0, 2], [0, 0, 3], [4, 5, 6]]:
    # every entry divided by the sum of all entries (21).
    _EXPECTED_L1 = np.array([
        [0.04761905, 0., 0.0952381],
        [0., 0., 0.14285714],
        [0.19047619, 0.23809524, 0.28571429],
    ])

    # Same fixture, every entry divided by the Euclidean norm of all entries (sqrt(91)).
    _EXPECTED_L2 = np.array([
        [0.10482848, 0., 0.20965697],
        [0., 0., 0.31448545],
        [0.41931393, 0.52414242, 0.6289709],
    ])

    def setUp(self):
        """Load the test corpus and build one l1 model and one l2 model from it."""
        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
        # Choose doc to be normalized. [3] chosen to demonstrate different results for l1 and l2 norm.
        # doc is [(1, 1.0), (5, 2.0), (8, 1.0)]
        self.doc = list(self.corpus)[3]
        self.model_l1 = normmodel.NormModel(self.corpus, norm='l1')
        self.model_l2 = normmodel.NormModel(self.corpus, norm='l2')

    @staticmethod
    def _dense_input():
        """Return a fresh 3x3 integer ndarray fixture (new object on every call)."""
        return np.array([
            [1, 0, 2],
            [0, 0, 3],
            [4, 5, 6],
        ])

    @staticmethod
    def _sparse_input():
        """Return a fresh CSR matrix holding the same values as ``_dense_input``."""
        row = np.array([0, 0, 1, 2, 2, 2])
        col = np.array([0, 2, 2, 0, 1, 2])
        data = np.array([1, 2, 3, 4, 5, 6])
        return csr_matrix((data, (row, col)), shape=(3, 3))

    def _check_sparse(self, model, expected):
        """Normalize the CSR fixture with ``model`` and compare against ``expected``."""
        normalized = model.normalize(self._sparse_input())
        # Check if output is of same type
        self.assertTrue(issparse(normalized))
        # Check if output is correct
        self.assertTrue(np.allclose(normalized.toarray(), expected))

    def _check_dense(self, model, expected):
        """Normalize the ndarray fixture with ``model`` and compare against ``expected``.

        Also checks that a flat list of ints, which is none of the supported
        input types, makes ``normalize`` raise ``ValueError``.
        """
        normalized = model.normalize(self._dense_input())
        # Check if output is of same type
        self.assertIsInstance(normalized, np.ndarray)
        # Check if output is correct
        self.assertTrue(np.allclose(normalized, expected))
        # Test if error is raised on unsupported input type
        with self.assertRaises(ValueError):
            model.normalize([1, 2, 3])

    def _check_persistence(self, fname, **load_kwargs):
        """Save a default model to ``fname``, reload it, and compare both copies.

        ``load_kwargs`` are forwarded unchanged to ``NormModel.load``.
        """
        model = normmodel.NormModel(self.corpus)
        model.save(fname)
        model2 = normmodel.NormModel.load(fname, **load_kwargs)
        # assertEqual reports the differing values on failure, unlike assertTrue(a == b).
        self.assertEqual(model.norms, model2.norms)
        tstvec = []
        # try projecting an empty vector
        self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec)))

    def test_tupleInput_l1(self):
        """Test tuple input for l1 transformation"""
        normalized = self.model_l1.normalize(self.doc)
        expected = [(1, 0.25), (5, 0.5), (8, 0.25)]
        self.assertTrue(np.allclose(normalized, expected))

    def test_sparseCSRInput_l1(self):
        """Test sparse csr matrix input for l1 transformation"""
        self._check_sparse(self.model_l1, self._EXPECTED_L1)

    def test_numpyndarrayInput_l1(self):
        """Test for np ndarray input for l1 transformation"""
        self._check_dense(self.model_l1, self._EXPECTED_L1)

    def test_tupleInput_l2(self):
        """Test tuple input for l2 transformation"""
        normalized = self.model_l2.normalize(self.doc)
        expected = [(1, 0.4082482904638631), (5, 0.8164965809277261), (8, 0.4082482904638631)]
        self.assertTrue(np.allclose(normalized, expected))

    def test_sparseCSRInput_l2(self):
        """Test sparse csr matrix input for l2 transformation"""
        self._check_sparse(self.model_l2, self._EXPECTED_L2)

    def test_numpyndarrayInput_l2(self):
        """Test for np ndarray input for l2 transformation"""
        self._check_dense(self.model_l2, self._EXPECTED_L2)

    def testInit(self):
        """Test if error messages raised on unsupported norm"""
        with self.assertRaises(ValueError):
            normmodel.NormModel(self.corpus, 'l0')

    def testPersistence(self):
        """Test that a model saved to a plain temp file reloads with equal norms."""
        self._check_persistence(get_tmpfile('gensim_models.tst'))

    def testPersistenceCompressed(self):
        """Test the save/load round trip with a ``.gz`` file name and ``mmap=None``.

        NOTE(review): the ``.gz`` suffix presumably makes ``save`` write a
        compressed file; that behavior is not visible from this test file.
        """
        self._check_persistence(get_tmpfile('gensim_models.tst.gz'), mmap=None)
if __name__ == '__main__':
    # When executed as a script, log at DEBUG level with timestamps and then
    # hand control to the unittest runner.
    log_format = '%(asctime)s : %(levelname)s : %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    unittest.main()