laywerrobot/lib/python3.6/site-packages/gensim/topic_coherence/segmentation.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013 Radim Rehurek <radimrehurek@seznam.cz>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

"""This module contains functions to perform segmentation on a list of topics."""

import logging

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def s_one_pre(topics):
    r"""Perform s_one_pre segmentation on a list of topics.

    Every word of a topic (except the first) is paired with each word that
    precedes it in the same topic.

    Notes
    -----
    Segmentation is defined as
    :math:`s_{pre} = {(W', W^{*}) | W' = w_{i}; W^{*} = {w_j}; w_{i}, w_{j} \in W; i > j}`.

    Parameters
    ----------
    topics : list of numpy.ndarray
        List of topics obtained from an algorithm such as LDA.
        Each topic must support slicing.

    Returns
    -------
    list of list of (int, int)
        One list per topic, in input order, holding the :math:`(W', W^{*})` pairs of that topic.
        A topic with fewer than two words yields an empty list.

    Examples
    --------
    >>> import numpy as np
    >>> from gensim.topic_coherence import segmentation
    >>>
    >>> topics = [np.array([1, 2, 3]), np.array([4, 5, 6])]
    >>> segmentation.s_one_pre(topics)
    [[(2, 1), (3, 1), (3, 2)], [(5, 4), (6, 4), (6, 5)]]

    """
    # NOTE: the docstring is a raw string so that LaTeX commands such as
    # ``\in`` are not parsed as (invalid) string escape sequences.
    #
    # Enumeration starts at 1 because the first word has no predecessors;
    # ``top_words[:w_prime_index]`` is then exactly the words before w_prime.
    return [
        [
            (w_prime, w_star)
            for w_prime_index, w_prime in enumerate(top_words[1:], start=1)
            for w_star in top_words[:w_prime_index]
        ]
        for top_words in topics
    ]


def s_one_one(topics):
    r"""Perform s_one_one segmentation on a list of topics.

    Every word of a topic is paired with every other word of the same topic,
    in both orders.

    Notes
    -----
    Segmentation is defined as
    :math:`s_{one} = {(W', W^{*}) | W' = {w_i}; W^{*} = {w_j}; w_{i}, w_{j} \in W; i \neq j}`.

    Parameters
    ----------
    topics : list of `numpy.ndarray`
        List of topics obtained from an algorithm such as LDA.

    Returns
    -------
    list of list of (int, int)
        One list per topic, in input order, holding the :math:`(W', W^{*})` pairs of that topic.
        Pairs are excluded by position, not by value, so a word id that occurs
        twice in a topic is still paired with its duplicate.

    Examples
    --------
    >>> import numpy as np
    >>> from gensim.topic_coherence import segmentation
    >>>
    >>> topics = [np.array([1, 2, 3]), np.array([4, 5, 6])]
    >>> segmentation.s_one_one(topics)
    [[(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)], [(4, 5), (4, 6), (5, 4), (5, 6), (6, 4), (6, 5)]]

    """
    # NOTE: the docstring is a raw string so that LaTeX commands such as
    # ``\in`` and ``\neq`` are not parsed as string escape sequences.
    return [
        [
            (w_prime, w_star)
            for w_prime_index, w_prime in enumerate(top_words)
            for w_star_index, w_star in enumerate(top_words)
            # Skip only the pairing of a position with itself.
            if w_prime_index != w_star_index
        ]
        for top_words in topics
    ]


def s_one_set(topics):
    r"""Perform s_one_set segmentation on a list of topics.

    Every word of a topic is paired with the complete topic it belongs to.

    Notes
    -----
    Segmentation is defined as
    :math:`s_{set} = {(W', W^{*}) | W' = {w_i}; w_{i} \in W; W^{*} = W}`

    Parameters
    ----------
    topics : list of `numpy.ndarray`
        List of topics obtained from an algorithm such as LDA.

    Returns
    -------
    list of list of (int, numpy.ndarray)
        One list per topic, in input order, holding the :math:`(W', W^{*})` pairs of that topic.
        :math:`W^{*}` is the very same topic object that was passed in (it is not copied).

    Examples
    --------
    >>> import numpy as np
    >>> from gensim.topic_coherence import segmentation
    >>>
    >>> topics = [np.array([9, 10, 7])]
    >>> segmentation.s_one_set(topics)
    [[(9, array([ 9, 10,  7])), (10, array([ 9, 10,  7])), (7, array([ 9, 10,  7]))]]

    """
    # NOTE: the docstring is a raw string so that the LaTeX command ``\in``
    # is not parsed as an (invalid) string escape sequence.
    return [
        [(w_prime, top_words) for w_prime in top_words]
        for top_words in topics
    ]