laywerrobot/lib/python3.6/site-packages/gensim/_matutils.pyx

#!/usr/bin/env cython
# coding: utf-8
# cython: embedsignature=True

from __future__ import division
cimport cython
import numpy as np
cimport numpy as np
ctypedef cython.floating DTYPE_t
from libc.math cimport log, exp, fabs
from cython.parallel import prange


def mean_absolute_difference(a, b):
    """Mean absolute difference between two arrays, using :func:`~gensim._matutils._mean_absolute_difference`.

    Parameters
    ----------
    a : numpy.ndarray
        Input 1d array, supports float16, float32 and float64.
    b : numpy.ndarray
        Input 1d array, supports float16, float32 and float64.

    Returns
    -------
    float
        mean(abs(a - b)).

    Raises
    ------
    ValueError
        If `a` and `b` do not have the same shape.
    TypeError
        If the dtype of `a` is not float16, float32 or float64.

    """
    if a.shape != b.shape:
        raise ValueError("a and b must have same shape")

    # The dtype of `a` selects the specialization of the fused-type kernel.
    if a.dtype == np.float64:
        return _mean_absolute_difference[double](a, b)
    elif a.dtype == np.float32:
        return _mean_absolute_difference[float](a, b)
    elif a.dtype == np.float16:
        # There is no float16 specialization, so both inputs are upcast to float32.
        return _mean_absolute_difference[float](a.astype(np.float32), b.astype(np.float32))

    # Fail loudly instead of falling through and implicitly returning None.
    raise TypeError("a and b must have dtype float16, float32 or float64, got %s" % a.dtype)


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:
    """Mean absolute difference between two 1d buffers.

    Parameters
    ----------
    a : DTYPE_t[:]
        First 1d typed memoryview; its length drives the loop.
    b : DTYPE_t[:]
        Second 1d typed memoryview, indexed over the length of `a`.

    Returns
    -------
    DTYPE_t
        mean(abs(a - b))

    Notes
    -----
    Bounds checking is disabled for this function, so the caller must make sure `b` is at least
    as long as `a`.

    """
    cdef DTYPE_t total = 0.0
    cdef size_t idx
    cdef size_t n = a.shape[0]

    for idx in range(n):
        total += fabs(a[idx] - b[idx])

    # cdivision is enabled, so this is a plain C division: no ZeroDivisionError is raised when n == 0.
    total /= n

    return total


def logsumexp(x):
    """Log of sum of exponentials, using :func:`~gensim._matutils._logsumexp_2d`.

    Parameters
    ----------
    x : numpy.ndarray
        Input 2d matrix, supports float16, float32 and float64.

    Returns
    -------
    float
        log of sum of exponentials of elements in `x`.

    Raises
    ------
    TypeError
        If the dtype of `x` is not float16, float32 or float64.

    Warnings
    --------
    For performance reasons, doesn't support NaNs or 1d, 3d, etc arrays like :func:`scipy.special.logsumexp`.

    """
    # The dtype of `x` selects the specialization of the fused-type kernel.
    if x.dtype == np.float64:
        return _logsumexp_2d[double](x)
    elif x.dtype == np.float32:
        return _logsumexp_2d[float](x)
    elif x.dtype == np.float16:
        # There is no float16 specialization, so the input is upcast to float32.
        return _logsumexp_2d[float](x.astype(np.float32))

    # Fail loudly instead of falling through and implicitly returning None.
    raise TypeError("x must have dtype float16, float32 or float64, got %s" % x.dtype)


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:
    """Log of sum of exponentials.

    Parameters
    ----------
    data : DTYPE_t[:, :]
        Input 2d typed memoryview.

    Returns
    -------
    DTYPE_t
        log of sum of exponentials of elements in `data`; for an empty view this is log(0.0).

    """
    cdef DTYPE_t max_val
    cdef DTYPE_t result = 0.0
    cdef size_t i
    cdef size_t j

    cdef size_t I = data.shape[0]
    cdef size_t J = data.shape[1]

    # Bounds checking is off, so data[0, 0] must not be touched when the view has no elements.
    # The sum over nothing is 0.0, hence the result is log(0.0).
    if I == 0 or J == 0:
        return log(result)

    max_val = data[0, 0]

    # First pass: find the maximum, which is subtracted before exponentiating to avoid overflow.
    for i in range(I):
        for j in range(J):
            if data[i, j] > max_val:
                max_val = data[i, j]

    # Second pass: accumulate the shifted exponentials.
    for i in range(I):
        for j in range(J):
            result += exp(data[i, j] - max_val)

    # Undo the shift in log space.
    result = log(result) + max_val

    return result


def dirichlet_expectation(alpha):
    """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
    Dispatches to :func:`~gensim._matutils.dirichlet_expectation_2d` for 2d input and to
    :func:`~gensim._matutils.dirichlet_expectation_1d` for everything else.

    Parameters
    ----------
    alpha : numpy.ndarray
        Dirichlet parameter 2d matrix or 1d vector, if 2d - each row is treated as a separate parameter vector,
        supports float16, float32 and float64.

    Returns
    -------
    numpy.ndarray
        Log of expected values, dimension same as `alpha.ndim`.

    """
    # Only ndim == 2 takes the matrix path; any other rank is handed to the 1d implementation.
    compute = dirichlet_expectation_2d if alpha.ndim == 2 else dirichlet_expectation_1d
    return compute(alpha)


def dirichlet_expectation_2d(alpha):
    """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
    Using :func:`~gensim._matutils._dirichlet_expectation_2d`.

    Parameters
    ----------
    alpha : numpy.ndarray
        Dirichlet parameter 2d matrix, each row is treated as a separate parameter vector,
        supports float16, float32 and float64.

    Returns
    -------
    numpy.ndarray
        Log of expected values, 2d matrix with the same dtype as `alpha`.

    Raises
    ------
    TypeError
        If the dtype of `alpha` is not float16, float32 or float64.

    """
    if alpha.dtype == np.float64:
        out = np.zeros(alpha.shape, dtype=alpha.dtype)
        _dirichlet_expectation_2d[double](alpha, out)
    elif alpha.dtype == np.float32:
        out = np.zeros(alpha.shape, dtype=alpha.dtype)
        _dirichlet_expectation_2d[float](alpha, out)
    elif alpha.dtype == np.float16:
        # There is no float16 specialization: compute in float32, then cast the result back.
        out = np.zeros(alpha.shape, dtype=np.float32)
        _dirichlet_expectation_2d[float](alpha.astype(np.float32), out)
        out = out.astype(np.float16)
    else:
        # Without this branch `out` would be unbound and the return below would raise UnboundLocalError.
        raise TypeError("alpha must have dtype float16, float32 or float64, got %s" % alpha.dtype)

    return out


def dirichlet_expectation_1d(alpha):
    """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
    Using :func:`~gensim._matutils._dirichlet_expectation_1d`.

    Parameters
    ----------
    alpha : numpy.ndarray
        Dirichlet parameter 1d vector, supports float16, float32 and float64.

    Returns
    -------
    numpy.ndarray
        Log of expected values, 1d vector with the same dtype as `alpha`.

    Raises
    ------
    TypeError
        If the dtype of `alpha` is not float16, float32 or float64.

    """
    if alpha.dtype == np.float64:
        out = np.zeros(alpha.shape, dtype=alpha.dtype)
        _dirichlet_expectation_1d[double](alpha, out)
    elif alpha.dtype == np.float32:
        out = np.zeros(alpha.shape, dtype=alpha.dtype)
        _dirichlet_expectation_1d[float](alpha, out)
    elif alpha.dtype == np.float16:
        # There is no float16 specialization: compute in float32, then cast the result back.
        out = np.zeros(alpha.shape, dtype=np.float32)
        _dirichlet_expectation_1d[float](alpha.astype(np.float32), out)
        out = out.astype(np.float16)
    else:
        # Without this branch `out` would be unbound and the return below would raise UnboundLocalError.
        raise TypeError("alpha must have dtype float16, float32 or float64, got %s" % alpha.dtype)

    return out


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
    """Fill `out` with digamma(alpha[k]) - digamma(sum(alpha)) for every entry of `alpha`.

    Parameters
    ----------
    alpha : DTYPE_t[:]
        Dirichlet parameter 1d vector.

    out : DTYPE_t[:]
        Output buffer, written in place at the same indices as `alpha`; receives the log of expected values.

    """
    cdef size_t n = alpha.shape[0]
    cdef size_t k
    cdef DTYPE_t total = 0.0
    cdef DTYPE_t psi_total

    # The digamma of the total is shared by every output entry, so compute it once.
    for k in range(n):
        total += alpha[k]
    psi_total = _digamma(total)

    for k in range(n):
        out[k] = _digamma(alpha[k]) - psi_total


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:
    """Fill `out` row by row with digamma(alpha[row, col]) - digamma(sum(alpha[row, :])).

    Parameters
    ----------
    alpha : DTYPE_t[:, :]
        Dirichlet parameter matrix, each row is treated as a parameter vector for its own Dirichlet.

    out : DTYPE_t[:, :]
        Output buffer, written in place at the same indices as `alpha`; receives the log of expected values.

    """
    cdef size_t n_rows = alpha.shape[0]
    cdef size_t n_cols = alpha.shape[1]
    cdef size_t row, col
    cdef DTYPE_t row_total
    cdef DTYPE_t psi_row_total

    for row in range(n_rows):
        # Each row is normalised by the digamma of its own sum.
        row_total = 0.0
        for col in range(n_cols):
            row_total += alpha[row, col]
        psi_row_total = _digamma(row_total)

        for col in range(n_cols):
            out[row, col] = _digamma(alpha[row, col]) - psi_row_total


def digamma(DTYPE_t x):
    """Python-callable digamma for positive floats, delegating to :func:`~gensim._matutils._digamma`.

    Parameters
    ----------
    x : float
        Positive value.

    Returns
    -------
    float
        Digamma(x).

    """
    cdef DTYPE_t value = _digamma(x)
    return value


@cython.cdivision(True)
cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:
    """Digamma function for positive floats.

    Parameters
    ----------
    x : float
        Positive value.

    Notes
    -----

    Adapted from:

    * Authors:
        * Original FORTRAN77 version by Jose Bernardo.
        * C version by John Burkardt.

    * Reference: Jose Bernardo, Algorithm AS 103: Psi (Digamma) Function,
      Applied Statistics, Volume 25, Number 3, 1976, pages 315-317.

    * Licensing: This code is distributed under the GNU LGPL license.

    The argument is not validated; values at or below 1e-6 (including non-positive ones)
    all go through the small-argument formula.

    Returns
    -------
    float
        Digamma(x).

    """
    cdef DTYPE_t series_start = 8.5
    cdef DTYPE_t gamma_const = 0.57721566490153286060
    cdef DTYPE_t inv
    cdef DTYPE_t acc
    cdef DTYPE_t shifted

    # Small-argument approximation, used for every x at or below 1e-6.
    if x <= 0.000001:
        return - gamma_const - 1.0 / x + 1.6449340668482264365 * x

    # Shift the argument upwards with the recurrence digamma(x) = digamma(x + 1) - 1 / x
    # until it is large enough for the asymptotic series.
    acc = 0.0
    shifted = x
    while shifted < series_start:
        acc -= 1.0 / shifted
        shifted += 1.0

    # Use Stirling's (actually de Moivre's) expansion at the shifted argument.
    inv = 1.0 / shifted
    acc = acc + log(shifted) - 0.5 * inv

    # The remaining terms are a polynomial in 1 / shifted**2, evaluated in nested form.
    inv = inv * inv

    acc = acc - inv * (
        1.0 / 12.0 - inv * (
            1.0 / 120.0 - inv * (
                1.0 / 252.0 - inv * (
                    1.0 / 240.0 - inv * (1.0 / 132.0)
                )
            )
        )
    )

    return acc
first commit 2020-08-27 21:55:39 +02:00			`#!/usr/bin/env cython`
			`# coding: utf-8`
			`# cython: embedsignature=True`

			`from __future__ import division`
			`cimport cython`
			`import numpy as np`
			`cimport numpy as np`
			`ctypedef cython.floating DTYPE_t`
			`from libc.math cimport log, exp, fabs`
			`from cython.parallel import prange`


			`def mean_absolute_difference(a, b):`
			"""Mean absolute difference between two arrays, using :func:`~gensim._matutils._mean_absolute_difference`.

			`Parameters`
			`----------`
			`a : numpy.ndarray`
			`Input 1d array, supports float16, float32 and float64.`
			`b : numpy.ndarray`
			`Input 1d array, supports float16, float32 and float64.`

			`Returns`
			`-------`
			`float`
			`mean(abs(a - b)).`

			`"""`
			`if a.shape != b.shape:`
			`raise ValueError("a and b must have same shape")`

			`if a.dtype == np.float64:`
			`return _mean_absolute_difference[double](a, b)`
			`elif a.dtype == np.float32:`
			`return _mean_absolute_difference[float](a, b)`
			`elif a.dtype == np.float16:`
			`return _mean_absolute_difference[float](a.astype(np.float32), b.astype(np.float32))`


			`@cython.boundscheck(False)`
			`@cython.wraparound(False)`
			`@cython.cdivision(True)`
			`cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:`
			`"""Mean absolute difference between two arrays.`

			`Parameters`
			`----------`
			`a : numpy.ndarray`
			`Input 1d array.`
			`b : numpy.ndarray`
			`Input 1d array.`

			`Returns`
			`-------`
			`DTYPE_t`
			`mean(abs(a - b))`

			`"""`

			`cdef DTYPE_t result = 0.0`
			`cdef size_t i`
			`cdef size_t j`

			`cdef size_t I = a.shape[0]`
			`cdef size_t N = I`

			`for i in range(I):`
			`result += fabs(a[i] - b[i])`
			`result /= N`

			`return result`


			`def logsumexp(x):`
			"""Log of sum of exponentials, using :func:`~gensim._matutils._logsumexp_2d`.

			`Parameters`
			`----------`
			`x : numpy.ndarray`
			`Input 2d matrix, supports float16, float32 and float64.`

			`Returns`
			`-------`
			`float`
			log of sum of exponentials of elements in `x`.

			`Warnings`
			`--------`
			By performance reasons, doesn't support NaNs or 1d, 3d, etc arrays like :func:`scipy.special.logsumexp`.

			`"""`

			`if x.dtype == np.float64:`
			`return _logsumexp_2d[double](x)`
			`elif x.dtype == np.float32:`
			`return _logsumexp_2d[float](x)`
			`elif x.dtype == np.float16:`
			`return _logsumexp_2d[float](x.astype(np.float32))`


			`@cython.boundscheck(False)`
			`@cython.wraparound(False)`
			`@cython.cdivision(True)`
			`cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:`
			`"""Log of sum of exponentials.`

			`Parameters`
			`----------`
			`x : numpy.ndarray`
			`Input 2d matrix.`

			`Returns`
			`-------`
			`DTYPE_t`
			log of sum of exponentials of elements in `data`.

			`"""`

			`cdef DTYPE_t max_val = data[0, 0]`
			`cdef DTYPE_t result = 0.0`
			`cdef size_t i`
			`cdef size_t j`

			`cdef size_t I = data.shape[0]`
			`cdef size_t J = data.shape[1]`

			`for i in range(I):`
			`for j in range(J):`
			`if data[i, j] > max_val:`
			`max_val = data[i, j]`

			`for i in range(I):`
			`for j in range(J):`
			`result += exp(data[i, j] - max_val)`

			`result = log(result) + max_val`

			`return result`


			`def dirichlet_expectation(alpha):`
			`"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.`
			Using :func:`~gensim._matutils.dirichlet_expectation_1d` or :func:`~gensim._matutils.dirichlet_expectation_2d`.

			`Parameters`
			`----------`
			`alpha : numpy.ndarray`
			`Dirichlet parameter 2d matrix or 1d vector, if 2d - each row is treated as a separate parameter vector,`
			`supports float16, float32 and float64.`

			`Returns`
			`-------`
			`numpy.ndarray`
			Log of expected values, dimension same as `alpha.ndim`.

			`"""`
			`if alpha.ndim == 2:`
			`return dirichlet_expectation_2d(alpha)`
			`else:`
			`return dirichlet_expectation_1d(alpha)`


			`def dirichlet_expectation_2d(alpha):`
			`"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.`
			Using :func:`~gensim._matutils._dirichlet_expectation_2d`.

			`Parameters`
			`----------`
			`alpha : numpy.ndarray`
			`Dirichlet parameter 2d matrix, each row is treated as a separate parameter vector,`
			`supports float16, float32 and float64.`

			`Returns`
			`-------`
			`numpy.ndarray`
			`Log of expected values, 2d matrix.`

			`"""`
			`if alpha.dtype == np.float64:`
			`out = np.zeros(alpha.shape, dtype=alpha.dtype)`
			`_dirichlet_expectation_2d[double](alpha, out)`
			`elif alpha.dtype == np.float32:`
			`out = np.zeros(alpha.shape, dtype=alpha.dtype)`
			`_dirichlet_expectation_2d[float](alpha, out)`
			`elif alpha.dtype == np.float16:`
			`out = np.zeros(alpha.shape, dtype=np.float32)`
			`_dirichlet_expectation_2d[float](alpha.astype(np.float32), out)`
			`out = out.astype(np.float16)`

			`return out`


			`def dirichlet_expectation_1d(alpha):`
			`"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.`
			Using :func:`~gensim._matutils._dirichlet_expectation_1d`.

			`Parameters`
			`----------`
			`alpha : numpy.ndarray`
			`Dirichlet parameter 1d vector, supports float16, float32 and float64.`

			`Returns`
			`-------`
			`numpy.ndarray`
			`Log of expected values, 1d vector.`

			`"""`
			`if alpha.dtype == np.float64:`
			`out = np.zeros(alpha.shape, dtype=alpha.dtype)`
			`_dirichlet_expectation_1d[double](alpha, out)`
			`elif alpha.dtype == np.float32:`
			`out = np.zeros(alpha.shape, dtype=alpha.dtype)`
			`_dirichlet_expectation_1d[float](alpha, out)`
			`elif alpha.dtype == np.float16:`
			`out = np.zeros(alpha.shape, dtype=np.float32)`
			`_dirichlet_expectation_1d[float](alpha.astype(np.float32), out)`
			`out = out.astype(np.float16)`

			`return out`


			`@cython.boundscheck(False)`
			`@cython.wraparound(False)`
			`cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:`
			`"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.`

			`Parameters`
			`----------`
			`alpha : numpy.ndarray`
			`Dirichlet parameter 1d vector.`

			`out : numpy.ndarray`
			`Output array, contains log of expected values.`

			`"""`
			`cdef DTYPE_t sum_alpha = 0.0`
			`cdef DTYPE_t psi_sum_alpha = 0.0`
			`cdef size_t i`
			`cdef size_t I = alpha.shape[0]`

			`for i in range(I):`
			`sum_alpha += alpha[i]`

			`psi_sum_alpha = _digamma(sum_alpha)`

			`for i in range(I):`
			`out[i] = _digamma(alpha[i]) - psi_sum_alpha`


			`@cython.boundscheck(False)`
			`@cython.wraparound(False)`
			`cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:`
			`"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.`

			`Parameters`
			`----------`
			`alpha : numpy.ndarray`
			`Dirichlet parameter matrix, each row is treated as a parameter vector for its own Dirichlet.`

			`out : numpy.ndarray`
			`Log of expected values, 2d matrix.`

			`"""`
			`cdef DTYPE_t sum_alpha = 0.0`
			`cdef DTYPE_t psi_sum_alpha = 0.0`
			`cdef size_t i, j`
			`cdef size_t I = alpha.shape[0]`
			`cdef size_t J = alpha.shape[1]`

			`for i in range(I):`
			`sum_alpha = 0.0`
			`for j in range(J):`
			`sum_alpha += alpha[i, j]`

			`psi_sum_alpha = _digamma(sum_alpha)`

			`for j in range(J):`
			`out[i, j] = _digamma(alpha[i, j]) - psi_sum_alpha`


			`def digamma(DTYPE_t x):`
			"""Digamma function for positive floats, using :func:`~gensim._matutils._digamma`.

			`Parameters`
			`----------`
			`x : float`
			`Positive value.`

			`Returns`
			`-------`
			`float`
			`Digamma(x).`

			`"""`
			`return _digamma(x)`


			`@cython.cdivision(True)`
			`cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:`
			`"""Digamma function for positive floats.`

			`Parameters`
			`----------`
			`x : float`
			`Positive value.`

			`Notes`
			`-----`

			`Adapted from:`

			`* Authors:`
			`* Original FORTRAN77 version by Jose Bernardo.`
			`* C version by John Burkardt.`

			`* Reference: Jose Bernardo, Algorithm AS 103: Psi (Digamma) Function,`
			`Applied Statistics, Volume 25, Number 3, 1976, pages 315-317.`

			`* Licensing: This code is distributed under the GNU LGPL license.`


			`Returns`
			`-------`
			`float`
			`Digamma(x).`

			`"""`
			`cdef DTYPE_t c = 8.5;`
			`cdef DTYPE_t euler_mascheroni = 0.57721566490153286060;`
			`cdef DTYPE_t r;`
			`cdef DTYPE_t value;`
			`cdef DTYPE_t x2;`

			`if ( x <= 0.000001 ):`
			`value = - euler_mascheroni - 1.0 / x + 1.6449340668482264365 * x;`
			`return value;`

			`# Reduce to DIGAMA(X + N).`
			`value = 0.0;`
			`x2 = x;`
			`while ( x2 < c ):`
			`value = value - 1.0 / x2;`
			`x2 = x2 + 1.0;`

			`# Use Stirling's (actually de Moivre's) expansion.`
			`r = 1.0 / x2;`
			`value = value + log ( x2 ) - 0.5 * r;`

			`r = r * r;`

			`value = value \`
			`- r * ( 1.0 / 12.0 \`
			`- r * ( 1.0 / 120.0 \`
			`- r * ( 1.0 / 252.0 \`
			`- r * ( 1.0 / 240.0 \`
			`- r * ( 1.0 / 132.0 ) ) ) ) )`

			`return value;`