laywerrobot/lib/python3.6/site-packages/gensim/summarization/pagerank_weighted.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

"""This module calculates PageRank [1]_ based on wordgraph.


.. [1] https://en.wikipedia.org/wiki/PageRank

Examples
--------

Calculate PageRank for words

>>> from gensim.summarization.keywords import get_graph
>>> from gensim.summarization.pagerank_weighted import pagerank_weighted
>>> graph = get_graph("The road to hell is paved with good intentions.")
>>> # result will look like {'good': 0.70432858653171504, 'hell': 0.051128871128006126, ...}
>>> result = pagerank_weighted(graph)

Build matrix from graph

>>> from gensim.summarization.pagerank_weighted import build_adjacency_matrix
>>> build_adjacency_matrix(graph).todense()
matrix([[ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]])

"""


import numpy
from numpy import empty as empty_matrix
from scipy.linalg import eig
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import eigs
from six.moves import xrange


def pagerank_weighted(graph, damping=0.85):
    """Compute a rank for every node of `graph`.

    Parameters
    ----------
    graph : :class:`~gensim.summarization.graph.Graph`
        Graph whose nodes are ranked.
    damping : float, optional
        Weight applied to the normalized adjacency matrix; the uniform
        probability matrix is weighted by `1 - damping`.

    Returns
    -------
    dict
        Nodes of `graph` as keys, their ranks as values.

    """
    # Blend the link structure of the graph with the uniform matrix.
    link_term = damping * build_adjacency_matrix(graph).todense()
    uniform_term = (1 - damping) * build_probability_matrix(graph)
    transition = link_term + uniform_term

    # The principal eigenvector is taken from the transposed matrix.
    dominant = principal_eigenvector(transition.T)

    # Only the real part is passed on: because the blended matrix is positive,
    # the eigenvector is always real (i.e. not complex).
    return process_results(graph, dominant.real)


def build_adjacency_matrix(graph):
    """Build the sparse, row-normalized weight matrix of `graph`.

    Entry (i, j) holds the weight of the edge from node i to node j divided by
    the sum of the weights of the edges from node i to its neighbors. Diagonal
    entries and zero-weight edges are not stored.

    Parameters
    ----------
    graph : :class:`~gensim.summarization.graph.Graph`
        Given graph.

    Returns
    -------
    :class:`scipy.sparse.csr_matrix`, shape = [n, n]
        Adjacency matrix of given `graph`, n is number of nodes.

    """
    nodes = graph.nodes()
    size = len(nodes)
    rows, cols, values = [], [], []

    for src_index, src in enumerate(nodes):
        # Normalization constant for this row: total weight towards the neighbors.
        total = sum(graph.edge_weight((src, adjacent)) for adjacent in graph.neighbors(src))
        for dst_index, dst in enumerate(nodes):
            weight = float(graph.edge_weight((src, dst)))
            # Self-loops and missing (zero-weight) edges contribute nothing.
            if src_index == dst_index or weight == 0.0:
                continue
            rows.append(src_index)
            cols.append(dst_index)
            values.append(weight / total)

    return csr_matrix((values, (rows, cols)), shape=(size, size))


def build_probability_matrix(graph):
    """Build a dense square matrix in which every entry equals `1 / n`,
    where n is number of nodes of the given `graph`.

    Parameters
    ----------
    graph : :class:`~gensim.summarization.graph.Graph`
        Given graph.

    Returns
    -------
    numpy.ndarray, shape = [n, n]
        Matrix filled with the constant `1 / n`, n is number of nodes of `graph`.

    """
    size = len(graph.nodes())
    # Plain Python division, so a graph without nodes raises ZeroDivisionError.
    uniform = 1.0 / float(size)
    return numpy.full((size, size), uniform, dtype=float)


def principal_eigenvector(a):
    """Get the eigenvector of square matrix `a` that belongs to the
    eigenvalue picked by the solver (largest magnitude for small matrices).

    Parameters
    ----------
    a : numpy.ndarray, shape = [n, n]
        Given matrix.

    Returns
    -------
    numpy.ndarray, shape = [n, ]
        Eigenvector of matrix `a`.

    """
    # `eigs` is preferred even for a dense matrix because only one eigenvector
    # is needed. See #441, #438 for discussion.
    if len(a) >= 3:
        _, vectors = eigs(a, k=1)
        return vectors[:, 0]

    # `eigs` doesn't work for dim A < 3, so that case runs the full
    # decomposition and keeps the eigenvalue of largest magnitude.
    values, vectors = eig(a)
    dominant = numpy.abs(values).argmax()
    return vectors[:, dominant]


def process_results(graph, vec):
    """Pair every node of `graph` with the absolute value of its eigenvector component.
    This function is helper for :func:`~gensim.summarization.pagerank_weighted.pagerank_weighted`

    Parameters
    ----------
    graph : :class:`~gensim.summarization.graph.Graph`
        Given graph.
    vec : numpy.ndarray, shape = [n, ]
        Given eigenvector, n is number of nodes of `graph`.

    Returns
    -------
    dict
        Graph nodes as keys, absolute values of the eigenvector elements at
        the matching positions as values.

    """
    # Components are looked up by the node's position in `graph.nodes()`.
    return {node: abs(vec[position]) for position, node in enumerate(graph.nodes())}
first commit 2020-08-27 21:55:39 +02:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`
			`#`
			`# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html`

			`"""This module calculate PageRank [1]_ based on wordgraph.`


			`.. [1] https://en.wikipedia.org/wiki/PageRank`

			`Examples`
			`--------`

			`Calculate Pagerank for words`

			`>>> from gensim.summarization.keywords import get_graph`
			`>>> from gensim.summarization.pagerank_weighted import pagerank_weighted`
			`>>> graph = get_graph("The road to hell is paved with good intentions.")`
			`>>> # result will looks like {'good': 0.70432858653171504, 'hell': 0.051128871128006126, ...}`
			`>>> result = pagerank_weighted(graph)`

			`Build matrix from graph`

			`>>> from gensim.summarization.pagerank_weighted import build_adjacency_matrix`
			`>>> build_adjacency_matrix(graph).todense()`
			`matrix([[ 0., 0., 0., 0., 0.],`
			`[ 0., 0., 1., 0., 0.],`
			`[ 0., 1., 0., 0., 0.],`
			`[ 0., 0., 0., 0., 0.],`
			`[ 0., 0., 0., 0., 0.]])`

			`"""`


			`import numpy`
			`from numpy import empty as empty_matrix`
			`from scipy.linalg import eig`
			`from scipy.sparse import csr_matrix`
			`from scipy.sparse.linalg import eigs`
			`from six.moves import xrange`


			`def pagerank_weighted(graph, damping=0.85):`
			"""Get dictionary of `graph` nodes and its ranks.

			`Parameters`
			`----------`
			graph : :class:`~gensim.summarization.graph.Graph`
			`Given graph.`
			`damping : float`
			`Damping parameter, optional`

			`Returns`
			`-------`
			`dict`
			Nodes of `graph` as keys, its ranks as values.

			`"""`
			`adjacency_matrix = build_adjacency_matrix(graph)`
			`probability_matrix = build_probability_matrix(graph)`

			`pagerank_matrix = damping * adjacency_matrix.todense() + (1 - damping) * probability_matrix`

			`vec = principal_eigenvector(pagerank_matrix.T)`

			`# Because pagerank_matrix is positive, vec is always real (i.e. not complex)`
			`return process_results(graph, vec.real)`


			`def build_adjacency_matrix(graph):`
			"""Get matrix representation of given `graph`.

			`Parameters`
			`----------`
			graph : :class:`~gensim.summarization.graph.Graph`
			`Given graph.`

			`Returns`
			`-------`
			:class:`scipy.sparse.csr_matrix`, shape = [n, n]
			Adjacency matrix of given `graph`, n is number of nodes.

			`"""`
			`row = []`
			`col = []`
			`data = []`
			`nodes = graph.nodes()`
			`length = len(nodes)`

			`for i in xrange(length):`
			`current_node = nodes[i]`
			`neighbors_sum = sum(graph.edge_weight((current_node, neighbor)) for neighbor in graph.neighbors(current_node))`
			`for j in xrange(length):`
			`edge_weight = float(graph.edge_weight((current_node, nodes[j])))`
			`if i != j and edge_weight != 0.0:`
			`row.append(i)`
			`col.append(j)`
			`data.append(edge_weight / neighbors_sum)`

			`return csr_matrix((data, (row, col)), shape=(length, length))`


			`def build_probability_matrix(graph):`
			`"""Get square matrix of shape (n, n), where n is number of nodes of the`
			given `graph`.

			`Parameters`
			`----------`
			graph : :class:`~gensim.summarization.graph.Graph`
			`Given graph.`

			`Returns`
			`-------`
			`numpy.ndarray, shape = [n, n]`
			Eigenvector of matrix `a`, n is number of nodes of `graph`.

			`"""`
			`dimension = len(graph.nodes())`
			`matrix = empty_matrix((dimension, dimension))`

			`probability = 1.0 / float(dimension)`
			`matrix.fill(probability)`

			`return matrix`


			`def principal_eigenvector(a):`
			"""Get eigenvector of square matrix `a`.

			`Parameters`
			`----------`
			`a : numpy.ndarray, shape = [n, n]`
			`Given matrix.`

			`Returns`
			`-------`
			`numpy.ndarray, shape = [n, ]`
			Eigenvector of matrix `a`.

			`"""`
			# Note that we prefer to use `eigs` even for dense matrix
			`# because we need only one eigenvector. See #441, #438 for discussion.`

			`# But it doesn't work for dim A < 3, so we just handle this special case`
			`if len(a) < 3:`
			`vals, vecs = eig(a)`
			`ind = numpy.abs(vals).argmax()`
			`return vecs[:, ind]`
			`else:`
			`vals, vecs = eigs(a, k=1)`
			`return vecs[:, 0]`


			`def process_results(graph, vec):`
			"""Get `graph` nodes and corresponding absolute values of provided eigenvector.
			This function is helper for :func:`~gensim.summarization.pagerank_weighted.pagerank_weighted`

			`Parameters`
			`----------`
			graph : :class:`~gensim.summarization.graph.Graph`
			`Given graph.`
			`vec : numpy.ndarray, shape = [n, ]`
			Given eigenvector, n is number of nodes of `graph`.

			`Returns`
			`-------`
			`dict`
			`Graph nodes as keys, corresponding elements of eigenvector as values.`

			`"""`
			`scores = {}`
			`for i, node in enumerate(graph.nodes()):`
			`scores[node] = abs(vec[i])`

			`return scores`