"""FASTsearch: bag-of-words document search over an in-memory database.

Every DB is represented as arrays held fully in memory, so all documents
are accessible at the same moment.  A scikit-learn CountVectorizer
(trained on the database of lines or docs) turns a query into a BoW
vector, and TensorFlow performs the sparse matrix product on the
available device.

TODO: GPU multithreading has to be implemented.
USAGE: learn a scikit-learn CountVectorizer on a database of lines/docs.
"""

import numpy as np
import scipy as sc
import tensorflow as tf  # TF1-style API (tf.Session, tf.SparseTensorValue)
import _pickle as cPickle
import hickle as hkl
import os

try:
    # sklearn.externals.joblib was removed in scikit-learn 0.23;
    # fall back to the standalone joblib package on newer installs.
    from sklearn.externals import joblib
except ImportError:
    import joblib
from sklearn.feature_extraction.text import CountVectorizer


def convert_sparse_matrix_to_sparse_tensor(X):
    """Convert a scipy-compatible matrix to a tf.SparseTensorValue.

    Goes through COO format so the (row, col) index pairs and the data
    vector can be handed to TensorFlow directly (for working on GPU).

    X: dense array or scipy sparse matrix.
    Returns a tf.SparseTensorValue with the same shape and contents.
    """
    coo = sc.sparse.coo_matrix(X)
    # np.mat is deprecated; build the (nnz, 2) index array directly.
    indices = np.array([coo.row, coo.col]).transpose()
    return tf.SparseTensorValue(indices, coo.data, coo.shape)


class FASTsearch(object):
    """Bag-of-words best-match document search backed by TensorFlow."""

    def __init__(self, DatabaseDir, BoWModelDir):
        """Load the document-term matrix and the fitted vectorizer.

        DatabaseDir: path to an hkl (hickle) file holding the BoW
            matrix, one row per document.
        BoWModelDir: path to a pkl (joblib) dump of a fitted
            CountVectorizer.
        """
        # input has to be hkl format
        self.database = hkl.load(DatabaseDir).astype('float32')
        # input has to be pkl format
        self.vectorizer = joblib.load(BoWModelDir)

    def search(self, string, numberofmatches):
        """Return indices of the documents best matching the query.

        string: the query text.
        numberofmatches: how many document indices to return (capped
            at the number of documents, so over-asking cannot raise).
        Returns a list of row indices into the database, ranked by
        descending shared-word count.
        """
        # Convert user input to the same zeros-and-ones BoW space
        # as the database.
        user_input_OnesZeros = self.vectorizer.transform([string])
        uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
        # Column vector (n_terms, 1) for the matrix product below.
        uiOZ = uOZ[np.newaxis, :].transpose()

        # BUG FIX: the original passed an undefined name `dbOZ` to the
        # converter (NameError on every call); the database loaded in
        # __init__ is the intended operand.
        # `with` also guarantees the session is closed (no leak).
        with tf.Session() as sess:
            uiOZ_tensor = tf.constant(uiOZ)
            dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(
                self.database)
            # (n_docs, n_terms) x (n_terms, 1) -> shared-word count
            # per document.
            wordCountDoku = tf.sparse_tensor_dense_matmul(
                dbOZ_tensor_sparse, uiOZ_tensor)
            wCD = np.array(sess.run(wordCountDoku))

        # Rank documents by descending match count.  (The original's
        # pre-sort [::-1] reversal only scrambled tie order.)
        scored = [(n, wCD[n][0]) for n in range(len(wCD))]
        scored.sort(key=lambda tup: tup[1], reverse=True)

        numberofmatches = min(numberofmatches, len(scored))
        return [scored[n][0] for n in range(numberofmatches)]