# Prototype user interface: find the documents that best match a
# free-text problem description, using a bag-of-words index and
# TensorFlow sparse matrix multiplication
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem.snowball import SnowballStemmer
from subprocess import call
import numpy as np
import scipy as sc
import scipy.sparse  # 'import scipy' alone does not expose scipy.sparse
import tensorflow as tf
import hickle as hkl
import os


# Convert a scipy CSR matrix to a tf.SparseTensor so the matmul can run
# on the GPU (tf.SparseTensor rather than tf.SparseTensorValue, which is
# only meant for feed_dict values)
def convert_sparse_matrix_to_sparse_tensor(X):
    coo = sc.sparse.coo_matrix(X)
    indices = np.vstack([coo.row, coo.col]).transpose().astype(np.int64)
    return tf.SparseTensor(indices, coo.data, coo.shape)


# Load the ones/zeros document-term matrix from the database
dbOZ = hkl.load('bagofwords/OnesZerosDB_gzip.hkl')
print(len(dbOZ))
dbOZ = dbOZ.astype('float32')

# Get the user input
user_input_words = input("Please describe your problem: ")
user_input_n = int(input("How many documents would you like to display?: "))

# Convert the user input to ones and zeros: stem each word individually
# (stemming the whole sentence as one string would not stem every word),
# rejoin into a single document, and vectorize with the stored vocabulary
stemmer = SnowballStemmer("german")
user_array = [" ".join(stemmer.stem(word) for word in user_input_words.split())]
print(user_array)
vectorizer = joblib.load('bagofwords/bagofwords.pkl')
user_input_OnesZeros = vectorizer.transform(user_array)
uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
uiOZ = uOZ[np.newaxis, :].transpose()  # column vector, shape (vocabulary, 1)

# Calculate the best matching documents, parallelized with TensorFlow:
# multiplying the sparse document-term matrix by the query column vector
# gives, per document, the number of query terms it contains
sess = tf.Session()
with sess.as_default():
    uiOZ_tensor = tf.constant(uiOZ)
    dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(dbOZ)
    wordCountDoku = tf.sparse_tensor_dense_matmul(dbOZ_tensor_sparse, uiOZ_tensor)
    wCD = np.array(wordCountDoku.eval())

# Get the ids of the documents which fit best: pair each score with its
# row index and sort by score, highest first
indexedwCD = [[n, wCD[n][0]] for n in range(len(wCD))]
indexedwCD = sorted(indexedwCD, key=lambda tup: tup[1], reverse=True)

best_n_documents = [indexedwCD[n][0]
                    for n in range(min(user_input_n, len(indexedwCD)))]

# Display the n best matching documents in an editor
cwd = os.getcwd()
rechtsprechIn = hkl.load('bagofwords/rechtsprechIn_gzip.hkl')  # filename list
for n in range(len(best_n_documents)):
    call(['nano', os.path.join(cwd, 'EndDokumente',
                               rechtsprechIn[int(best_n_documents[n])])])
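

# A minimal self-check of the ranking step (illustrative sketch, not
# called anywhere above; the function name is hypothetical): three tiny
# "documents" over a four-term vocabulary are scored against a query
# containing terms 0 and 2, so document 0 should score 2 and the other
# two should score 1 each.
def _toy_ranking_demo():
    docs = sc.sparse.csr_matrix(np.array([[1, 0, 1, 0],
                                          [0, 1, 1, 1],
                                          [1, 1, 0, 0]], dtype=np.float32))
    query = np.array([[1.0], [0.0], [1.0], [0.0]], dtype=np.float32)
    with tf.Session().as_default():
        scores = tf.sparse_tensor_dense_matmul(
            convert_sparse_matrix_to_sparse_tensor(docs),
            tf.constant(query)).eval()
    print(scores.ravel())  # expected: [2. 1. 1.]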
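

# How 'bagofwords/bagofwords.pkl' and the two hickle files were produced
# is not part of this script; the sketch below is an assumption (the
# function name, paths, and the premise that hickle can serialize the
# CSR matrix are all unverified), shown only to document what the loads
# above expect.  The real build step presumably also stemmed the
# documents with the same German SnowballStemmer before vectorizing.
def _build_index_sketch(doc_dir='EndDokumente'):
    filenames = sorted(os.listdir(doc_dir))
    texts = []
    for name in filenames:
        with open(os.path.join(doc_dir, name), encoding='utf-8') as f:
            texts.append(f.read())
    vec = CountVectorizer(binary=True)   # binary=True -> ones/zeros matrix
    matrix = vec.fit_transform(texts)    # documents x vocabulary, CSR
    joblib.dump(vec, 'bagofwords/bagofwords.pkl')
    hkl.dump(matrix, 'bagofwords/OnesZerosDB_gzip.hkl',
             mode='w', compression='gzip')
    hkl.dump(filenames, 'bagofwords/rechtsprechIn_gzip.hkl',
             mode='w', compression='gzip')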