# Prototype user interface: match a free-text problem description against a
# precomputed bag-of-words document database and display the best-matching documents.
from sklearn.externals import joblib  # in newer scikit-learn versions, use `import joblib` instead
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import scipy as sc
import tensorflow as tf
import _pickle as cPickle
import hickle as hkl
import os
# Convert a scipy CSR matrix to a TensorFlow sparse tensor so the matmul can run on the GPU
def convert_sparse_matrix_to_sparse_tensor(X):
    coo = sc.sparse.coo_matrix(X)
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensorValue(indices, coo.data, coo.shape)
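# Note: this script targets the TensorFlow 1.x API (tf.Session, tf.SparseTensorValue,
# tf.sparse_tensor_dense_matmul); under TensorFlow 2.x the equivalents would be
# tf.sparse.SparseTensor and tf.sparse.sparse_dense_matmul in eager mode.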
# Load the zeros-and-ones bag-of-words matrix from the database
dbOZ = hkl.load('bagofwords/OnesZerosDB_gzip.hkl')
print(dbOZ.shape)  # report the matrix size (len() is ambiguous for sparse matrices)
# Cast to float32 (CSR astype) so the dtype matches the user input vector
dbOZ = dbOZ.astype('float32')
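# The matrix is assumed to be stored as (number of documents x vocabulary size);
# if it were saved transposed, it would need dbOZ = dbOZ.transpose() here before
# being converted to a sparse tensor below.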
# Get the user input
user_input_words = input("Please describe your problem: ")
user_input_n = int(input("How many documents would you like to display?: "))
# Convert the user input to zeros and ones
user_array = []
user_array.append(user_input_words)
print(user_array)
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("german")
# Stem every word of the input individually; stemming the whole sentence as a
# single token would leave most words unstemmed
user_array = [" ".join(stemmer.stem(word) for word in entry.split()) for entry in user_array]
print(user_array)
# Load the fitted bag-of-words vectorizer and transform the stemmed input into a word-count vector
vectorizer = joblib.load('bagofwords/bagofwords.pkl')
user_input_OnesZeros = vectorizer.transform(user_array)
uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
# Reshape the user vector into a column vector (vocabulary size x 1)
uiOZ = uOZ[np.newaxis, :]
uiOZ = uiOZ.transpose()
# Calculate the best-matching documents, parallelized with TensorFlow
sess = tf.Session()
with sess.as_default():
    uiOZ_tensor = tf.constant(uiOZ)
    dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(dbOZ)
    # Sparse (documents x vocabulary) times dense (vocabulary x 1) yields one
    # word-overlap score per document
    wordCountDoku = tf.sparse_tensor_dense_matmul(dbOZ_tensor_sparse, uiOZ_tensor)
    wCD = np.array(wordCountDoku.eval())
# Pair each document index with its score, then sort by score in descending order
indexedwCD = []
for n in range(len(wCD)):
    indexedwCD.append([n, wCD[n][0]])
indexedwCD = sorted(indexedwCD, key=lambda tup: tup[1], reverse=True)
# Get the ids of the documents that fit best
best_n_documents = []
for n in range(user_input_n):
    best_n_documents.append(indexedwCD[n][0])
cwd = os.getcwd()
# Load the list of document file names corresponding to the rows of the database matrix
rechtsprechIn = hkl.load('bagofwords/rechtsprechIn_gzip.hkl')
# Display the n best-matching documents
from subprocess import call
for n in range(user_input_n):
    call(['nano', cwd + '/' + 'EndDokumente/' + rechtsprechIn[int(best_n_documents[n])]])
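# Opening the files with nano requires an interactive terminal; another pager or
# viewer (e.g. 'less') could be substituted in the call above.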