# Prototype user interface: rank stored documents against a free-text query.
#
# The query is converted to the same bag-of-words ones/zeros representation as
# the document database; a single matrix multiplication then yields, per
# document, the number of query words it contains.  The n highest-scoring
# document ids are printed.

from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import scipy
import tensorflow as tf
import _pickle as cPickle
import hickle as hkl


def _load_document_matrix():
    """Load the document term matrix and return it as float32, shaped
    (vocab_size, n_docs) so it can right-multiply a (1, vocab_size) query row.
    """
    db_oz = hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl')
    # Stored as (n_docs, vocab_size); transpose once here so the matmul below
    # needs no per-query reshaping.
    return np.transpose(np.array(db_oz)).astype(np.float32, copy=False)


def _query_to_row_vector(user_input_words):
    """Encode the user's query with the persisted bag-of-words vectorizer.

    Returns a dense float32 row vector of shape (1, vocab_size).
    """
    vectorizer = joblib.load('models/bagofwords.pkl')
    sparse_vec = vectorizer.transform([user_input_words])
    dense_vec = sparse_vec.toarray()[0].astype(np.float32, copy=False)
    return dense_vec[np.newaxis, :]


def _score_documents(query_row, doc_matrix):
    """Multiply (1, vocab) x (vocab, n_docs) -> per-document word-count scores.

    Returns a 1-D numpy array of length n_docs.
    """
    sess = tf.Session()
    with sess.as_default():
        query_tensor = tf.constant(query_row)
        docs_tensor = tf.constant(doc_matrix)
        # tf.sparse_matmul on dense inputs is equivalent to tf.matmul (the
        # sparse variant only adds sparsity *hints*); use the plain op.
        word_count_doku = tf.matmul(query_tensor, docs_tensor)
        return np.array(word_count_doku.eval()[0])


def main():
    doc_matrix = _load_document_matrix()

    # Get the user input
    user_input_words = input("Please describe your problem: ")
    user_input_n = int(input("How many dokuments would you like to display?: "))

    query_row = _query_to_row_vector(user_input_words)
    scores = _score_documents(query_row, doc_matrix)

    # Pair each score with its document index and rank best-first.
    # NOTE: the original transposed the pair list before sorting (turning it
    # into two long rows) and sorted ascending by index, so it never actually
    # ranked by score — fixed to sort descending on the score itself.
    ranked = sorted(
        ((score, doc_id) for doc_id, score in enumerate(scores)),
        key=lambda pair: pair[0],
        reverse=True,
    )

    # Display the ids of the n best matching documents.
    for score, doc_id in ranked[:user_input_n]:
        print(doc_id)


if __name__ == '__main__':
    main()