92 lines
2 KiB
Python
92 lines
2 KiB
Python
# prototype User Interface
|
|
|
|
from sklearn.externals import joblib
|
|
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
|
import numpy as np
|
|
import scipy
|
|
|
|
import tensorflow as tf
|
|
|
|
import _pickle as cPickle
|
|
|
|
import hickle as hkl
|
|
|
|
def rank_documents(word_counts, n):
    """Return the indices of the ``n`` best-matching documents, best first.

    Args:
        word_counts: 1-D sequence (or array) holding one match score per
            document; position ``i`` is the score of document ``i``.
        n: How many document indices to return. Values larger than the
            number of documents are clamped; values <= 0 yield ``[]``.

    Returns:
        A list of plain ``int`` document indices ordered by descending
        score. Documents with equal scores keep ascending index order.
    """
    # Cast to float64 so negating below is safe even for unsigned inputs.
    scores = np.asarray(word_counts, dtype=np.float64).ravel()
    n = max(0, min(int(n), scores.size))
    # Mergesort is stable, so ties are reported in ascending index order.
    order = np.argsort(-scores, kind="mergesort")
    return [int(i) for i in order[:n]]


def main():
    """Prompt for a problem description and print the best-matching document ids.

    Loads the one/zero document matrix and the fitted bag-of-words
    vectorizer from disk, encodes the user's text with the vectorizer,
    multiplies it against the document matrix with TensorFlow and prints
    the indices of the highest-scoring documents, one per line.

    Raises:
        ValueError: if the requested number of documents is not an integer.
    """
    # Load the zeros and ones from the database.
    # Presumably (vocabulary, documents) after the transpose, so that the
    # (1, vocabulary) user vector can be multiplied against it -- TODO confirm.
    dbOZ = hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl')
    dbOZ = np.transpose(np.array(dbOZ)).astype(np.float32, copy=False)

    # Get the user input
    user_input_words = input("Please describe your problem: ")
    user_input_n = int(input("How many dokuments would you like to display?: "))

    # Convert user input to zeros and ones.
    # NOTE(review): `sklearn.externals.joblib` (imported at the top of this
    # file) was removed in scikit-learn 0.23 -- confirm the pinned version.
    vectorizer = joblib.load('models/bagofwords.pkl')
    user_input_OnesZeros = vectorizer.transform([user_input_words])
    uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
    uiOZ = uOZ[np.newaxis, :]  # row vector, shape (1, len(uOZ))

    # Score every document in one matrix product. The session is used as a
    # context manager so its resources are released when we are done, and
    # the graph is evaluated exactly once.
    with tf.Session() as sess:
        wordCountDoku = tf.sparse_matmul(tf.constant(uiOZ), tf.constant(dbOZ))
        wCD = sess.run(wordCountDoku)[0]

    # Display the ids of the n best-matching documents, best first.
    for doc_index in rank_documents(wCD, user_input_n):
        print(doc_index)


if __name__ == "__main__":
    main()
|
|
|
|
|
|
# Calculate the best matching parallelized with tf
|
|
|
|
|
|
|
|
|
|
# Get the id of documents which fit the best
|
|
|
|
# Display the n best matching documents
|
|
|