|
|
# prototype User Interface
#
# Loads the bag-of-words document matrix and the fitted vectorizer, asks
# the user for a problem description, scores every document by the
# number of query words it shares, and prints the ids of the n
# best-matching documents.

# NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23;
# newer installs need a plain `import joblib` instead — confirm the pinned
# sklearn version before changing.
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer  # noqa: F401  (the unpickled vectorizer below is a CountVectorizer)

import numpy as np
import scipy  # noqa: F401  # kept: unseen parts of the project may rely on it

import tensorflow as tf

import _pickle as cPickle  # noqa: F401  # legacy pickle loader, kept for reference

import hickle as hkl


def _rank_documents(scores):
    """Return [score, doc_id] pairs sorted best match first.

    scores -- 1-D sequence where scores[i] is the match score of document i.
    """
    indexed = [[float(score), doc_id] for doc_id, score in enumerate(scores)]
    # Sort by score, highest first.  The original transposed the pair
    # list (producing a 2xN array) and sorted by the *second* element,
    # which never ordered documents by relevance and raised IndexError
    # for n > 2.
    indexed.sort(key=lambda pair: pair[0], reverse=True)
    return indexed


def main():
    """Run the interactive query loop once: load data, ask, score, print."""
    # Load the zeros/ones document-term matrix (one row per document) and
    # orient it words-x-documents so the 1-x-words query vector can be
    # matrix-multiplied against it.
    dbOZ = hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl')
    dbOZ = np.transpose(np.array(dbOZ)).astype(np.float32, copy=False)

    # Get the user input.
    user_input_words = input("Please describe your problem: ")
    user_input_n = int(input("How many documents would you like to display?: "))

    # Convert the query into the same bag-of-words representation as the
    # database, using the vectorizer fitted at training time.
    vectorizer = joblib.load('models/bagofwords.pkl')
    user_input_OnesZeros = vectorizer.transform([user_input_words])
    uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
    uiOZ = uOZ[np.newaxis, :]  # shape (1, n_words) for the matmul below

    # Calculate the per-document shared-word counts with TensorFlow:
    # (1 x words) @ (words x documents) -> one score per document.
    sess = tf.Session()
    with sess.as_default():
        uiOZ_tensor = tf.constant(uiOZ)
        dbOZ_tensor = tf.constant(dbOZ)
        # tf.sparse_matmul takes *dense* tensors and merely hints that the
        # operands are mostly zeros; the original's dense_to_sparse
        # conversions were never used and have been dropped as dead code.
        wordCountDoku = tf.sparse_matmul(uiOZ_tensor, dbOZ_tensor)
        wCD = np.array(wordCountDoku.eval()[0])
    sess.close()

    # Display the ids of the n best matching documents.
    for score, doc_id in _rank_documents(wCD)[:user_input_n]:
        print(doc_id)


if __name__ == "__main__":
    main()
|