laywerrobot/app.py

# prototype User Interface

from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer

import numpy as np
import scipy

import tensorflow as tf

import _pickle as cPickle

import hickle as hkl


# Load the Zeros and Ones of the database
#
# The matrix is read from the gzip-compressed hickle dump. Earlier
# experiments read the same data from a pickle file
# ("databaseOneZero/OnesZerosDB.bin") and from a NumPy dump
# ('databaseOneZero/OnesZerosDB.npy'); only the hickle file is used now.
dbOZ = hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl')

# Flip rows and columns so the matrix can later be right-multiplied by the
# single-row query vector, and store it as float32 (no copy is made when
# the data already has that dtype) to match the dtype of the query vector.
dbOZ = np.array(dbOZ).T.astype(np.float32, copy=False)
# Get the user input: the free-text problem description and the number of
# result documents to show.
user_input_words = input("Please describe your problem: ")
user_input_n = int(input("How many dokuments would you like to display?: "))

# Convert user input to Zeros and Ones
# The vectorizer expects a collection of texts, so the single description
# is wrapped in a one-element list.
user_array = [user_input_words]

vectorizer = joblib.load('models/bagofwords.pkl')
user_input_OnesZeros = vectorizer.transform(user_array)

# Debug output: raw input and its vectorized form.
print(type(sum(user_input_OnesZeros[0])))
print(user_input_OnesZeros)
print(user_input_words)
print(type(user_input_words))
print(type(user_input_OnesZeros))

# Densify the vectorizer output and keep its only row as a float32 vector.
dense_rows = user_input_OnesZeros.toarray()
uOZ = dense_rows[0].astype(np.float32, copy=False)
print(type(sum(uOZ)))
print(np.asarray(uOZ))

# Add a leading axis so the query becomes a matrix with exactly one row.
#uiOZ = np.transpose(uOZ[np.newaxis, :])
uiOZ = np.expand_dims(uOZ, axis=0)

print(uiOZ)
# Score every document against the query by multiplying the single-row
# query matrix with the database matrix.
#
# The session is used as a context manager: inside the block it is the
# default session (so Tensor.eval() works without arguments) and it is
# closed on exit, releasing its resources instead of leaking them.
with tf.Session() as sess:
    uiOZ_tensor = tf.constant(uiOZ)
    dbOZ_tensor = tf.constant(dbOZ)

    # tf.sparse_matmul operates on ordinary dense tensors, so no conversion
    # to SparseTensor is needed before the multiplication.
    #wordCountDoku =  tf.matmul(uiOZ_tensor, dbOZ_tensor)
    wordCountDoku = tf.sparse_matmul(uiOZ_tensor, dbOZ_tensor)

    # Run the graph once and reuse the fetched value; every extra eval()
    # call would recompute the whole product.
    wordCountDoku_value = wordCountDoku.eval()

    # The query has a single row, so row 0 holds one score per document.
    wCD = np.array(wordCountDoku_value[0])

    print(type(wCD))

    print('end', wordCountDoku_value)


# Rank the documents by their score and print the ids of the best matches.
#
# Each entry pairs a document's score with its position in wCD:
# [score, document_index].
indexedwCD = [[score, doc_index] for doc_index, score in enumerate(wCD)]

print(indexedwCD)

# Sort by score, highest first. The sort is stable, so documents with the
# same score keep their original (ascending index) order.
indexedwCD.sort(key=lambda pair: pair[0], reverse=True)
print(indexedwCD)

# Slicing caps the output at the number of available documents; a negative
# request prints nothing.
for pair in indexedwCD[:max(user_input_n, 0)]:
    print(pair[1])


# Calculate the best matches in parallel with tf


# Get the id of documents which fit the best

# Display the n best matching documents
first commit 2020-08-27 21:55:39 +02:00			`# prototype User Interface`

			`from sklearn.externals import joblib`
			`from sklearn.feature_extraction.text import CountVectorizer`

			`import numpy as np`
			`import scipy`

			`import tensorflow as tf`

			`import _pickle as cPickle`

			`import hickle as hkl`




			`# Load the Zeros and Ones of the database`

			`#pickdbOZ = open("databaseOneZero/OnesZerosDB.bin","rb")`
			`#dbOZ = cPickle.load(pickdbOZ)`
			`#dbOZ = dbOZ.astype(np.float32, copy=False)`

			`dbOZ = hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl')`

			`#print(array_hkl)`

			`#numpyarray = np.load('databaseOneZero/OnesZerosDB.npy')`

			`#print(numpyarray)`

			`#print('pickle loaded:')`
			`#print('sum test1', sum(dbOZ[0]))`
			`#print('sum test2', sum(dbOZ[1]))`
			`#print('hdf5 loaded:')`
			`#print(array_hkl[1])`
			`#print('numpy loaded:')`
			`#print(numpyarray[1])`



			`dbOZ = np.transpose(np.array(dbOZ)).astype(np.float32, copy=False)`
			`# Get the user input`

			`user_input_words = input("Please describe your problem: ")`

			`user_input_n = int(input("How many dokuments would you like to display?: "))`
			`# Convert user input to Zeros and Ones`
			`user_array = []`
			`user_array.append(user_input_words)`


			`vectorizer = joblib.load('models/bagofwords.pkl')`

			`user_input_OnesZeros = vectorizer.transform(user_array)`
			`print(type(sum(user_input_OnesZeros[0])))`
			`print(user_input_OnesZeros)`
			`print(user_input_words)`
			`print(type(user_input_words))`
			`print(type(user_input_OnesZeros))`
			`uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)`
			`print(type(sum(uOZ)))`
			`print(np.array(uOZ))`

			`uiOZ = uOZ`

			`#uiOZ = np.transpose(uOZ[np.newaxis, :])`
			`uiOZ = uOZ[np.newaxis, :]`


			`print(uiOZ)`
			`sess = tf.Session()`

			`with sess.as_default():`
			`uiOZ_tensor = tf.constant(uiOZ)`

			`dbOZ_tensor = tf.constant(dbOZ)`

			`uiOZ_tensor_sparse =tf.contrib.layers.dense_to_sparse(uiOZ_tensor, eos_token=0, outputs_collections=None, scope=None )`
			`dbOZ_tensor_sparse =tf.contrib.layers.dense_to_sparse(dbOZ_tensor, eos_token=0, outputs_collections=None, scope=None )`


			`#wordCountDoku = tf.matmul(uiOZ_tensor, dbOZ_tensor)`
			`wordCountDoku = tf.sparse_matmul(uiOZ_tensor, dbOZ_tensor)`

			`wCD = np.array(wordCountDoku.eval()[0])`

			`print(type(wCD))`

			`print('end',wordCountDoku.eval())`


			`indexedwCD = []`
			`for n in range(len(wCD)):`
			`indexedwCD.append([wCD[n],n])`

			`print(indexedwCD)`


			`indexedwCD = np.transpose(np.array(indexedwCD))`

			`print(indexedwCD)`


			`indexedwCD = sorted(indexedwCD, key=lambda tup: tup[1], reverse=False)`
			`print(indexedwCD)`


			`for n in range(user_input_n):`
			`print(indexedwCD[n][1])`


			`# Calculate the best matching parallelized with tf`




			`# Get the id of documents which fit the best`

			`# Display the n best matching dokuments`