# laywerrobot/LegalApp.py

# prototype User Interface

from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer

import numpy as np
import scipy

import tensorflow as tf

import _pickle as cPickle

import hickle as hkl

# Load the stored zeros-and-ones matrix from disk, convert it to a NumPy
# array, transpose it and make sure it is float32.
# NOTE(review): presumably stored with one row per document, so the transpose
# yields (vocabulary, documents) for the later matrix product — confirm.
dbOZ = (
    np.array(hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl'))
    .T
    .astype(np.float32, copy=False)  # copy=False: no copy if already float32
)

# Ask the user for a free-text problem description and for the number of
# results to show. int() raises ValueError if the second answer is not a
# whole number.
user_input_words = input("Please describe your problem: ")
user_input_n = int(input("How many dokuments would you like to display?: "))

# transform() is given a collection of texts, so the single description is
# wrapped in a one-element list.
user_array = [user_input_words]

# NOTE(review): presumably the fitted bag-of-words vectorizer that also
# produced the database matrix — confirm.
vectorizer = joblib.load('models/bagofwords.pkl')

# Convert the description to a dense float32 vector (first and only row of
# the transformed result).
user_input_OnesZeros = vectorizer.transform(user_array)
uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)

# Add a leading axis so the vector becomes a single-row 2-D array, the form
# needed for the matrix product with the database.
uiOZ = np.expand_dims(uOZ, axis=0)


print(uiOZ)

# Multiply the single-row user vector with the database matrix. Entry j of the
# resulting row is the product of the user vector with database column j.
#
# The session is used as a context manager: that installs it as the default
# session (required by Tensor.eval()) and closes it on exit, releasing the
# resources it holds. After the block, `sess` refers to a closed session.
with tf.Session() as sess:
    uiOZ_tensor = tf.constant(uiOZ)
    dbOZ_tensor = tf.constant(dbOZ)

    # sparse_matmul takes dense tensors as input.
    wordCountDoku = tf.sparse_matmul(uiOZ_tensor, dbOZ_tensor)

    # Run the graph once and reuse the result for both the score vector and
    # the debug output, instead of executing the multiplication twice.
    wordCountDoku_value = wordCountDoku.eval()

    # First (and only) row of the product: one score per database column.
    wCD = np.array(wordCountDoku_value[0])

    print(type(wCD))

    print('end', wordCountDoku_value)


# Rank the documents by their score in wCD (one score per document, taken
# from the first row of the user-vector x database product) and print the
# indices of the best matches.

# Pair every score with the index of its document: [score, document_index].
# The pairs are kept as rows; transposing them would make sorted() order the
# score row against the index row instead of ordering the documents.
indexedwCD = [[score, doc_index] for doc_index, score in enumerate(wCD)]

# Highest score first. sorted() is stable, so documents with equal scores
# keep their database order.
indexedwCD = sorted(indexedwCD, key=lambda pair: pair[0], reverse=True)
print(indexedwCD)

# Slicing clamps the request to the number of documents available, so asking
# for more results than exist prints all of them instead of raising
# IndexError. max(..., 0) keeps a negative request at "print nothing".
for _score, doc_index in indexedwCD[:max(user_input_n, 0)]:
    print(doc_index)


# Calculate the best matching parallelized with tf


# Get the id of documents which fit the best

# Display the n best matching documents