- # Prototype user interface: read a free-text problem description, turn it into a
- # bag-of-words vector, score it against the document database with a sparse
- # matrix product in TensorFlow, and open the n best-matching documents.
-
- from sklearn.externals import joblib  # on scikit-learn >= 0.23 use "import joblib" instead
- from sklearn.feature_extraction.text import CountVectorizer  # type of the pickled vectorizer
-
- import numpy as np
- import scipy.sparse as sp
-
- import tensorflow as tf
-
- import hickle as hkl
-
- import os
- from subprocess import call
-
- from nltk.stem.snowball import SnowballStemmer
-
-
- # Convert a scipy sparse matrix to a tf.SparseTensor so the matmul below can run on the GPU.
- def convert_sparse_matrix_to_sparse_tensor(X):
-     coo = sp.coo_matrix(X)
-     # SparseTensor expects int64 indices of shape (nnz, 2).
-     indices = np.vstack((coo.row, coo.col)).transpose().astype(np.int64)
-     return tf.SparseTensor(indices, coo.data, coo.shape)
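-
- # Illustrative check only (not part of the pipeline): a tiny 2x3 CSR matrix should
- # come back as a SparseTensor with dense_shape (2, 3) and the three stored non-zeros.
- # demo = sp.csr_matrix(np.array([[1., 0., 2.], [0., 0., 3.]], dtype=np.float32))
- # demo_tensor = convert_sparse_matrix_to_sparse_tensor(demo)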
-
-
-
-
-
- # Load the bag-of-words matrix of the document database
- # (one row per document, one column per vocabulary term, CSR format).
- dbOZ = hkl.load('bagofwords/OnesZerosDB_gzip.hkl')
- print(dbOZ.shape)  # len() is ambiguous for sparse matrices, so report the shape instead
-
- # Cast to float32 so the dtype matches the query vector built below.
- dbOZ = dbOZ.astype('float32')
-
- # Get the user input
- user_input_words = input("Please describe your problem: ")
- user_input_n = int(input("How many documents would you like to display?: "))
-
- # Convert the user input to zeros and ones: stem every word of the query with the
- # German Snowball stemmer (stemming the whole sentence as a single token would
- # leave it unstemmed), then keep it as a one-element list so the vectorizer
- # treats it as a single document.
- stemmer = SnowballStemmer("german")
- user_array = [' '.join(stemmer.stem(word) for word in user_input_words.split())]
- print(user_array)
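-
- # Example (assumes NLTK's German Snowball stemmer and a vocabulary built from
- # stemmed tokens): "Probleme" should stem to "problem", so the query tokens
- # line up with the stored vocabulary.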
-
- # Look up the stemmed query in the stored vectorizer's vocabulary.
- vectorizer = joblib.load('bagofwords/bagofwords.pkl')
-
- user_input_OnesZeros = vectorizer.transform(user_array)
- uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
-
- # Column vector of shape (vocabulary_size, 1) for the sparse matmul below.
- uiOZ = uOZ[:, np.newaxis]
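-
- # Sanity check (assumes the columns of dbOZ were produced by the same vectorizer):
- # assert uiOZ.shape[0] == dbOZ.shape[1]
-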
- # Score every document in one sparse matrix product:
- # (n_documents x vocabulary) * (vocabulary x 1) -> one score per document,
- # roughly how many of the query terms occur in each document.
- sess = tf.Session()
-
- with sess.as_default():
-     uiOZ_tensor = tf.constant(uiOZ)
-     dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(dbOZ)
-
-     wordCountDoku = tf.sparse_tensor_dense_matmul(dbOZ_tensor_sparse, uiOZ_tensor)
-     wCD = np.array(wordCountDoku.eval())
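-
- # Optional cross-check without TensorFlow (assumes dbOZ is a scipy CSR matrix as
- # loaded above); handy for verifying the sparse-tensor conversion on small data:
- # wCD_check = dbOZ.dot(uiOZ)
- # assert np.allclose(wCD, wCD_check)
-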
- # Pair every document index with its score and sort by score, best match first.
- indexedwCD = [[n, wCD[n][0]] for n in range(len(wCD))]
- indexedwCD = sorted(indexedwCD, key=lambda tup: tup[1], reverse=True)
-
- # Indices of the n best-matching documents.
- best_n_documents = [entry[0] for entry in indexedwCD[:user_input_n]]
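-
- # Equivalent ranking with NumPy (a sketch; assumes wCD has shape (n_documents, 1)):
- # best_n_documents = np.argsort(-wCD[:, 0])[:user_input_n].tolist()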
-
-
- # Map the document indices back to file names and open the best matches.
- cwd = os.getcwd()
- rechtsprechIn = hkl.load('bagofwords/rechtsprechIn_gzip.hkl')
-
- for n in range(user_input_n):
-     call(['nano', os.path.join(cwd, 'EndDokumente', rechtsprechIn[int(best_n_documents[n])])])
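-
- # Non-interactive alternative (sketch): print the paths instead of opening each
- # file in nano, e.g. when no terminal editor is available.
- # for n in range(user_input_n):
- #     print(os.path.join(cwd, 'EndDokumente', rechtsprechIn[int(best_n_documents[n])]))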
-
-
-
-
-
-