You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123 lines
2.7 KiB

4 years ago
4 years ago
  1. # prototype User Interface
  2. from sklearn.externals import joblib
  3. from sklearn.feature_extraction.text import CountVectorizer
  4. import numpy as np
  5. import scipy
  6. import tensorflow as tf
  7. import _pickle as cPickle
  8. import hickle as hkl
  9. # Load the Zeros and Ones of the database
  10. #pickdbOZ = open("databaseOneZero/OnesZerosDB.bin","rb")
  11. #dbOZ = cPickle.load(pickdbOZ)
  12. #dbOZ = dbOZ.astype(np.float32, copy=False)
  13. dbOZ = hkl.load('databaseOneZero/OnesZerosDB_gzip.hkl')
  14. #print(array_hkl)
  15. #numpyarray = np.load('databaseOneZero/OnesZerosDB.npy')
  16. #print(numpyarray)
  17. #print('pickle loaded:')
  18. #print('sum test1', sum(dbOZ[0]))
  19. #print('sum test2', sum(dbOZ[1]))
  20. #print('hdf5 loaded:')
  21. #print(array_hkl[1])
  22. #print('numpy loaded:')
  23. #print(numpyarray[1])
  24. print(dbOZ)
  25. print(np.array(dbOZ))
  26. print(np.array(dbOZ).astype(np.float32, copy=False))
  27. dbOZ = np.transpose(np.array(dbOZ)).astype(np.float32, copy=False)
  28. # Get the user input
  29. user_input_words = input("Please describe your problem: ")
  30. user_input_n = int(input("How many dokuments would you like to display?: "))
  31. # Convert user input to Zeros and Ones
  32. user_array = []
  33. user_array.append(user_input_words)
  34. vectorizer = joblib.load('models/bagofwords.pkl')
  35. user_input_OnesZeros = vectorizer.transform(user_array)
  36. print(type(sum(user_input_OnesZeros[0])))
  37. print(user_input_OnesZeros)
  38. print(user_input_words)
  39. print(type(user_input_words))
  40. print(type(user_input_OnesZeros))
  41. uOZ = user_input_OnesZeros.toarray()[0].astype(np.float32, copy=False)
  42. print(type(sum(uOZ)))
  43. print(np.array(uOZ))
  44. uiOZ = uOZ
  45. #uiOZ = np.transpose(uOZ[np.newaxis, :])
  46. uiOZ = uOZ[np.newaxis, :]
  47. print(uiOZ)
  48. sess = tf.Session()
  49. with sess.as_default():
  50. uiOZ_tensor = tf.constant(uiOZ)
  51. dbOZ_tensor = tf.constant(dbOZ)
  52. uiOZ_tensor_sparse =tf.contrib.layers.dense_to_sparse(uiOZ_tensor, eos_token=0, outputs_collections=None, scope=None )
  53. dbOZ_tensor_sparse =tf.contrib.layers.dense_to_sparse(dbOZ_tensor, eos_token=0, outputs_collections=None, scope=None )
  54. #wordCountDoku = tf.matmul(uiOZ_tensor, dbOZ_tensor)
  55. wordCountDoku = tf.sparse_matmul(uiOZ_tensor, dbOZ_tensor)
  56. wCD = np.array(wordCountDoku.eval()[0])
  57. print(type(wCD))
  58. print('end',wordCountDoku.eval())
  59. indexedwCD = []
  60. for n in range(len(wCD)):
  61. indexedwCD.append([wCD[n],n])
  62. print(indexedwCD)
  63. indexedwCD = np.transpose(np.array(indexedwCD))
  64. print(indexedwCD)
  65. indexedwCD = sorted(indexedwCD, key=lambda tup: tup[1], reverse=False)
  66. print(indexedwCD)
  67. for n in range(user_input_n):
  68. print(indexedwCD[n][1])
  69. # Calculate the best matching parallelized with tf
  70. # Get the id of documents which fit the best
  71. # Display the n best matching dokuments