small change to SolveShorts; FASTsearch now uses the GPU for calculations; requirements.txt added; CUDA must be exactly version 9.0 and cuDNN exactly version 7.0

This commit is contained in:
alpcentaur 2020-09-18 00:53:43 +02:00
parent ed40090463
commit 3b66a89dc2
19 changed files with 328 additions and 79 deletions
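
Note on the environment pin: tensorflow-gpu==1.12.0 (added in Prototyp/requirements.txt below) is built against exactly CUDA 9.0 and cuDNN 7.0, hence the strict version requirement. A minimal sanity check, not part of this commit, that the pinned build can actually see the GPU:

import tensorflow as tf

print(tf.__version__)               # expect 1.12.0
print(tf.test.is_gpu_available())   # True only if the CUDA 9.0 / cuDNN 7.0 runtime is found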

View file

@ -185,14 +185,18 @@
"source": [
"\n",
"# importing the libraries\n",
"#print('importing libraries')\n",
"#from SolveShorts import *\n",
"#import SentSeg\n",
"#from SayYes import *\n",
"#from Passiv2Aktiv import *\n",
"#from GenitivSolve import *\n",
"#from ConjunctSolve import *\n",
"\n",
"#from FremdWB import *\n",
"#from Medio import *\n",
"#from oi import *\n",
"#print('done')\n",
"\n",
"\n",
"# Initializing the libraries\n",
"#print('initializing the libraries')\n",
@ -210,10 +214,12 @@
"#cs = ConjunctSolve(None,None)\n",
"#print('7')\n",
"#oi = oi()\n",
"\n",
"#from FremdWB import *\n",
"#print('8')\n",
"#fwb = FremdWB(None,None)\n",
"#fwb.load_DB_into_FASTsearch()\n",
"#print('9')\n",
"#medi = Medio(None,None)\n",
"#print('done')\n",
"\n",
"\n",
"# loading the databases and models\n",
"#print('loading SolveShorts Databases')\n",
@ -226,6 +232,11 @@
"#p2a.load_DB_into_FASTsearch()\n",
"#print('loading conjunctivesolve Databases')\n",
"#cs.load_DB_into_FASTsearch()\n",
"#print('loading the fremdwb Databases')\n",
"#fwb.load_DB_into_FASTsearch()\n",
"#print('loading the mediodot Databases')\n",
"#medi.load_DB_into_FASTsearch()\n",
"\n",
"#print('done')\n",
"\n"
]
@ -241,15 +252,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"loading SolveShorts Databases\n",
"Creating the bag of words...\n",
"\n",
"dumping the data to hkl format..\n",
"done\n",
"Creating the bag of words...\n",
"\n",
"dumping the data to hkl format..\n",
"done\n",
"dumping the session\n",
"done\n"
]
@ -257,7 +259,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "82646fa586ba44aabc1608ec7a268b2c",
"model_id": "74fc341e0a474605b1f95c3e4e35d0b2",
"version_major": 2,
"version_minor": 0
},
@ -474,7 +476,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9e547f27f67f484c9b455ead6f63afb2",
"model_id": "082cb6fb58aa41cc82d918d2a056258d",
"version_major": 2,
"version_minor": 0
},
@ -674,7 +676,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
"model_id": "dad4baed09a5407194ae2daf153e8f43",
"version_major": 2,
"version_minor": 0
},
@ -714,7 +716,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "564058b35ab743fabff90d4c49c5aac3",
"model_id": "4bff927b0a404ed0b909db2bd766ac65",
"version_major": 2,
"version_minor": 0
},
@ -836,7 +838,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
"model_id": "b6053b85bdcd4446b010b5fb872dc52c",
"version_major": 2,
"version_minor": 0
},
@ -862,7 +864,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
"model_id": "4d3e6e3a2bcb499697a50a1a1e88a8a4",
"version_major": 2,
"version_minor": 0
},

View file

@ -15,6 +15,7 @@ import scipy as sc
import tensorflow as tf
import _pickle as cPickle
import hickle as hkl
@ -132,21 +133,21 @@ class FASTsearch(object):
uiOZ = uiOZ.transpose()
sess = tf.Session()
with tf.device('/gpu:0'):
    with sess.as_default():

        uiOZ_tensor = tf.constant(uiOZ)
        dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(self.dbOZ)

        #uiOZ_tensor_sparse = tf.contrib.layers.dense_to_sparse(uiOZ_tensor, eos_token=0, outputs_collections=None, scope=None)
        #dbOZ_tensor_sparse = tf.contrib.layers.dense_to_sparse(dbOZ_tensor, eos_token=0, outputs_collections=None, scope=None)

        #wordCountDoku = tf.matmul(uiOZ_tensor, dbOZ_tensor)
        wordCountDoku = tf.sparse_tensor_dense_matmul(dbOZ_tensor_sparse, uiOZ_tensor)

        wCD = np.array(wordCountDoku.eval())
indexedwCD = []
for n in range(len(wCD)):
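
The hunk above calls convert_sparse_matrix_to_sparse_tensor(), which is not shown in this diff. A minimal sketch of what such a helper typically looks like in TF 1.x, assuming self.dbOZ is a scipy sparse matrix:

import numpy as np
import tensorflow as tf

def convert_sparse_matrix_to_sparse_tensor(X):
    # COO format exposes aligned row/col/data arrays,
    # which map directly onto tf.SparseTensor
    coo = X.tocoo()
    indices = np.vstack([coo.row, coo.col]).transpose()
    return tf.SparseTensor(indices, coo.data, coo.shape)

Keeping the large bag-of-words database sparse while the query vector stays dense is what makes tf.sparse_tensor_dense_matmul the right op here: it yields one word-overlap count per database entry in a single GPU matmul.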
@ -206,21 +207,21 @@ class FASTsearch(object):
uiOZ = uiOZ.transpose()
sess = tf.Session()
with tf.device('/gpu:0'):
    with sess.as_default():

        uiOZ_tensor = tf.constant(uiOZ)
        dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(self.dbOZ)

        #uiOZ_tensor_sparse = tf.contrib.layers.dense_to_sparse(uiOZ_tensor, eos_token=0, outputs_collections=None, scope=None)
        #dbOZ_tensor_sparse = tf.contrib.layers.dense_to_sparse(dbOZ_tensor, eos_token=0, outputs_collections=None, scope=None)

        #wordCountDoku = tf.matmul(uiOZ_tensor, dbOZ_tensor)
        wordCountDoku = tf.sparse_tensor_dense_matmul(dbOZ_tensor_sparse, uiOZ_tensor)

        wCD = np.array(wordCountDoku.eval())
indexedwCD = []
for n in range(len(wCD)):
@ -257,21 +258,21 @@ class FASTsearch(object):
uiOZ = uiOZ.transpose()
sess = tf.Session()
with tf.device('/gpu:0'):
    with sess.as_default():

        uiOZ_tensor = tf.constant(uiOZ)
        dbOZ_tensor_sparse = convert_sparse_matrix_to_sparse_tensor(self.dbOZ)

        #uiOZ_tensor_sparse = tf.contrib.layers.dense_to_sparse(uiOZ_tensor, eos_token=0, outputs_collections=None, scope=None)
        #dbOZ_tensor_sparse = tf.contrib.layers.dense_to_sparse(dbOZ_tensor, eos_token=0, outputs_collections=None, scope=None)

        #wordCountDoku = tf.matmul(uiOZ_tensor, dbOZ_tensor)
        wordCountDoku = tf.sparse_tensor_dense_matmul(dbOZ_tensor_sparse, uiOZ_tensor)

        wCD = np.array(wordCountDoku.eval())
indexedwCD = []
for n in range(len(wCD)):
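
The same tf.device('/gpu:0') pinning is repeated in all three search methods above. To confirm the ops actually land on the GPU instead of silently falling back to CPU, a session configured with placement logging can help (a suggestion, not part of this commit):

sess = tf.Session(config=tf.ConfigProto(
    allow_soft_placement=True,     # fall back to CPU if an op has no GPU kernel
    log_device_placement=True))    # log the device chosen for every op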


View file

@ -130,7 +130,7 @@ class SolveShorts(object):
NhasToBeChecked = False
# list of falsely recognized tokens, e.g. 'er' should not be recognized :)
if sentence[n] in ['Er', 'er', 'ab', 'Ab', 'so', 'da', 'an', 'mit']:
if sentence[n] in ['Er', 'er', 'ab', 'Ab', 'so', 'da', 'an', 'mit', 'Am', 'am']:
NhasToBeChecked = False
if n != 0 and sentence[n][-1] != '.' and doc[n - 1].dep_[:2] != 'ART':
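
The only change here adds 'Am'/'am' to the stoplist of common words that must never be treated as abbreviations; the old test output below ('am (amos) Donnerstag') shows the misfire this fixes. A hypothetical reproduction of the effect:

false_positives = ['Er', 'er', 'ab', 'Ab', 'so', 'da', 'an', 'mit', 'Am', 'am']

for token in ['am', 'zb']:
    if token in false_positives:
        print(token, '-> left unchanged')          # 'am' is no longer expanded (e.g. to 'amos')
    else:
        print(token, '-> abbreviation candidate')  # 'zb' would still be looked up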

View file

@ -0,0 +1,7 @@
die Rede ist vom NRW-Polizei­skandal .
29 PolizistInnen wurden suspendiert, weil sie Teil rechtsextremer Whatsapp-Chatgruppen waren, die teils seit 2012 bestanden .
die Betroffenen gehörten fast alle zum Polizeipräsidium Essen, eine Dienstgruppe in Mülheim wurde komplett freigestellt, inklusive Dienstgruppenführer .
am Donnerstag sprach Reul von einer weiteren suspendierten Beamtin, auch sie aus der Mülheimer Gruppe .
Bundesweit wird nun über Konsequenzen diskutiert .
und die Affäre könnte sich noch ausweiten .
denn bisher hatten die Ermittler nur das Telefon eines Beamten, welches die Ermittlungen ins Rollen brachten .

View file

@ -0,0 +1,16 @@
die Rede ist vom NRW-Polizei­skandal .
29 PolizistInnen wurden suspendiert .
Teil sie waren rechtsextremer Whatsapp-Chatgruppen weil .
Teils bestanden diese seit 2012 .
die Betroffenen gehoerten fast alle zum Polizeipraesidium Essen .
eine Dienstgruppe in Muelheim wurde komplett freigestellt .
Inklusive Dienstgruppenfuehrer .
am (amos) Donnerstag sprach Reul von einer weiteren suspendierten Beamtin .
auch sie aus der Muelheimer Gruppe .
bundesweit diskutiert jemand nun ueber Konsequenzen .
und die Affaere koennte sich noch ausweiten .
eine Affaere ist ein Skandal .
zum Beispiel :
etwas was viele Menschen schlimm finden .
die Ermittler eines Beamten hatten nur das Telefon denn bisher .
dieses ins Rollen brachten die Ermittlungen .

View file

@ -0,0 +1,15 @@
die Rede ist vom NRW-Polizei­skandal .
29 PolizistInnen wurden suspendiert .
weil sie Teil rechtsextremer Whatsapp-Chatgruppen waren .
Teils bestanden diese seit 2012 .
die Betroffenen gehoerten fast alle zum Polizeipraesidium Essen .
eine Dienstgruppe in Muelheim wurde komplett freigestellt .
Inklusive Dienstgruppenfuehrer .
am Donnerstag sprach Reul von einer weiteren suspendierten Beamtin .
auch sie aus der Muelheimer Gruppe .
bundesweit diskutiert jemand nun ueber Konsequenzen .
und die Affaere koennte sich noch ausweiten .
eine Affaere ist ein Skandal .
zum Beispiel: etwas was viele Menschen schlimm finden .
denn bisher hatten die Ermittler nur das Telefon eines Beamten .
dieses brachten die Ermittlungen ins Rollen .

View file

@ -1 +1 @@
143
144

Binary file not shown.

Binary file not shown.

Prototyp/requirements.txt (new file, 206 lines)
View file

@ -0,0 +1,206 @@
absl-py==0.6.1
aiohttp==3.5.4
aiohttp-socks==0.2.2
anytree==2.4.3
appdirs==1.4.3
appmode==0.7.0
argh==0.26.2
asn1crypto==0.24.0
astor==0.7.1
astunparse==1.6.3
async-generator==1.10
async-timeout==3.0.1
attrs==18.2.0
Automat==0.7.0
backcall==0.1.0
beautifulsoup4==4.6.3
bleach==1.5.0
blis==0.4.1
boto==2.49.0
boto3==1.9.71
botocore==1.12.71
bqplot==0.12.6
bz2file==0.98
CacheControl==0.12.5
cachetools==4.1.1
catalogue==1.0.0
certifi==2019.6.16
cffi==1.11.5
chardet==3.0.4
Click==7.0
colorama==0.4.1
constantly==15.1.0
cryptography==2.4.2
cssselect==1.0.3
cycler==0.10.0
cymem==2.0.2
Cython==0.29.2
cytoolz==0.9.0.1
de-core-news-sm==2.0.0
decorator==4.3.0
defusedxml==0.5.0
dill==0.2.8.2
distlib==0.2.8
distro==1.3.0
dnspython==1.16.0
docutils==0.14
ecdsa==0.13
engineering-notation==0.6.0
entrypoints==0.2.3
gast==0.2.0
gensim==3.6.0
Glances==3.0.2
grpcio==1.17.0
gunicorn==19.9.0
h5py==2.8.0
hickle==3.3.2
html5lib==0.9999999
hyperlink==18.0.0
idna==2.7
idna-ssl==1.1.0
importlib-metadata==1.4.0
incremental==17.5.0
ipykernel==4.9.0
ipython==7.1.1
ipython-genutils==0.1.0
ipyvue==1.3.1
ipyvuetify==1.2.2
ipywidgets==7.5.1
jedi==0.13.1
Jinja2==2.10
jmespath==0.9.3
jsonrpclib-pelix==0.3.2
jsonschema==2.6.0
jupyter-client==6.1.2
jupyter-console==6.0.0
jupyter-contrib-core==0.3.3
jupyter-contrib-nbextensions==0.5.1
jupyter-core==4.6.0
jupyter-highlight-selected-word==0.2.0
jupyter-latex-envs==1.4.6
jupyter-nbextensions-configurator==0.4.1
jupyter-server==0.1.1
jupyterlab-pygments==0.1.0
jupyterthemes==0.20.0
Keras-Applications==1.0.6
Keras-Preprocessing==1.0.5
kiwisolver==1.0.1
lesscpy==0.14.0
lockfile==0.12.2
lxml==4.2.5
Markdown==2.6.11
MarkupSafe==1.1.0
matplotlib==3.0.2
mistune==0.8.4
mock==3.0.5
more-itertools==8.1.0
msgpack==0.5.6
msgpack-numpy==0.4.3.2
multidict==4.5.2
murmurhash==1.0.1
nbconvert==5.6.1
nbformat==4.4.0
ngrok==0.0.1
nltk==3.4.1
notebook==5.7.2
numpy==1.15.4
oauthlib==3.1.0
olefile==0.46
opt-einsum==3.3.0
packaging==18.0
pandas==0.23.4
pandocfilters==1.4.2
parsel==1.5.1
parso==0.3.1
pathlib2==2.3.5
pbkdf2==1.3
pdfminer3k==1.3.1
pep517==0.3
pexpect==4.6.0
pickleshare==0.7.5
Pillow==5.3.0
plac==0.9.6
pluggy==0.13.1
ply==3.11
preshed==2.0.1
progress==1.4
prometheus-client==0.4.2
prompt-toolkit==2.0.7
protobuf==3.6.1
psutil==5.4.8
ptyprocess==0.6.0
py==1.8.1
pyaes==1.6.1
pyasn1==0.4.4
pyasn1-modules==0.2.2
pycparser==2.19
pycryptodomex==3.6.6
PyDispatcher==2.0.5
Pygments==2.6.1
PyHamcrest==1.9.0
pyOpenSSL==18.0.0
pyparsing==2.3.0
PyQt5==5.11.3
PyQt5-sip==4.19.13
PySocks==1.6.8
PyStemmer==1.3.0
pytest==5.3.2
python-dateutil==2.7.5
pytoml==0.1.20
pytz==2018.7
PyYAML==5.3
pyzmq==17.1.0
qrcode==6.0
QtPy==1.5.1
queuelib==1.5.0
regex==2018.1.10
requests==2.20.1
requests-oauthlib==1.3.0
retrying==1.3.3
rsa==4.6
s3transfer==0.1.13
scikit-learn==0.20.0
scipy==1.1.0
Scrapy==1.5.1
Send2Trash==1.5.0
service-identity==18.1.0
simplegeneric==0.8.1
six==1.12.0
smart-open==1.7.1
spacy==2.0.18
srsly==1.0.1
tensorboard==1.12.0
tensorboard-plugin-wit==1.7.0
tensorflow==1.12.0
tensorflow-gpu==1.12.0
tensorflow-serving-api==1.12.0
tensorflow-tensorboard==1.5.1
termcolor==1.1.0
terminado==0.8.1
testpath==0.4.2
thinc==6.12.1
tk-tools==0.12.0
toolz==0.9.0
tornado==5.1.1
tqdm==4.28.1
traitlets==4.3.2
traittypes==0.2.1
Twisted==18.9.0
typing-extensions==3.7.4.1
ujson==1.35
urllib3==1.24.1
virtualenv==16.1.0
voila==0.1.21
voila-gridstack==0.0.8
voila-vuetify==0.2.2
w3lib==1.19.0
wasabi==0.6.0
wcwidth==0.1.7
webencodings==0.5.1
websocket-client==0.54.0
Werkzeug==0.14.1
widgetsnbextension==3.5.1
wrapt==1.10.11
yarl==1.3.0
zipp==0.6.0
zope.interface==4.6.0
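
To check an existing environment against these pins without reinstalling, one option (assumed workflow, not part of the commit):

import pkg_resources

with open('Prototyp/requirements.txt') as f:
    # raises DistributionNotFound / VersionConflict on any mismatch
    pkg_resources.require(f.read().splitlines())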

Binary file not shown.