Search on legal documents using Tensorflow and a web_actix web interface
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

42 lines
1.1 KiB

# First convert the text to lowercase, because stop words written in uppercase won't be recognized
# Expand every word list of an input file with similar words taken from a
# pre-trained word2vec model.
#
# Usage: python <script> INPUT_FILE OUTPUT_FILE
#
# Each non-blank line of INPUT_FILE must be the Python literal of a list of
# words, e.g. ['vertrag', 'gericht'].  For every word the (up to) three most
# similar words according to the model are looked up; they are written in
# front of the word itself.  One expanded list is appended to OUTPUT_FILE per
# input line, formatted with str().
import ast
import logging
import os
import sys

import gensim
from gensim.models import word2vec

Indok = sys.argv[1]
Outdok = sys.argv[2]

# Configure logging before the model is loaded, so that INFO records emitted
# while loading are not lost.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

print('loading model...')
model = gensim.models.KeyedVectors.load_word2vec_format('german.model.big', binary=True)  # C binary format
print('done')

# The output file is opened in append mode: results of earlier runs are kept.
with open(Indok) as InDok, open(Outdok, 'a') as OutDok:
    for line in InDok:
        # strip() instead of line[:-1]: the last line may lack a trailing
        # newline, in which case slicing would cut off the closing bracket.
        line = line.strip()
        if not line:
            # Blank lines carry no word list and cannot be parsed.
            continue
        # SECURITY: this used to be eval(), which executes arbitrary code
        # found in the input file.  literal_eval() only accepts plain Python
        # literals (lists, strings, numbers, ...).
        words = ast.literal_eval(line)
        linelist_wordcloud = []
        for word in words:
            try:
                momo = model.similar_by_word(word, topn=3)
            except KeyError:
                # Word is not in the model's vocabulary: keep it without
                # adding any similar words.
                momo = []
            # Each result is a pair whose first item is the similar word.
            linelist_wordcloud.extend(element[0] for element in momo)
            linelist_wordcloud.append(word)
        OutDok.write(str(linelist_wordcloud))
        OutDok.write('\n')