43 lines
1.1 KiB
Python
43 lines
1.1 KiB
Python
|
# first convert to lowercase, because stop words written in uppercase won't be recognized
|
||
|
|
||
|
import ast
import logging
import os
import sys

import gensim
from gensim.models import word2vec

# Script: for every non-blank line of the input file (a Python list literal of
# words) write one line to the output file. That line is the str() of a list
# holding, for each word, its (up to three) most similar words according to
# the word2vec model, followed by the word itself.
#
# Usage: python <this script> INPUT_FILE OUTPUT_FILE

if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: ' + sys.argv[0] + ' INPUT_FILE OUTPUT_FILE')

input_path = sys.argv[1]
output_path = sys.argv[2]

# Configure logging before the model is loaded so that gensim's progress
# messages during loading are emitted as well.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

print('loading model...')
model = gensim.models.KeyedVectors.load_word2vec_format('german.model.big', binary=True)  # C binary format
print('done')

# The output file is opened in append mode: repeated runs add to existing content.
with open(input_path) as source, open(output_path, 'a') as sink:
    for raw_line in source:
        # strip() instead of slicing off the last character: the final line
        # of a file may not end with a newline, and slicing would then cut
        # off the closing bracket of the list literal.
        text = raw_line.strip()
        if not text:
            continue

        # SECURITY: this used to be eval(), which executes arbitrary code
        # found in the input file. literal_eval only accepts Python literals
        # and raises ValueError/SyntaxError on anything else.
        words = ast.literal_eval(text)

        expanded = []
        for word in words:
            try:
                neighbours = model.similar_by_word(word, topn=3)
            except KeyError:
                # Deliberate best effort: a word the model does not know is
                # kept in the output without any similar words.
                neighbours = []
            # similar_by_word yields (word, similarity) pairs; keep the words.
            expanded.extend(neighbour for neighbour, _score in neighbours)
            expanded.append(word)

        sink.write(str(expanded) + '\n')
|
||
|
|