Cyberlaywer/build/tfgpu-cyberlaywer/pythonlistInTxtFile2wordClouds.py

43 lines
1.1 KiB
Python
Raw Normal View History

2023-03-06 15:36:57 +01:00
# first convert to lowercase, because stop words written in uppercase won't be recognized
import ast
import logging
import os
import sys

import gensim
from gensim.models import word2vec

# Command-line arguments:
#   argv[1] - input file; each non-empty line is parsed as a Python literal
#             holding the words of one entry (e.g. "['haus', 'garten']")
#   argv[2] - output file; one expanded word list per input line is APPENDED
Indok = sys.argv[1]
Outdok = sys.argv[2]

# Configure logging before the model is loaded so that any log messages
# emitted while loading are shown as well.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

print('loading model...')
model = gensim.models.KeyedVectors.load_word2vec_format('german.model.big', binary=True)  # C binary format
print('done')

with open(Indok) as in_file, open(Outdok, 'a') as out_file:
    for line in in_file:
        # strip() instead of line[:-1]: a final line without a trailing
        # newline would otherwise lose its last character (the closing
        # bracket) and fail to parse.
        text = line.strip()
        if not text:
            # Blank lines carry no word list; skip them instead of crashing.
            continue

        # NOTE(security): this used to be eval(), which executes arbitrary
        # code found in the input file. literal_eval only accepts plain
        # Python literals (lists, strings, numbers, ...).
        words = ast.literal_eval(text)

        linelist_wordcloud = []
        for word in words:
            try:
                neighbours = model.similar_by_word(word, topn=3)
            except KeyError:
                # Presumably raised for words the model does not know; such
                # words are kept in the output without any synonyms.
                neighbours = []
            # Each result entry is indexed at [0] to get the similar word;
            # the synonyms go first, followed by the original word.
            linelist_wordcloud.extend(entry[0] for entry in neighbours)
            linelist_wordcloud.append(word)

        out_file.write(str(linelist_wordcloud) + '\n')