|
|
- # first to lower, because stops written in uppercase won't be recognized
-
- import os
- import sys
-
# Command-line arguments: argv[1] is the file that is read line by line,
# argv[2] is the file the results are appended to.
Indok, Outdok = sys.argv[1], sys.argv[2]
-
- from gensim.models import word2vec
-
- import logging
- import gensim
-
import ast

# Word2vec vectors in C binary format, looked up relative to the working directory.
MODEL_PATH = 'german.model.big'


def parse_word_list(line):
    """Parse one input line that holds a Python list literal of words.

    Args:
        line: A single line of the input file, with or without its trailing
            newline.

    Returns:
        The object described by the literal (expected to be a list of words).

    Raises:
        ValueError: If the line is not a plain Python literal.
        SyntaxError: If the line is empty or not valid Python syntax.
    """
    # SECURITY: this used to be eval(line[:-1]), which executes whatever code
    # the input file contains. literal_eval only accepts literals. Stripping
    # (instead of cutting the last character) also keeps the closing bracket
    # of a final line that has no trailing newline.
    return ast.literal_eval(line.strip())


def expand_with_synonyms(words, vectors, topn=3):
    """Return ``words`` with each word preceded by its nearest neighbours.

    Args:
        words: Iterable of words to expand.
        vectors: Object exposing ``similar_by_word(word, topn=...)`` that
            returns a sequence whose items carry the neighbour word at
            index 0 and raises ``KeyError`` for unknown words.
        topn: Number of neighbours requested per word.

    Returns:
        A flat list: for every input word its neighbours, then the word.
    """
    expanded = []
    for word in words:
        try:
            neighbours = vectors.similar_by_word(word, topn=topn)
        except KeyError:
            # Word is not in the model's vocabulary: keep it without synonyms.
            neighbours = []
        expanded.extend(hit[0] for hit in neighbours)
        # NOTE(review): the word is assumed to be appended exactly once, after
        # its synonyms - confirm against the intended output format.
        expanded.append(word)
    return expanded


if __name__ == '__main__':
    # Configure logging before loading so that messages logged while the
    # model is being read are not dropped.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

    print('loading model...')
    model = gensim.models.KeyedVectors.load_word2vec_format(MODEL_PATH, binary=True)  # C binary format
    print('done')

    # The output file is opened in append mode, so repeated runs accumulate.
    with open(Indok) as source, open(Outdok, 'a') as sink:
        for line in source:
            if not line.strip():
                # Blank lines (e.g. at end of file) carry no word list.
                continue
            expanded = expand_with_synonyms(parse_word_list(line), model)
            sink.write(str(expanded) + '\n')
-
|