Cyberlaywer/build/tfgpu-cyberlaywer/pythonlistInTxtFile2NoStopwords.py
2023-03-06 15:36:57 +01:00

29 lines
700 B
Python

# Lowercase the input first: stop words written in uppercase won't be recognized.
import ast
import os
import sys


def remove_stopwords(line, stops):
    """Parse one serialized word list and drop its stop words.

    Args:
        line: Text of one input line holding a Python list literal, e.g.
            ``"['der', 'hund']"``. Surrounding whitespace, including the
            line terminator, is ignored.
        stops: Container of words to remove. Matching is exact and therefore
            case-sensitive, so the words should already be lowercased if the
            stop list is lowercase.

    Returns:
        A new list with the remaining words in their original order.

    Raises:
        ValueError, SyntaxError: If ``line`` is not a valid Python literal.
    """
    # literal_eval only accepts literals, so a crafted input line cannot run
    # arbitrary code the way eval() would. strip() (rather than line[:-1])
    # keeps the closing bracket when the last line has no trailing newline
    # and also removes a '\r' left over from CRLF line endings.
    words = ast.literal_eval(line.strip())
    return [word for word in words if word not in stops]


def load_german_stopwords():
    """Fetch the NLTK stop word corpus and return the German list as a set."""
    # Imported here so that a usage error is reported before NLTK is loaded
    # and before any download is attempted.
    import nltk
    from nltk.corpus import stopwords  # Import the stop word list

    nltk.download('stopwords')
    return set(stopwords.words("german"))


def main(argv):
    """Filter every word list of the input file and append it to the output.

    Each non-blank input line is expected to hold one Python list literal of
    words. The filtered list is written as ``str(list)`` followed by a
    newline. The output file is opened in append mode, so existing content
    is kept.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the input file path and
            ``argv[2]`` the output file path. Further arguments are ignored.
    """
    if len(argv) < 3:
        sys.exit("usage: %s INPUT_FILE OUTPUT_FILE" % os.path.basename(argv[0]))
    in_path, out_path = argv[1], argv[2]

    stops = load_german_stopwords()

    # Explicit UTF-8 so German umlauts do not depend on the platform locale.
    with open(in_path, encoding="utf-8") as in_file, \
            open(out_path, 'a', encoding="utf-8") as out_file:
        for line in in_file:
            if not line.strip():
                # Blank lines carry no word list; skip them instead of crashing.
                continue
            out_file.write(str(remove_stopwords(line, stops)))
            out_file.write('\n')


if __name__ == "__main__":
    main(sys.argv)