29 lines
700 B
Python
29 lines
700 B
Python
"""Remove German stop words from a file of tokenised lines.

Usage: <script> INPUT_FILE OUTPUT_FILE

Every line of INPUT_FILE is expected to hold the Python literal of a
sequence of tokens (e.g. ``['der', 'hund', 'bellt']``).  For each input
line, the tokens that are not German stop words are written to
OUTPUT_FILE as the ``str()`` of a list, one list per line.  OUTPUT_FILE
is opened in append mode, so existing content is kept.

NOTE: tokens should be lowercased first (in an earlier step), because
stop words written in uppercase will not be recognised.
"""

import ast
import os
import sys


def filter_line(line, stops):
    """Parse one input line and return its tokens without stop words.

    Args:
        line: Text of one input line: the Python literal of a sequence of
            tokens, with or without a trailing newline.
        stops: Collection of stop words to drop (membership is tested
            with ``in``).

    Returns:
        A list of the tokens of ``line`` that are not in ``stops``, in
        their original order.

    Raises:
        ValueError: If ``line`` is not a plain Python literal.
        SyntaxError: If ``line`` is empty or not valid Python syntax.
    """
    # literal_eval only accepts literals, so a crafted input line cannot
    # execute code the way eval() would.  strip() instead of line[:-1]
    # keeps the closing bracket of a final line without a newline.
    words = ast.literal_eval(line.strip())
    return [w for w in words if w not in stops]


def process_file(in_path, out_path, stops):
    """Filter every line of ``in_path`` and append the result to ``out_path``.

    Args:
        in_path: Path of the file to read, one token-list literal per line.
        out_path: Path of the file to append the filtered lists to.
        stops: Collection of stop words to drop.
    """
    # NOTE(review): both files use the platform default encoding, exactly
    # as before; confirm whether an explicit encoding (e.g. UTF-8) should
    # be used for German text on all platforms.
    with open(in_path) as in_file, open(out_path, 'a') as out_file:
        for line in in_file:
            out_file.write(str(filter_line(line, stops)))
            out_file.write('\n')


def main(argv=None):
    """Run the stop-word filter with command-line style arguments.

    Args:
        argv: ``[input_path, output_path]``; defaults to ``sys.argv[1:]``.
            Additional arguments are ignored.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: %s INPUT_FILE OUTPUT_FILE"
                 % os.path.basename(sys.argv[0]))

    # Imported here so that importing this module neither requires nltk
    # nor triggers the corpus download.
    import nltk
    from nltk.corpus import stopwords  # Import the stop word list

    nltk.download('stopwords')
    stops = set(stopwords.words("german"))

    process_file(args[0], args[1], stops)


if __name__ == "__main__":
    main()