|
|
# Remove German stop words from a file of tokenised lines.
#
# Usage: <script> INPUT_FILE OUTPUT_FILE
#
# Every non-blank line of INPUT_FILE must be the Python literal of a list of
# words, e.g. ['das', 'haus', 'ist', 'rot']. For each such line the list
# without stop words is appended to OUTPUT_FILE, one list per line.
#
# Lowercase the tokens first: the membership test below is case-sensitive,
# so stop words written in uppercase won't be recognized.

import ast
import os
import sys

Indok = sys.argv[1]
Outdok = sys.argv[2]

import nltk

nltk.download('stopwords')

from nltk.corpus import stopwords  # Import the stop word list


# A set makes the per-word membership test below a constant-time lookup.
stops = set(stopwords.words("german"))


# The output file is opened in append mode, so the results of earlier runs
# are kept and the new lines are added after them.
with open(Indok) as InDok, open(Outdok, 'a') as OutDok:
    for line in InDok:
        # strip() rather than line[:-1]: a final line without a trailing
        # newline would otherwise lose its closing bracket.
        text = line.strip()
        if not text:
            # Blank lines carry no token list; skip them instead of crashing.
            continue
        # literal_eval only accepts Python literals, so a crafted input line
        # cannot execute arbitrary code the way eval() would.
        words = ast.literal_eval(text)
        linelist_noStopwords = [w for w in words if w not in stops]
        OutDok.write(str(linelist_noStopwords))
        OutDok.write('\n')
-
|