Search on legal documents using Tensorflow and a web_actix web interface
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

29 lines
700 B

# Remove German stop words from a tokenised document.
#
# Usage: python <this script> INPUT_FILE OUTPUT_FILE
#
# Every non-blank line of INPUT_FILE must be the repr of a Python list of
# tokens, e.g. ['das', 'gericht', 'hat', 'entschieden'].  For each such line
# the list without stop words is appended to OUTPUT_FILE, one list per line.
#
# NOTE: lowercase the tokens in an earlier step first.  Matching against the
# stop word set is case-sensitive, so (per the original author's note) stop
# words written with an uppercase letter will not be recognised.
import ast
import os
import sys


def load_german_stopwords():
    """Fetch the NLTK 'stopwords' corpus and return the German list as a set."""
    # Imported lazily so remove_stopwords() stays usable without NLTK.
    import nltk
    from nltk.corpus import stopwords  # Import the stop word list

    nltk.download('stopwords')
    return set(stopwords.words("german"))


def remove_stopwords(line, stops):
    """Parse one serialised token list and drop the tokens found in *stops*.

    Args:
        line: One line of the input file: the repr of a list of tokens,
            with or without a trailing newline.
        stops: Container of stop words; membership is tested with ``in``,
            so the comparison is exact and case-sensitive.

    Returns:
        The list of remaining tokens in their original order, or ``None``
        when the line is blank.

    Raises:
        ValueError, SyntaxError: If the line is not a valid Python literal.
    """
    text = line.strip()
    if not text:
        return None
    # SECURITY: the original used eval() here, which executes arbitrary code
    # found in the input file.  literal_eval() only accepts literals, which is
    # all that a str()-serialised token list contains.
    words = ast.literal_eval(text)
    return [w for w in words if w not in stops]


def main(argv, stops=None):
    """Filter the file named by argv[1] and append the result to argv[2].

    Args:
        argv: Command-line vector; argv[1] is the input path and argv[2]
            the output path.
        stops: Optional stop word container.  When omitted, the German
            NLTK stop word list is downloaded and used.
    """
    if len(argv) < 3:
        sys.exit("usage: {} INPUT_FILE OUTPUT_FILE".format(argv[0]))
    in_path, out_path = argv[1], argv[2]

    if stops is None:
        stops = load_german_stopwords()

    # The output is opened in append mode, as in the original script, so an
    # existing output file is extended rather than overwritten.
    with open(in_path) as in_doc, open(out_path, 'a') as out_doc:
        for line in in_doc:
            kept = remove_stopwords(line, stops)
            if kept is None:
                # Blank lines carry no token list; skip them instead of
                # crashing on an empty expression.
                continue
            out_doc.write(str(kept))
            out_doc.write('\n')


if __name__ == "__main__":
    main(sys.argv)