Search on legal documents using Tensorflow and a web_actix web interface
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

22 lines
528 B

# coding=utf-8
import os
import logging
import sys
import re
# Matches every character that is NOT an ASCII letter or one of the lowercase
# German special letters; each such character becomes a token separator.
# NOTE(review): uppercase umlauts (Ä, Ö, Ü) are not part of the class, so they
# are treated as separators as well -- confirm whether that is intended.
NON_LETTER = re.compile("[^a-zA-Züäöß]")


def main(directory_in, out_path):
    """Append one tokenised line per input document to ``out_path``.

    Every regular file directly inside ``directory_in`` is read completely,
    all characters outside the letter class of ``NON_LETTER`` are replaced by
    spaces, and the text is split on whitespace.  The resulting token list is
    written as its ``str()`` representation (e.g. ``['foo', 'bar']``) followed
    by a newline, so each document yields exactly one output line.

    Args:
        directory_in: Directory holding the input documents.  A relative path
            is resolved against the current working directory; a trailing
            slash is optional.
        out_path: File the token lines are appended to (opened in mode 'a',
            existing content is kept).

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            opened.
    """
    source_dir = os.path.join(os.getcwd(), directory_in)
    # List (and sort, for a reproducible output order) BEFORE opening the
    # output, so an output file created inside source_dir is not picked up
    # as an input document during the same run.
    names = sorted(os.listdir(source_dir))

    # NOTE(review): files are opened with the platform default encoding, as
    # before; the expected encoding of the documents is not visible here.
    with open(out_path, 'a') as out_file:
        for name in names:
            # os.path.join supplies the separator between directory and file
            # name that plain string concatenation left out.
            path = os.path.join(source_dir, name)
            if not os.path.isfile(path):
                # Sub-directories and other non-files cannot be read as text.
                continue
            with open(path) as in_file:
                text = in_file.read()
            # Newlines are outside the letter class, so they turn into spaces
            # here; no per-line slicing is needed and the last character of a
            # file without a trailing newline is preserved.
            tokens = NON_LETTER.sub(" ", text).split()
            out_file.write(str(tokens))
            out_file.write('\n')


if __name__ == "__main__":
    # Usage: <script> <input directory> <output file>
    main(sys.argv[1], sys.argv[2])