# coding=utf-8
|
|
|
|
import os
|
|
import logging
|
|
import sys
|
|
import re
|
|
|
|
# Any character outside this set is treated as a word separator.
# NOTE(review): upper-case umlauts (Ä, Ö, Ü) are not part of the set, so
# they also act as separators -- confirm whether that is intended.
_NON_LETTER = re.compile("[^a-zA-Züäöß]")


def extract_words(text):
    """Return the list of letter-only words contained in *text*.

    Every character other than ``a-z``, ``A-Z``, ``ü``, ``ä``, ``ö`` and
    ``ß`` (newlines included) is replaced by a space, then the result is
    split on whitespace. An empty or letter-free text yields ``[]``.
    """
    return _NON_LETTER.sub(" ", text).split()


def main(argv):
    """Append one line of extracted words per input document.

    ``argv[1]`` names the input directory, resolved against the current
    working directory (an absolute path is used as-is). ``argv[2]`` names
    the output file, which is opened in append mode. For every regular
    file in the input directory, the ``str()`` of its word list followed
    by a newline is written to the output file.

    Raises:
        IndexError: if fewer than two arguments are supplied.
        OSError: if the directory or one of the files cannot be read.
    """
    directoryIn = argv[1]
    Outdok = argv[2]

    input_dir = os.path.join(os.getcwd(), directoryIn)

    # List the directory before the output file is created so the output
    # never shows up as one of its own inputs; sort for a stable order.
    documentsIn = sorted(os.listdir(input_dir))

    with open(Outdok, 'a') as out_file:
        for document in documentsIn:
            path = os.path.join(input_dir, document)
            # Sub-directories and other non-file entries cannot be opened
            # as text documents, so skip them instead of crashing.
            if not os.path.isfile(path):
                continue
            # Read the whole text at once: line breaks are non-letters and
            # become separators, and no trailing character is lost when
            # the last line lacks a newline.
            with open(path) as in_file:
                words = extract_words(in_file.read())
            out_file.write(str(words))
            out_file.write('\n')


if __name__ == "__main__":
    main(sys.argv)