|
|
- # coding=utf-8
-
- import os
- import logging
- import sys
- import re
-
# Matches every character that is NOT an ASCII letter or one of the lowercase
# German letters ü, ä, ö, ß.  Such characters are treated as token separators.
# Note that the uppercase umlauts are not part of the class, so they also
# split tokens.
_NON_LETTER = re.compile("[^a-zA-Züäöß]")


def merge_documents(directoryIn, Outdok):
    """Append one tokenised line per input document to an output file.

    Each regular file found directly inside ``directoryIn`` is read, every
    character that is not matched as a letter by ``_NON_LETTER`` is replaced
    with a space, and the text is split on whitespace.  The resulting token
    list is written to ``Outdok`` as its Python ``str()`` representation
    (e.g. ``['Hello', 'World']``) followed by a newline.

    Args:
        directoryIn: Directory holding the input documents.  A relative path
            is resolved against the current working directory.
        Outdok: Path of the output file.  It is opened in append mode, so
            existing content is kept.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            opened.
    """
    # os.path.join inserts the separator itself, so the directory argument
    # works with or without a trailing slash.
    source_dir = os.path.join(os.getcwd(), directoryIn)

    # The output file is opened once for the whole run rather than once per
    # document.
    with open(Outdok, 'a') as out_file:
        # Sorted so the order of the output lines is reproducible;
        # os.listdir returns entries in arbitrary order.
        for document in sorted(os.listdir(source_dir)):
            path = os.path.join(source_dir, document)
            # os.listdir also returns sub-directories, which cannot be
            # opened as text files.
            if not os.path.isfile(path):
                continue
            with open(path) as in_file:
                # Newlines are non-letters, so the regex turns them into
                # separators; no per-line stripping is needed and the last
                # character of a file without a trailing newline is kept.
                tokens = _NON_LETTER.sub(" ", in_file.read()).split()
            out_file.write(str(tokens))
            out_file.write('\n')


def main(argv=None):
    """Run the script: ``<input-directory> <output-file>``.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 when too few arguments were supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write(
            "usage: %s <input-directory> <output-file>\n" % argv[0]
        )
        return 2
    merge_documents(argv[1], argv[2])
    return 0


if __name__ == "__main__":
    sys.exit(main())
-
|