25 lines
849 B
Python
25 lines
849 B
Python
|
import ast
import os
import sys
|
||
|
|
||
|
# Word-list normalizer.
#
# Usage: <script> INFILE OUTFILE
#
# Every non-blank line of INFILE must be a Python list literal of strings.
# For each such line one line is APPENDED to OUTFILE: the ``str()`` of the
# list of normalized words (lowercased, German umlauts transliterated,
# edge punctuation trimmed, non-ASCII characters removed, words shorter
# than two characters dropped).

# A word containing any of these substrings is dropped entirely.
# NOTE(review): 'x0' / 'x1' presumably target leftovers of escaped bytes
# such as "\x0a" - confirm against the data that produces INFILE.
_SKIP_MARKERS = ('_', '^', 'x0', 'x1')

# German umlauts / sharp s and their ASCII transliterations.
_TRANSLITERATION = str.maketrans({'ü': 'ue', 'ö': 'oe', 'ä': 'ae', 'ß': 'ss'})

# Characters trimmed from both ends of a word.
_EDGE_CHARS = '-.,?!<>|#+~}{][&%$^°*;:-_'


def _clean_word(word):
    """Return the normalized form of *word*, or None if it must be dropped.

    A word is dropped when it contains one of ``_SKIP_MARKERS`` or when
    fewer than two characters remain after normalization.
    """
    # The markers are matched case-sensitively against the raw word.
    if any(marker in word for marker in _SKIP_MARKERS):
        return None
    # Lowercase BEFORE transliterating so capital umlauts ('Ü', 'Ö', 'Ä')
    # are mapped as well instead of being deleted by the ASCII filter below.
    word = word.lower().translate(_TRANSLITERATION)
    word = word.strip(_EDGE_CHARS)
    # Remove every remaining non-ASCII character.
    word = word.encode('ascii', 'ignore').decode('ascii')
    return word if len(word) > 1 else None


def _clean_line(line):
    """Parse *line* as a list literal of strings and return the kept words.

    Raises ValueError or SyntaxError if *line* is not a valid Python literal.
    """
    # SECURITY: this used to be eval(), which executes arbitrary code found
    # in the input file. literal_eval() accepts only Python literals, so
    # input lines that relied on evaluating expressions now raise instead.
    words = ast.literal_eval(line)
    cleaned = (_clean_word(word) for word in words)
    return [word for word in cleaned if word is not None]


def main(argv=None):
    """Normalize the word lists in ``argv[1]`` and append them to ``argv[2]``.

    *argv* defaults to ``sys.argv``. Exits with a usage message when fewer
    than two file arguments are given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: <script> INFILE OUTFILE')
    in_path, out_path = argv[1], argv[2]

    # Both files use the platform default text encoding; the output only
    # ever contains ASCII.
    with open(in_path) as src, open(out_path, 'a') as dst:
        for line in src:
            # strip() instead of line[:-1]: the last line may lack a
            # trailing newline, in which case [:-1] would cut off the
            # closing bracket of the list literal.
            line = line.strip()
            if not line:
                # Blank lines carry no list; skip them rather than crash.
                continue
            dst.write(str(_clean_line(line)))
            dst.write('\n')


if __name__ == '__main__':
    main()
|
||
|
|