|
|
"""Normalize tokenized lines of (German) text.

Usage: script INPUT OUTPUT

Each non-empty line of INPUT must be the ``repr`` of a Python list of
strings.  For every such line one line is appended to OUTPUT containing the
``str`` of the list of normalized words (see ``normalize_word``).
"""
import ast
import os
import sys

# A word containing any of these substrings is discarded entirely.
SKIP_MARKERS = ('_', '^', 'x0', 'x1')

# ASCII transliteration of the German special characters.
TRANSLITERATION = str.maketrans({'ü': 'ue', 'ö': 'oe', 'ä': 'ae', 'ß': 'ss'})

# Characters removed from both ends of a word.
STRIP_CHARS = '-.,?!<>|#+~}{][&%$^°*;:-_'


def normalize_word(word):
    """Return the normalized form of *word*, or ``None`` if it is dropped.

    A word is dropped when it contains one of ``SKIP_MARKERS`` or when fewer
    than two characters remain after normalization.  Normalization lowercases
    the word, transliterates umlauts and sharp s, strips surrounding
    punctuation and removes any remaining non-ASCII characters.
    """
    # The original condition `'_' and '^' and 'x0' and 'x1' not in word`
    # only ever tested 'x1'; every marker has to be checked individually.
    if any(marker in word for marker in SKIP_MARKERS):
        return None
    # Lowercase first so that upper-case umlauts are transliterated too
    # instead of being silently deleted by the ASCII encode below.
    word = word.lower().translate(TRANSLITERATION)
    word = word.strip(STRIP_CHARS)
    word = word.encode('ascii', 'ignore').decode('ascii')
    if len(word) > 1:
        return word
    return None


def normalize_words(words):
    """Return the list of normalized words from *words*, dropped ones omitted."""
    normalized = (normalize_word(word) for word in words)
    return [word for word in normalized if word is not None]


def convert_file(in_path, out_path):
    """Append one normalized word list per non-empty line of *in_path*.

    Raises ``ValueError`` or ``SyntaxError`` (from ``ast.literal_eval``) when
    a line is not a valid Python literal.
    """
    with open(in_path) as source, open(out_path, 'a') as target:
        for line in source:
            # strip() instead of line[:-1]: the last line may lack a newline.
            line = line.strip()
            if not line:
                continue
            # SECURITY: this used to be eval(), which executes arbitrary code
            # found in the input file.  literal_eval only accepts literals.
            words = ast.literal_eval(line)
            target.write(str(normalize_words(words)))
            target.write('\n')


if __name__ == '__main__':
    Indok = sys.argv[1]
    Outdok = sys.argv[2]
    convert_file(Indok, Outdok)
-
|