Search on legal documents using Tensorflow and a web_actix web interface
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

22 lines
528 B

  1. # coding=utf-8
  2. import os
  3. import logging
  4. import sys
  5. import re
  6. directoryIn = sys.argv[1]
  7. Outdok = sys.argv[2]
  8. cwd = os.getcwd()
  9. documentsIn = os.listdir(cwd + '/' + directoryIn)
  10. for document in documentsIn:
  11. with open(Outdok, 'a') as OutDok:
  12. with open(cwd + '/' + directoryIn + document) as Indok:
  13. lines = []
  14. for line in Indok:
  15. lines += [str(line)[:-1]]
  16. OutDok.write(str(re.sub("[^a-zA-Züäöß]", " ", ' '.join(lines)).split()))
  17. OutDok.write('\n')