You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

130 lines
2.3 KiB

4 years ago
  1. #!/home/alpcentaur/ProjektA/PrototypeWebApp/venv/bin/python3.5
  2. #
  3. # latin2ascii.py - converts latin1 characters into ascii.
  4. #
  5. import sys
  6. """ Mappings from Latin-1 characters to ASCII.
  7. This is an in-house mapping table for some Latin-1 characters
  8. (acutes, umlauts, etc.) to ASCII strings.
  9. """
  10. LATIN2ASCII = {
  11. #0x00a0: '',
  12. #0x00a7: '',
  13. # iso-8859-1
  14. 0x00c0: 'A`',
  15. 0x00c1: "A'",
  16. 0x00c2: 'A^',
  17. 0x00c3: 'A~',
  18. 0x00c4: 'A:',
  19. 0x00c5: 'A%',
  20. 0x00c6: 'AE',
  21. 0x00c7: 'C,',
  22. 0x00c8: 'E`',
  23. 0x00c9: "E'",
  24. 0x00ca: 'E^',
  25. 0x00cb: 'E:',
  26. 0x00cc: 'I`',
  27. 0x00cd: "I'",
  28. 0x00ce: 'I^',
  29. 0x00cf: 'I:',
  30. 0x00d0: "D'",
  31. 0x00d1: 'N~',
  32. 0x00d2: 'O`',
  33. 0x00d3: "O'",
  34. 0x00d4: 'O^',
  35. 0x00d5: 'O~',
  36. 0x00d6: 'O:',
  37. 0x00d8: 'O/',
  38. 0x00d9: 'U`',
  39. 0x00da: "U'",
  40. 0x00db: 'U~',
  41. 0x00dc: 'U:',
  42. 0x00dd: "Y'",
  43. 0x00df: 'ss',
  44. 0x00e0: 'a`',
  45. 0x00e1: "a'",
  46. 0x00e2: 'a^',
  47. 0x00e3: 'a~',
  48. 0x00e4: 'a:',
  49. 0x00e5: 'a%',
  50. 0x00e6: 'ae',
  51. 0x00e7: 'c,',
  52. 0x00e8: 'e`',
  53. 0x00e9: "e'",
  54. 0x00ea: 'e^',
  55. 0x00eb: 'e:',
  56. 0x00ec: 'i`',
  57. 0x00ed: "i'",
  58. 0x00ee: 'i^',
  59. 0x00ef: 'i:',
  60. 0x00f0: "d'",
  61. 0x00f1: 'n~',
  62. 0x00f2: 'o`',
  63. 0x00f3: "o'",
  64. 0x00f4: 'o^',
  65. 0x00f5: 'o~',
  66. 0x00f6: 'o:',
  67. 0x00f8: 'o/',
  68. 0x00f9: 'o`',
  69. 0x00fa: "u'",
  70. 0x00fb: 'u~',
  71. 0x00fc: 'u:',
  72. 0x00fd: "y'",
  73. 0x00ff: 'y:',
  74. # Ligatures
  75. 0x0152: 'OE',
  76. 0x0153: 'oe',
  77. 0x0132: 'IJ',
  78. 0x0133: 'ij',
  79. 0x1d6b: 'ue',
  80. 0xfb00: 'ff',
  81. 0xfb01: 'fi',
  82. 0xfb02: 'fl',
  83. 0xfb03: 'ffi',
  84. 0xfb04: 'ffl',
  85. 0xfb05: 'ft',
  86. 0xfb06: 'st',
  87. # Symbols
  88. #0x2013: '',
  89. 0x2014: '--',
  90. 0x2015: '||',
  91. 0x2018: '`',
  92. 0x2019: "'",
  93. 0x201c: '``',
  94. 0x201d: "''",
  95. #0x2022: '',
  96. #0x2212: '',
  97. }
  98. def latin2ascii(s):
  99. return ''.join( LATIN2ASCII.get(ord(c),c) for c in s )
  100. def main(argv):
  101. import getopt, fileinput
  102. def usage():
  103. print('usage: %s [-c codec] file ...' % argv[0])
  104. return 100
  105. try:
  106. (opts, args) = getopt.getopt(argv[1:], 'c')
  107. except getopt.GetoptError:
  108. return usage()
  109. if not args: return usage()
  110. codec = 'utf-8'
  111. for (k, v) in opts:
  112. if k == '-c': codec = v
  113. for line in fileinput.input(args):
  114. line = latin2ascii(str(line, codec, 'ignore'))
  115. sys.stdout.write(line.encode('ascii', 'replace'))
  116. if __name__ == '__main__':
  117. sys.exit(main(sys.argv))