You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
2.1 KiB

4 years ago
  1. #!/usr/bin/env python3
  2. """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
  3. This code is in the public domain.
  4. """
  5. import re
  6. import struct
  7. def ascii85decode(data):
  8. """
  9. In ASCII85 encoding, every four bytes are encoded with five ASCII
  10. letters, using 85 different types of characters (as 256**4 < 85**5).
  11. When the length of the original bytes is not a multiple of 4, a special
  12. rule is used for round up.
  13. The Adobe's ASCII85 implementation is slightly different from
  14. its original in handling the last characters.
  15. The sample string is taken from:
  16. http://en.wikipedia.org/w/index.php?title=Ascii85
  17. """
  18. if isinstance(data, str):
  19. data = data.encode('ascii')
  20. n = b = 0
  21. out = bytearray()
  22. for c in data:
  23. if ord('!') <= c and c <= ord('u'):
  24. n += 1
  25. b = b*85+(c-33)
  26. if n == 5:
  27. out += struct.pack(b'>L',b)
  28. n = b = 0
  29. elif c == ord('z'):
  30. assert n == 0
  31. out += b'\0\0\0\0'
  32. elif c == ord('~'):
  33. if n:
  34. for _ in range(5-n):
  35. b = b*85+84
  36. out += struct.pack(b'>L',b)[:n-1]
  37. break
  38. return bytes(out)
  39. hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE)
  40. trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
  41. def asciihexdecode(data):
  42. """
  43. ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
  44. For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
  45. ASCIIHexDecode filter produces one byte of binary data. All white-space
  46. characters are ignored. A right angle bracket character (>) indicates
  47. EOD. Any other characters will cause an error. If the filter encounters
  48. the EOD marker after reading an odd number of hexadecimal digits, it
  49. will behave as if a 0 followed the last digit.
  50. """
  51. decode = (lambda hx: chr(int(hx, 16)))
  52. out = list(map(decode, hex_re.findall(data)))
  53. m = trail_re.search(data)
  54. if m:
  55. out.append(decode("%c0" % m.group(1)))
  56. return ''.join(out)