#!/usr/bin/env python3 """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version). This code is in the public domain. """ import re import struct def ascii85decode(data): """ In ASCII85 encoding, every four bytes are encoded with five ASCII letters, using 85 different types of characters (as 256**4 < 85**5). When the length of the original bytes is not a multiple of 4, a special rule is used for round up. The Adobe's ASCII85 implementation is slightly different from its original in handling the last characters. The sample string is taken from: http://en.wikipedia.org/w/index.php?title=Ascii85 """ if isinstance(data, str): data = data.encode('ascii') n = b = 0 out = bytearray() for c in data: if ord('!') <= c and c <= ord('u'): n += 1 b = b*85+(c-33) if n == 5: out += struct.pack(b'>L',b) n = b = 0 elif c == ord('z'): assert n == 0 out += b'\0\0\0\0' elif c == ord('~'): if n: for _ in range(5-n): b = b*85+84 out += struct.pack(b'>L',b)[:n-1] break return bytes(out) hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE) trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE) def asciihexdecode(data): """ ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1 For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the ASCIIHexDecode filter produces one byte of binary data. All white-space characters are ignored. A right angle bracket character (>) indicates EOD. Any other characters will cause an error. If the filter encounters the EOD marker after reading an odd number of hexadecimal digits, it will behave as if a 0 followed the last digit. """ decode = (lambda hx: chr(int(hx, 16))) out = list(map(decode, hex_re.findall(data))) m = trail_re.search(data) if m: out.append(decode("%c0" % m.group(1))) return ''.join(out)