#!/usr/bin/env python3 import sys import io import struct from .cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap from .encodingdb import EncodingDB, name2unicode from .psparser import PSStackParser from .psparser import PSEOF from .psparser import LIT, KWD, handle_error from .psparser import PSLiteral, literal_name from .pdftypes import (PDFException, resolve1, int_value, num_value, list_value, dict_value, stream_value) from .fontmetrics import FONT_METRICS from .utils import apply_matrix_norm, nunpack, choplist def get_widths(seq): widths = {} r = [] for v in seq: if isinstance(v, list): if r: char1 = r[-1] for (i,w) in enumerate(v): widths[char1+i] = w r = [] elif isinstance(v, int): r.append(v) if len(r) == 3: (char1,char2,w) = r for i in range(char1, char2+1): widths[i] = w r = [] return widths #assert get_widths([1]) == {} #assert get_widths([1,2,3]) == {1:3, 2:3} #assert get_widths([1,[2,3],6,[7,8]]) == {1:2,2:3, 6:7,7:8} def get_widths2(seq): widths = {} r = [] for v in seq: if isinstance(v, list): if r: char1 = r[-1] for (i,(w,vx,vy)) in enumerate(choplist(3,v)): widths[char1+i] = (w,(vx,vy)) r = [] elif isinstance(v, int): r.append(v) if len(r) == 5: (char1,char2,w,vx,vy) = r for i in range(char1, char2+1): widths[i] = (w,(vx,vy)) r = [] return widths #assert get_widths2([1]) == {} #assert get_widths2([1,2,3,4,5]) == {1:(3,(4,5)), 2:(3,(4,5))} #assert get_widths2([1,[2,3,4,5],6,[7,8,9]]) == {1:(2,(3,4)), 6:(7,(8,9))} class FontMetricsDB: @classmethod def get_metrics(klass, fontname): return FONT_METRICS[fontname] class Type1FontHeaderParser(PSStackParser): KEYWORD_BEGIN = KWD('begin') KEYWORD_END = KWD('end') KEYWORD_DEF = KWD('def') KEYWORD_PUT = KWD('put') KEYWORD_DICT = KWD('dict') KEYWORD_ARRAY = KWD('array') KEYWORD_READONLY = KWD('readonly') KEYWORD_FOR = KWD('for') KEYWORD_FOR = KWD('for') def __init__(self, data): PSStackParser.__init__(self, data) self._cid2unicode = {} def get_encoding(self): while 1: try: (cid,name) = self.nextobject() except PSEOF: break try: self._cid2unicode[cid] = name2unicode(name) except KeyError: pass return self._cid2unicode def do_keyword(self, pos, token): if token is self.KEYWORD_PUT: ((_,key),(_,value)) = self.pop(2) if (isinstance(key, int) and isinstance(value, PSLiteral)): self.add_results((key, literal_name(value))) ## CFFFont ## (Format specified in Adobe Technical Note: #5176 ## "The Compact Font Format Specification") ## NIBBLES = ('0','1','2','3','4','5','6','7','8','9','.','e','e-',None,'-') def getdict(data): d = {} fp = io.BytesIO(data) stack = [] while 1: c = fp.read(1) if not c: break b0 = ord(c) if b0 <= 21: d[b0] = stack stack = [] continue if b0 == 30: s = '' loop = True while loop: b = ord(fp.read(1)) for n in (b >> 4, b & 15): if n == 15: loop = False else: s += NIBBLES[n] value = float(s) elif 32 <= b0 and b0 <= 246: value = b0-139 else: b1 = ord(fp.read(1)) if 247 <= b0 and b0 <= 250: value = ((b0-247)<<8)+b1+108 elif 251 <= b0 and b0 <= 254: value = -((b0-251)<<8)-b1-108 else: b2 = ord(fp.read(1)) if 128 <= b1: b1 -= 256 if b0 == 28: value = b1<<8 | b2 else: value = b1<<24 | b2<<16 | struct.unpack('>H', fp.read(2))[0] stack.append(value) return d class CFFFont: STANDARD_STRINGS = ( '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet', 'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright', 'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash', 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply', 'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave', 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex', 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis', 'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall', 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior', 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior', 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior', 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior', 'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior', 'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall', 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall', 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall', 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold', ) class INDEX: def __init__(self, fp): self.fp = fp self.offsets = [] (count, offsize) = struct.unpack(b'>HB', self.fp.read(3)) for i in range(count+1): self.offsets.append(nunpack(self.fp.read(offsize))) self.base = self.fp.tell()-1 self.fp.seek(self.base+self.offsets[-1]) def __repr__(self): return '<INDEX: size=%d>' % len(self) def __len__(self): return len(self.offsets)-1 def __getitem__(self, i): self.fp.seek(self.base+self.offsets[i]) return self.fp.read(self.offsets[i+1]-self.offsets[i]) def __iter__(self): return iter( self[i] for i in range(len(self)) ) def __init__(self, name, fp): self.name = name self.fp = fp # Header (_major,_minor,hdrsize,offsize) = struct.unpack(b'BBBB', self.fp.read(4)) self.fp.read(hdrsize-4) # Name INDEX self.name_index = self.INDEX(self.fp) # Top DICT INDEX self.dict_index = self.INDEX(self.fp) # String INDEX self.string_index = self.INDEX(self.fp) # Global Subr INDEX self.subr_index = self.INDEX(self.fp) # Top DICT DATA self.top_dict = getdict(self.dict_index[0]) (charset_pos,) = self.top_dict.get(15, [0]) (encoding_pos,) = self.top_dict.get(16, [0]) (charstring_pos,) = self.top_dict.get(17, [0]) # CharStrings self.fp.seek(charstring_pos) self.charstring = self.INDEX(self.fp) self.nglyphs = len(self.charstring) # Encodings self.code2gid = {} self.gid2code = {} self.fp.seek(encoding_pos) format = self.fp.read(1) if format == b'\x00': # Format 0 (n,) = struct.unpack(b'B', self.fp.read(1)) for (code,gid) in enumerate(struct.unpack(b'B'*n, self.fp.read(n))): self.code2gid[code] = gid self.gid2code[gid] = code elif format == b'\x01': # Format 1 (n,) = struct.unpack(b'B', self.fp.read(1)) code = 0 for i in range(n): (first,nleft) = struct.unpack(b'BB', self.fp.read(2)) for gid in range(first,first+nleft+1): self.code2gid[code] = gid self.gid2code[gid] = code code += 1 else: raise ValueError('unsupported encoding format: %r' % format) # Charsets self.name2gid = {} self.gid2name = {} self.fp.seek(charset_pos) format = self.fp.read(1) if format == '\x00': # Format 0 n = self.nglyphs-1 for (gid,sid) in enumerate(struct.unpack(b'>'+b'H'*n, self.fp.read(2*n))): gid += 1 name = self.getstr(sid) self.name2gid[name] = gid self.gid2name[gid] = name elif format == '\x01': # Format 1 (n,) = struct.unpack(b'B', self.fp.read(1)) sid = 0 for i in range(n): (first,nleft) = struct.unpack(b'BB', self.fp.read(2)) for gid in range(first,first+nleft+1): name = self.getstr(sid) self.name2gid[name] = gid self.gid2name[gid] = name sid += 1 elif format == '\x02': # Format 2 assert 0 else: raise ValueError('unsupported charset format: %r' % format) #print self.code2gid #print self.name2gid #assert 0 def getstr(self, sid): if sid < len(self.STANDARD_STRINGS): return self.STANDARD_STRINGS[sid] return self.string_index[sid-len(self.STANDARD_STRINGS)] class TrueTypeFont: class CMapNotFound(Exception): pass def __init__(self, name, fp): self.name = name self.fp = fp self.tables = {} self.fonttype = fp.read(4) (ntables, _1, _2, _3) = struct.unpack(b'>HHHH', fp.read(8)) for _ in range(ntables): (name, tsum, offset, length) = struct.unpack(b'>4sLLL', fp.read(16)) self.tables[name] = (offset, length) def create_unicode_map(self): if 'cmap' not in self.tables: raise TrueTypeFont.CMapNotFound (base_offset, length) = self.tables['cmap'] fp = self.fp fp.seek(base_offset) (version, nsubtables) = struct.unpack(b'>HH', fp.read(4)) subtables = [] for i in range(nsubtables): subtables.append(struct.unpack(b'>HHL', fp.read(8))) char2gid = {} # Only supports subtable type 0, 2 and 4. for (_1, _2, st_offset) in subtables: fp.seek(base_offset+st_offset) (fmttype, fmtlen, fmtlang) = struct.unpack(b'>HHH', fp.read(6)) if fmttype == 0: char2gid.update(enumerate(struct.unpack(b'>256B', fp.read(256)))) elif fmttype == 2: subheaderkeys = struct.unpack(b'>256H', fp.read(512)) firstbytes = [0]*8192 for (i,k) in enumerate(subheaderkeys): firstbytes[k/8] = i nhdrs = max(subheaderkeys)/8 + 1 hdrs = [] for i in range(nhdrs): (firstcode,entcount,delta,offset) = struct.unpack(b'>HHhH', fp.read(8)) hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset)) for (i,firstcode,entcount,delta,pos) in hdrs: if not entcount: continue first = firstcode + (firstbytes[i] << 8) fp.seek(pos) for c in range(entcount): gid = struct.unpack(b'>H', fp.read(2)) if gid: gid += delta char2gid[first+c] = gid elif fmttype == 4: (segcount, _1, _2, _3) = struct.unpack(b'>HHHH', fp.read(8)) segcount /= 2 ecs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount)) fp.read(2) scs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount)) idds = struct.unpack(b'>%dh' % segcount, fp.read(2*segcount)) pos = fp.tell() idrs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount)) for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs): if idr: fp.seek(pos+idr) for c in range(sc, ec+1): char2gid[c] = (struct.unpack(b'>H', fp.read(2))[0] + idd) & 0xffff else: for c in range(sc, ec+1): char2gid[c] = (c + idd) & 0xffff else: assert 0 # create unicode map unicode_map = FileUnicodeMap() for (char,gid) in char2gid.items(): unicode_map.add_cid2unichr(gid, char) return unicode_map ## Fonts ## class PDFFontError(PDFException): pass class PDFUnicodeNotDefined(PDFFontError): pass LITERAL_STANDARD_ENCODING = LIT('StandardEncoding') LITERAL_TYPE1C = LIT('Type1C') class PDFFont: def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get('FontName', 'unknown')) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get('Flags', 0)) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.italic_angle = num_value(descriptor.get('ItalicAngle', 0)) self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0)) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0))) self.hscale = self.vscale = .001 def __repr__(self): return '<PDFFont>' def is_vertical(self): return False def is_multibyte(self): return False def decode(self, s): if isinstance(s, str): return list(map(ord, s)) else: # it's already bytes return s def get_ascent(self): return self.ascent * self.vscale def get_descent(self): return self.descent * self.vscale def get_width(self): w = self.bbox[2]-self.bbox[0] if w == 0: w = -self.default_width return w * self.hscale def get_height(self): h = self.bbox[3]-self.bbox[1] if h == 0: h = self.ascent - self.descent return h * self.vscale def char_width(self, cid): return self.widths.get(cid, self.default_width) * self.hscale def char_disp(self, cid): return 0 def string_width(self, s): return sum( self.char_width(cid) for cid in self.decode(s) ) class PDFSimpleFont(PDFFont): def __init__(self, descriptor, widths, spec): # Font encoding is specified either by a name of # built-in encoding or a dictionary that describes # the differences. if 'Encoding' in spec: encoding = resolve1(spec['Encoding']) else: encoding = LITERAL_STANDARD_ENCODING if isinstance(encoding, dict): name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)) diff = list_value(encoding.get('Differences', None)) self.cid2unicode = EncodingDB.get_encoding(name, diff) else: self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding)) self.unicode_map = None if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, io.BytesIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths) def to_unichr(self, cid): if self.unicode_map: try: return self.unicode_map.get_unichr(cid) except KeyError: pass try: return self.cid2unicode[cid] except KeyError: raise PDFUnicodeNotDefined(None, cid) class PDFType1Font(PDFSimpleFont): def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: handle_error(PDFFontError, 'BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) ) PDFSimpleFont.__init__(self, descriptor, widths, spec) if 'Encoding' not in spec and 'FontFile' in descriptor: # try to recover the missing encoding info from the font file. self.fontfile = stream_value(descriptor.get('FontFile')) length1 = int_value(self.fontfile['Length1']) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(io.BytesIO(data)) self.cid2unicode = parser.get_encoding() def __repr__(self): return '<PDFType1Font: basefont=%r>' % self.basefont class PDFTrueTypeFont(PDFType1Font): def __repr__(self): return '<PDFTrueTypeFont: basefont=%r>' % self.basefont class PDFType3Font(PDFSimpleFont): def __init__(self, rsrcmgr, spec): firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 0)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths)) if 'FontDescriptor' in spec: descriptor = dict_value(spec['FontDescriptor']) else: descriptor = {'Ascent':0, 'Descent':0, 'FontBBox':spec['FontBBox']} PDFSimpleFont.__init__(self, descriptor, widths, spec) self.matrix = tuple(list_value(spec.get('FontMatrix'))) (_,self.descent,_,self.ascent) = self.bbox (self.hscale,self.vscale) = apply_matrix_norm(self.matrix, (1,1)) def __repr__(self): return '<PDFType3Font>' class PDFCIDFont(PDFFont): def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: handle_error(PDFFontError, 'BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'), self.cidsysteminfo.get('Ordering', 'unknown')) try: name = literal_name(spec['Encoding']) except KeyError: handle_error(PDFFontError, 'Encoding is unspecified') name = 'unknown' try: self.cmap = CMapDB.get_cmap(name) except CMapDB.CMapNotFound as e: handle_error(PDFFontError, str(e)) self.cmap = CMap() try: descriptor = dict_value(spec['FontDescriptor']) except KeyError: handle_error(PDFFontError, 'FontDescriptor is missing') descriptor = {} ttf = None if 'FontFile2' in descriptor: self.fontfile = stream_value(descriptor.get('FontFile2')) ttf = TrueTypeFont(self.basefont, io.BytesIO(self.fontfile.get_data())) self.unicode_map = None if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, io.BytesIO(strm.get_data())).run() elif self.cidcoding == 'Adobe-Identity': if ttf: try: self.unicode_map = ttf.create_unicode_map() except TrueTypeFont.CMapNotFound: pass else: try: self.unicode_map = CMapDB.get_unicode_map(self.cidcoding, self.cmap.is_vertical()) except CMapDB.CMapNotFound as e: pass self.vertical = self.cmap.is_vertical() if self.vertical: # writing mode: vertical widths = get_widths2(list_value(spec.get('W2', []))) self.disps = dict( (cid,(vx,vy)) for (cid,(_,(vx,vy))) in widths.items() ) (vy,w) = spec.get('DW2', [880, -1000]) self.default_disp = (None,vy) widths = dict( (cid,w) for (cid,(w,_)) in widths.items() ) default_width = w else: # writing mode: horizontal self.disps = {} self.default_disp = 0 widths = get_widths(list_value(spec.get('W', []))) default_width = spec.get('DW', 1000) PDFFont.__init__(self, descriptor, widths, default_width=default_width) def __repr__(self): return '<PDFCIDFont: basefont=%r, cidcoding=%r>' % (self.basefont, self.cidcoding) def is_vertical(self): return self.vertical def is_multibyte(self): return True def decode(self, bytes): return self.cmap.decode(bytes) def char_disp(self, cid): "Returns an integer for horizontal fonts, a tuple for vertical fonts." return self.disps.get(cid, self.default_disp) def to_unichr(self, cid): try: if not self.unicode_map: raise KeyError(cid) return self.unicode_map.get_unichr(cid) except KeyError: raise PDFUnicodeNotDefined(self.cidcoding, cid) def main(argv): for fname in argv[1:]: fp = io.open(fname, 'rb') #font = TrueTypeFont(fname, fp) font = CFFFont(fname, fp) print(font) fp.close() if __name__ == '__main__': sys.exit(main(sys.argv))