You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

675 lines
25 KiB

#!/usr/bin/env python3
import sys
import io
import struct
from .cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap
from .encodingdb import EncodingDB, name2unicode
from .psparser import PSStackParser
from .psparser import PSEOF
from .psparser import LIT, KWD, handle_error
from .psparser import PSLiteral, literal_name
from .pdftypes import (PDFException, resolve1, int_value, num_value, list_value, dict_value,
stream_value)
from .fontmetrics import FONT_METRICS
from .utils import apply_matrix_norm, nunpack, choplist
def get_widths(seq):
widths = {}
r = []
for v in seq:
if isinstance(v, list):
if r:
char1 = r[-1]
for (i,w) in enumerate(v):
widths[char1+i] = w
r = []
elif isinstance(v, int):
r.append(v)
if len(r) == 3:
(char1,char2,w) = r
for i in range(char1, char2+1):
widths[i] = w
r = []
return widths
#assert get_widths([1]) == {}
#assert get_widths([1,2,3]) == {1:3, 2:3}
#assert get_widths([1,[2,3],6,[7,8]]) == {1:2,2:3, 6:7,7:8}
def get_widths2(seq):
widths = {}
r = []
for v in seq:
if isinstance(v, list):
if r:
char1 = r[-1]
for (i,(w,vx,vy)) in enumerate(choplist(3,v)):
widths[char1+i] = (w,(vx,vy))
r = []
elif isinstance(v, int):
r.append(v)
if len(r) == 5:
(char1,char2,w,vx,vy) = r
for i in range(char1, char2+1):
widths[i] = (w,(vx,vy))
r = []
return widths
#assert get_widths2([1]) == {}
#assert get_widths2([1,2,3,4,5]) == {1:(3,(4,5)), 2:(3,(4,5))}
#assert get_widths2([1,[2,3,4,5],6,[7,8,9]]) == {1:(2,(3,4)), 6:(7,(8,9))}
class FontMetricsDB:
@classmethod
def get_metrics(klass, fontname):
return FONT_METRICS[fontname]
class Type1FontHeaderParser(PSStackParser):
KEYWORD_BEGIN = KWD('begin')
KEYWORD_END = KWD('end')
KEYWORD_DEF = KWD('def')
KEYWORD_PUT = KWD('put')
KEYWORD_DICT = KWD('dict')
KEYWORD_ARRAY = KWD('array')
KEYWORD_READONLY = KWD('readonly')
KEYWORD_FOR = KWD('for')
KEYWORD_FOR = KWD('for')
def __init__(self, data):
PSStackParser.__init__(self, data)
self._cid2unicode = {}
def get_encoding(self):
while 1:
try:
(cid,name) = self.nextobject()
except PSEOF:
break
try:
self._cid2unicode[cid] = name2unicode(name)
except KeyError:
pass
return self._cid2unicode
def do_keyword(self, pos, token):
if token is self.KEYWORD_PUT:
((_,key),(_,value)) = self.pop(2)
if (isinstance(key, int) and
isinstance(value, PSLiteral)):
self.add_results((key, literal_name(value)))
## CFFFont
## (Format specified in Adobe Technical Note: #5176
## "The Compact Font Format Specification")
##
NIBBLES = ('0','1','2','3','4','5','6','7','8','9','.','e','e-',None,'-')
def getdict(data):
d = {}
fp = io.BytesIO(data)
stack = []
while 1:
c = fp.read(1)
if not c: break
b0 = ord(c)
if b0 <= 21:
d[b0] = stack
stack = []
continue
if b0 == 30:
s = ''
loop = True
while loop:
b = ord(fp.read(1))
for n in (b >> 4, b & 15):
if n == 15:
loop = False
else:
s += NIBBLES[n]
value = float(s)
elif 32 <= b0 and b0 <= 246:
value = b0-139
else:
b1 = ord(fp.read(1))
if 247 <= b0 and b0 <= 250:
value = ((b0-247)<<8)+b1+108
elif 251 <= b0 and b0 <= 254:
value = -((b0-251)<<8)-b1-108
else:
b2 = ord(fp.read(1))
if 128 <= b1: b1 -= 256
if b0 == 28:
value = b1<<8 | b2
else:
value = b1<<24 | b2<<16 | struct.unpack('>H', fp.read(2))[0]
stack.append(value)
return d
class CFFFont:
STANDARD_STRINGS = (
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign',
'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period',
'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash',
'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a',
'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown',
'cent', 'sterling', 'fraction', 'yen', 'florin', 'section',
'currency', 'quotesingle', 'quotedblleft', 'guillemotleft',
'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash',
'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright',
'guillemotright', 'ellipsis', 'perthousand', 'questiondown',
'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve',
'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut',
'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash',
'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu',
'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn',
'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth',
'multiply', 'threesuperior', 'copyright', 'Aacute',
'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde',
'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde',
'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde',
'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave',
'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex',
'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute',
'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex',
'odieresis', 'ograve', 'otilde', 'scaron', 'uacute',
'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis',
'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle',
'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader',
'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle',
'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle',
'sevenoldstyle', 'eightoldstyle', 'nineoldstyle',
'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior',
'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior',
'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior',
'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior',
'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall',
'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall',
'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall',
'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall',
'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall',
'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall',
'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall',
'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior',
'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall',
'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths',
'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior',
'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior',
'seveninferior', 'eightinferior', 'nineinferior',
'centinferior', 'dollarinferior', 'periodinferior',
'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall',
'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall',
'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall',
'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall',
'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall',
'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall',
'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000',
'001.001', '001.002', '001.003', 'Black', 'Bold', 'Book',
'Light', 'Medium', 'Regular', 'Roman', 'Semibold',
)
class INDEX:
def __init__(self, fp):
self.fp = fp
self.offsets = []
(count, offsize) = struct.unpack(b'>HB', self.fp.read(3))
for i in range(count+1):
self.offsets.append(nunpack(self.fp.read(offsize)))
self.base = self.fp.tell()-1
self.fp.seek(self.base+self.offsets[-1])
def __repr__(self):
return '<INDEX: size=%d>' % len(self)
def __len__(self):
return len(self.offsets)-1
def __getitem__(self, i):
self.fp.seek(self.base+self.offsets[i])
return self.fp.read(self.offsets[i+1]-self.offsets[i])
def __iter__(self):
return iter( self[i] for i in range(len(self)) )
def __init__(self, name, fp):
self.name = name
self.fp = fp
# Header
(_major,_minor,hdrsize,offsize) = struct.unpack(b'BBBB', self.fp.read(4))
self.fp.read(hdrsize-4)
# Name INDEX
self.name_index = self.INDEX(self.fp)
# Top DICT INDEX
self.dict_index = self.INDEX(self.fp)
# String INDEX
self.string_index = self.INDEX(self.fp)
# Global Subr INDEX
self.subr_index = self.INDEX(self.fp)
# Top DICT DATA
self.top_dict = getdict(self.dict_index[0])
(charset_pos,) = self.top_dict.get(15, [0])
(encoding_pos,) = self.top_dict.get(16, [0])
(charstring_pos,) = self.top_dict.get(17, [0])
# CharStrings
self.fp.seek(charstring_pos)
self.charstring = self.INDEX(self.fp)
self.nglyphs = len(self.charstring)
# Encodings
self.code2gid = {}
self.gid2code = {}
self.fp.seek(encoding_pos)
format = self.fp.read(1)
if format == b'\x00':
# Format 0
(n,) = struct.unpack(b'B', self.fp.read(1))
for (code,gid) in enumerate(struct.unpack(b'B'*n, self.fp.read(n))):
self.code2gid[code] = gid
self.gid2code[gid] = code
elif format == b'\x01':
# Format 1
(n,) = struct.unpack(b'B', self.fp.read(1))
code = 0
for i in range(n):
(first,nleft) = struct.unpack(b'BB', self.fp.read(2))
for gid in range(first,first+nleft+1):
self.code2gid[code] = gid
self.gid2code[gid] = code
code += 1
else:
raise ValueError('unsupported encoding format: %r' % format)
# Charsets
self.name2gid = {}
self.gid2name = {}
self.fp.seek(charset_pos)
format = self.fp.read(1)
if format == '\x00':
# Format 0
n = self.nglyphs-1
for (gid,sid) in enumerate(struct.unpack(b'>'+b'H'*n, self.fp.read(2*n))):
gid += 1
name = self.getstr(sid)
self.name2gid[name] = gid
self.gid2name[gid] = name
elif format == '\x01':
# Format 1
(n,) = struct.unpack(b'B', self.fp.read(1))
sid = 0
for i in range(n):
(first,nleft) = struct.unpack(b'BB', self.fp.read(2))
for gid in range(first,first+nleft+1):
name = self.getstr(sid)
self.name2gid[name] = gid
self.gid2name[gid] = name
sid += 1
elif format == '\x02':
# Format 2
assert 0
else:
raise ValueError('unsupported charset format: %r' % format)
#print self.code2gid
#print self.name2gid
#assert 0
def getstr(self, sid):
if sid < len(self.STANDARD_STRINGS):
return self.STANDARD_STRINGS[sid]
return self.string_index[sid-len(self.STANDARD_STRINGS)]
class TrueTypeFont:
class CMapNotFound(Exception): pass
def __init__(self, name, fp):
self.name = name
self.fp = fp
self.tables = {}
self.fonttype = fp.read(4)
(ntables, _1, _2, _3) = struct.unpack(b'>HHHH', fp.read(8))
for _ in range(ntables):
(name, tsum, offset, length) = struct.unpack(b'>4sLLL', fp.read(16))
self.tables[name] = (offset, length)
def create_unicode_map(self):
if 'cmap' not in self.tables:
raise TrueTypeFont.CMapNotFound
(base_offset, length) = self.tables['cmap']
fp = self.fp
fp.seek(base_offset)
(version, nsubtables) = struct.unpack(b'>HH', fp.read(4))
subtables = []
for i in range(nsubtables):
subtables.append(struct.unpack(b'>HHL', fp.read(8)))
char2gid = {}
# Only supports subtable type 0, 2 and 4.
for (_1, _2, st_offset) in subtables:
fp.seek(base_offset+st_offset)
(fmttype, fmtlen, fmtlang) = struct.unpack(b'>HHH', fp.read(6))
if fmttype == 0:
char2gid.update(enumerate(struct.unpack(b'>256B', fp.read(256))))
elif fmttype == 2:
subheaderkeys = struct.unpack(b'>256H', fp.read(512))
firstbytes = [0]*8192
for (i,k) in enumerate(subheaderkeys):
firstbytes[k/8] = i
nhdrs = max(subheaderkeys)/8 + 1
hdrs = []
for i in range(nhdrs):
(firstcode,entcount,delta,offset) = struct.unpack(b'>HHhH', fp.read(8))
hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset))
for (i,firstcode,entcount,delta,pos) in hdrs:
if not entcount: continue
first = firstcode + (firstbytes[i] << 8)
fp.seek(pos)
for c in range(entcount):
gid = struct.unpack(b'>H', fp.read(2))
if gid:
gid += delta
char2gid[first+c] = gid
elif fmttype == 4:
(segcount, _1, _2, _3) = struct.unpack(b'>HHHH', fp.read(8))
segcount /= 2
ecs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount))
fp.read(2)
scs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount))
idds = struct.unpack(b'>%dh' % segcount, fp.read(2*segcount))
pos = fp.tell()
idrs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount))
for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs):
if idr:
fp.seek(pos+idr)
for c in range(sc, ec+1):
char2gid[c] = (struct.unpack(b'>H', fp.read(2))[0] + idd) & 0xffff
else:
for c in range(sc, ec+1):
char2gid[c] = (c + idd) & 0xffff
else:
assert 0
# create unicode map
unicode_map = FileUnicodeMap()
for (char,gid) in char2gid.items():
unicode_map.add_cid2unichr(gid, char)
return unicode_map
## Fonts
##
class PDFFontError(PDFException): pass
class PDFUnicodeNotDefined(PDFFontError): pass
LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
LITERAL_TYPE1C = LIT('Type1C')
class PDFFont:
def __init__(self, descriptor, widths, default_width=None):
self.descriptor = descriptor
self.widths = widths
self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
if isinstance(self.fontname, PSLiteral):
self.fontname = literal_name(self.fontname)
self.flags = int_value(descriptor.get('Flags', 0))
self.ascent = num_value(descriptor.get('Ascent', 0))
self.descent = num_value(descriptor.get('Descent', 0))
self.italic_angle = num_value(descriptor.get('ItalicAngle', 0))
self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0))
self.leading = num_value(descriptor.get('Leading', 0))
self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
self.hscale = self.vscale = .001
def __repr__(self):
return '<PDFFont>'
def is_vertical(self):
return False
def is_multibyte(self):
return False
def decode(self, s):
if isinstance(s, str):
return list(map(ord, s))
else: # it's already bytes
return s
def get_ascent(self):
return self.ascent * self.vscale
def get_descent(self):
return self.descent * self.vscale
def get_width(self):
w = self.bbox[2]-self.bbox[0]
if w == 0:
w = -self.default_width
return w * self.hscale
def get_height(self):
h = self.bbox[3]-self.bbox[1]
if h == 0:
h = self.ascent - self.descent
return h * self.vscale
def char_width(self, cid):
return self.widths.get(cid, self.default_width) * self.hscale
def char_disp(self, cid):
return 0
def string_width(self, s):
return sum( self.char_width(cid) for cid in self.decode(s) )
class PDFSimpleFont(PDFFont):
def __init__(self, descriptor, widths, spec):
# Font encoding is specified either by a name of
# built-in encoding or a dictionary that describes
# the differences.
if 'Encoding' in spec:
encoding = resolve1(spec['Encoding'])
else:
encoding = LITERAL_STANDARD_ENCODING
if isinstance(encoding, dict):
name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
diff = list_value(encoding.get('Differences', None))
self.cid2unicode = EncodingDB.get_encoding(name, diff)
else:
self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
self.unicode_map = None
if 'ToUnicode' in spec:
strm = stream_value(spec['ToUnicode'])
self.unicode_map = FileUnicodeMap()
CMapParser(self.unicode_map, io.BytesIO(strm.get_data())).run()
PDFFont.__init__(self, descriptor, widths)
def to_unichr(self, cid):
if self.unicode_map:
try:
return self.unicode_map.get_unichr(cid)
except KeyError:
pass
try:
return self.cid2unicode[cid]
except KeyError:
raise PDFUnicodeNotDefined(None, cid)
class PDFType1Font(PDFSimpleFont):
def __init__(self, rsrcmgr, spec):
try:
self.basefont = literal_name(spec['BaseFont'])
except KeyError:
handle_error(PDFFontError, 'BaseFont is missing')
self.basefont = 'unknown'
try:
(descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
except KeyError:
descriptor = dict_value(spec.get('FontDescriptor', {}))
firstchar = int_value(spec.get('FirstChar', 0))
lastchar = int_value(spec.get('LastChar', 255))
widths = list_value(spec.get('Widths', [0]*256))
widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) )
PDFSimpleFont.__init__(self, descriptor, widths, spec)
if 'Encoding' not in spec and 'FontFile' in descriptor:
# try to recover the missing encoding info from the font file.
self.fontfile = stream_value(descriptor.get('FontFile'))
length1 = int_value(self.fontfile['Length1'])
data = self.fontfile.get_data()[:length1]
parser = Type1FontHeaderParser(io.BytesIO(data))
self.cid2unicode = parser.get_encoding()
def __repr__(self):
return '<PDFType1Font: basefont=%r>' % self.basefont
class PDFTrueTypeFont(PDFType1Font):
def __repr__(self):
return '<PDFTrueTypeFont: basefont=%r>' % self.basefont
class PDFType3Font(PDFSimpleFont):
def __init__(self, rsrcmgr, spec):
firstchar = int_value(spec.get('FirstChar', 0))
lastchar = int_value(spec.get('LastChar', 0))
widths = list_value(spec.get('Widths', [0]*256))
widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths))
if 'FontDescriptor' in spec:
descriptor = dict_value(spec['FontDescriptor'])
else:
descriptor = {'Ascent':0, 'Descent':0,
'FontBBox':spec['FontBBox']}
PDFSimpleFont.__init__(self, descriptor, widths, spec)
self.matrix = tuple(list_value(spec.get('FontMatrix')))
(_,self.descent,_,self.ascent) = self.bbox
(self.hscale,self.vscale) = apply_matrix_norm(self.matrix, (1,1))
def __repr__(self):
return '<PDFType3Font>'
class PDFCIDFont(PDFFont):
def __init__(self, rsrcmgr, spec):
try:
self.basefont = literal_name(spec['BaseFont'])
except KeyError:
handle_error(PDFFontError, 'BaseFont is missing')
self.basefont = 'unknown'
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'),
self.cidsysteminfo.get('Ordering', 'unknown'))
try:
name = literal_name(spec['Encoding'])
except KeyError:
handle_error(PDFFontError, 'Encoding is unspecified')
name = 'unknown'
try:
self.cmap = CMapDB.get_cmap(name)
except CMapDB.CMapNotFound as e:
handle_error(PDFFontError, str(e))
self.cmap = CMap()
try:
descriptor = dict_value(spec['FontDescriptor'])
except KeyError:
handle_error(PDFFontError, 'FontDescriptor is missing')
descriptor = {}
ttf = None
if 'FontFile2' in descriptor:
self.fontfile = stream_value(descriptor.get('FontFile2'))
ttf = TrueTypeFont(self.basefont,
io.BytesIO(self.fontfile.get_data()))
self.unicode_map = None
if 'ToUnicode' in spec:
strm = stream_value(spec['ToUnicode'])
self.unicode_map = FileUnicodeMap()
CMapParser(self.unicode_map, io.BytesIO(strm.get_data())).run()
elif self.cidcoding == 'Adobe-Identity':
if ttf:
try:
self.unicode_map = ttf.create_unicode_map()
except TrueTypeFont.CMapNotFound:
pass
else:
try:
self.unicode_map = CMapDB.get_unicode_map(self.cidcoding, self.cmap.is_vertical())
except CMapDB.CMapNotFound as e:
pass
self.vertical = self.cmap.is_vertical()
if self.vertical:
# writing mode: vertical
widths = get_widths2(list_value(spec.get('W2', [])))
self.disps = dict( (cid,(vx,vy)) for (cid,(_,(vx,vy))) in widths.items() )
(vy,w) = spec.get('DW2', [880, -1000])
self.default_disp = (None,vy)
widths = dict( (cid,w) for (cid,(w,_)) in widths.items() )
default_width = w
else:
# writing mode: horizontal
self.disps = {}
self.default_disp = 0
widths = get_widths(list_value(spec.get('W', [])))
default_width = spec.get('DW', 1000)
PDFFont.__init__(self, descriptor, widths, default_width=default_width)
def __repr__(self):
return '<PDFCIDFont: basefont=%r, cidcoding=%r>' % (self.basefont, self.cidcoding)
def is_vertical(self):
return self.vertical
def is_multibyte(self):
return True
def decode(self, bytes):
return self.cmap.decode(bytes)
def char_disp(self, cid):
"Returns an integer for horizontal fonts, a tuple for vertical fonts."
return self.disps.get(cid, self.default_disp)
def to_unichr(self, cid):
try:
if not self.unicode_map:
raise KeyError(cid)
return self.unicode_map.get_unichr(cid)
except KeyError:
raise PDFUnicodeNotDefined(self.cidcoding, cid)
def main(argv):
for fname in argv[1:]:
fp = io.open(fname, 'rb')
#font = TrueTypeFont(fname, fp)
font = CFFFont(fname, fp)
print(font)
fp.close()
if __name__ == '__main__':
sys.exit(main(sys.argv))