|
|
- """
- This is a python interface to Adobe Font Metrics Files. Although a
- number of other python implementations exist, and may be more complete
- than this, it was decided not to go with them because they were
- either:
-
- 1) copyrighted or used a non-BSD compatible license
-
- 2) had too many dependencies and a free standing lib was needed
-
- 3) Did more than needed and it was easier to write afresh rather than
- figure out how to get just what was needed.
-
- It is pretty easy to use, and requires only built-in python libs:
-
- >>> from matplotlib import rcParams
- >>> import os.path
- >>> afm_fname = os.path.join(rcParams['datapath'],
- ... 'fonts', 'afm', 'ptmr8a.afm')
- >>>
- >>> from matplotlib.afm import AFM
- >>> with open(afm_fname, 'rb') as fh:
- ... afm = AFM(fh)
- >>> afm.string_width_height('What the heck?')
- (6220.0, 694)
- >>> afm.get_fontname()
- 'Times-Roman'
- >>> afm.get_kern_dist('A', 'f')
- 0
- >>> afm.get_kern_dist('A', 'y')
- -92.0
- >>> afm.get_bbox_char('!')
- [130, -9, 238, 676]
-
- As in the Adobe Font Metrics File Format Specification, all dimensions
- are given in units of 1/1000 of the scale factor (point size) of the font
- being used.
- """
-
- from collections import namedtuple
- import re
- import sys
-
- from ._mathtext_data import uni2type1
- from matplotlib.cbook import deprecated
-
-
- # some afm files have floats where we are expecting ints -- there is
- # probably a better way to handle this (support floats, round rather
- # than truncate). But I don't know what the best approach is now and
- # this change to _to_int should at least prevent mpl from crashing on
- # these JDH (2009-11-06)
-
- def _to_int(x):
- return int(float(x))
-
-
# Real-valued fields are parsed with the builtin ``float``; the alias only
# exists so that all converters share the ``_to_*`` naming scheme.
_to_float = float
-
-
- def _to_str(x):
- return x.decode('utf8')
-
-
def _to_list_of_ints(s):
    """
    Parse the bytes string *s* into a list of ints.

    Items may be separated by whitespace and/or commas; every item is
    converted with `_to_int`.
    """
    tokens = s.replace(b',', b' ').split()
    return list(map(_to_int, tokens))
-
-
def _to_list_of_floats(s):
    """Parse the whitespace-separated items of *s* into a list of floats."""
    return list(map(_to_float, s.split()))
-
-
- def _to_bool(s):
- if s.lower().strip() in (b'false', b'0', b'no'):
- return False
- else:
- return True
-
-
- def _sanity_check(fh):
- """
- Check if the file at least looks like AFM.
- If not, raise :exc:`RuntimeError`.
- """
-
- # Remember the file position in case the caller wants to
- # do something else with the file.
- pos = fh.tell()
- try:
- line = next(fh)
- finally:
- fh.seek(pos, 0)
-
- # AFM spec, Section 4: The StartFontMetrics keyword [followed by a
- # version number] must be the first line in the file, and the
- # EndFontMetrics keyword must be the last non-empty line in the
- # file. We just check the first line.
- if not line.startswith(b'StartFontMetrics'):
- raise RuntimeError('Not an AFM file')
-
-
def _parse_header(fh):
    """
    Reads the font metrics header (up to the char metrics) and returns
    a dictionary mapping *key* to *val*.  *val* will be converted to the
    appropriate python type as necessary; e.g.:

        * 'False'->False
        * '0'->0
        * '-168 -218 1000 898'-> [-168, -218, 1000, 898]

    Dictionary keys are the keywords as bytes (e.g. ``b'FontName'``).
    The recognized keywords are

      StartFontMetrics, FontName, FullName, FamilyName, Weight,
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
      Capheight, XHeight, Ascender, Descender, StdHW, StdVW,
      StartCharMetrics, CharacterSet, Characters

    Blank lines and ``Comment`` lines are skipped.  Unknown keywords and
    values that cannot be converted are reported on stderr and otherwise
    ignored.

    Raises
    ------
    RuntimeError
        If the input ends before a ``StartCharMetrics`` entry has been
        stored.
    """
    header_converters = {
        b'StartFontMetrics': _to_float,
        b'FontName': _to_str,
        b'FullName': _to_str,
        b'FamilyName': _to_str,
        b'Weight': _to_str,
        b'ItalicAngle': _to_float,
        b'IsFixedPitch': _to_bool,
        b'FontBBox': _to_list_of_ints,
        b'UnderlinePosition': _to_int,
        b'UnderlineThickness': _to_int,
        b'Version': _to_str,
        b'Notice': _to_str,
        b'EncodingScheme': _to_str,
        b'CapHeight': _to_float,  # Is the second version a mistake, or
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
        b'XHeight': _to_float,
        b'Ascender': _to_float,
        b'Descender': _to_float,
        b'StdHW': _to_float,
        b'StdVW': _to_float,
        b'StartCharMetrics': _to_int,
        b'CharacterSet': _to_str,
        b'Characters': _to_int,
    }

    d = {}
    for line in fh:
        line = line.rstrip()
        # A blank line carries no keyword; skip it instead of reporting an
        # "unknown keyword" for the empty key.
        if not line or line.startswith(b'Comment'):
            continue
        # Keyword and value are separated by the first space; a keyword
        # without a value yields an empty value.
        key, _, val = line.partition(b' ')

        # Look the converter up separately from running it, so that each
        # failure mode is reported by exactly one handler.
        try:
            converter = header_converters[key]
        except KeyError:
            print('Found an unknown keyword in AFM header (was %r)' % key,
                  file=sys.stderr)
            continue
        try:
            d[key] = converter(val)
        except ValueError:
            print('Value error parsing header in AFM:', key, val,
                  file=sys.stderr)
            continue
        if key == b'StartCharMetrics':
            return d
    raise RuntimeError('Bad parse')
-
-
# Record type holding the metrics kept for one character.
CharMetrics = namedtuple('CharMetrics', ['width', 'name', 'bbox'])
CharMetrics.__doc__ = """
    Represents the character metrics of a single character.

    Notes
    -----
    The fields do currently only describe a subset of character metrics
    information defined in the AFM standard.
    """
CharMetrics.width.__doc__ = """The character width (WX)."""
CharMetrics.name.__doc__ = """The character name (N)."""
CharMetrics.bbox.__doc__ = """
    The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""
-
-
def _parse_char_metrics(fh):
    """
    Parse the given filehandle for character metrics information and return
    the information as dicts.

    It is assumed that the file cursor is on the line behind
    'StartCharMetrics'.

    Returns
    -------
    ascii_d : dict
         A mapping "ASCII num of the character" to `.CharMetrics`.
         Characters whose code is -1 are left out, and the character
         named 'Euro' is always stored under code 128.
    name_d : dict
         A mapping "character name" to `.CharMetrics`.

    Raises
    ------
    RuntimeError
        If a non-blank line lacks one of the C, WX, N or B entries, or if
        the input ends before 'EndCharMetrics' is found.

    Notes
    -----
    This function is incomplete per the standard, but thus far parses
    all the sample afm files tried.
    """
    required_keys = {'C', 'WX', 'N', 'B'}

    ascii_d = {}
    name_d = {}
    for line in fh:
        # We are defensively letting values be utf8. The spec requires
        # ascii, but there are non-compliant fonts in circulation
        line = _to_str(line.rstrip())  # Convert from byte-literal
        # Blank lines are skipped, as in the kern pair and composites
        # parsers, rather than rejected as a bad metrics line.
        if not line:
            continue
        if line.startswith('EndCharMetrics'):
            return ascii_d, name_d
        # Split the metric line into a dictionary, keyed by metric
        # identifiers.  Whitespace-only fields (e.g. from ';;') are ignored
        # and an identifier without a value maps to an empty string.
        vals = {}
        for field in line.split(';'):
            field = field.strip()
            if not field:
                continue
            key, _, val = field.partition(' ')
            vals[key] = val
        # There may be other metrics present, but only these are needed
        if not required_keys.issubset(vals):
            raise RuntimeError('Bad char metrics line: %s' % line)
        num = _to_int(vals['C'])
        wx = _to_float(vals['WX'])
        name = vals['N']
        # The bbox entries are parsed as floats and then truncated to ints.
        bbox = list(map(int, _to_list_of_floats(vals['B'])))
        metrics = CharMetrics(wx, name, bbox)
        # Workaround: If the character name is 'Euro', give it the
        # corresponding character code, according to WinAnsiEncoding (see PDF
        # Reference).
        if name == 'Euro':
            num = 128
        if num != -1:
            ascii_d[num] = metrics
        name_d[name] = metrics
    raise RuntimeError('Bad parse')
-
-
- def _parse_kern_pairs(fh):
- """
- Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and
- values are the kern pair value. For example, a kern pairs line like
- ``KPX A y -50``
-
- will be represented as::
-
- d[ ('A', 'y') ] = -50
-
- """
-
- line = next(fh)
- if not line.startswith(b'StartKernPairs'):
- raise RuntimeError('Bad start of kern pairs data: %s' % line)
-
- d = {}
- for line in fh:
- line = line.rstrip()
- if not line:
- continue
- if line.startswith(b'EndKernPairs'):
- next(fh) # EndKernData
- return d
- vals = line.split()
- if len(vals) != 4 or vals[0] != b'KPX':
- raise RuntimeError('Bad kern pairs line: %s' % line)
- c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3])
- d[(c1, c2)] = val
- raise RuntimeError('Bad kern pairs parse')
-
-
# Record type for one part (PCC entry) of a composite character.
CompositePart = namedtuple('CompositePart', ['name', 'dx', 'dy'])
CompositePart.__doc__ = """
    Represents the information on a composite element of a composite char."""
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'."""
CompositePart.dx.__doc__ = """x-displacement of the part from the origin."""
CompositePart.dy.__doc__ = """y-displacement of the part from the origin."""
-
-
- def _parse_composites(fh):
- """
- Parse the given filehandle for composites information return them as a
- dict.
-
- It is assumed that the file cursor is on the line behind 'StartComposites'.
-
- Returns
- -------
- composites : dict
- A dict mapping composite character names to a parts list. The parts
- list is a list of `.CompositePart` entries describing the parts of
- the composite.
-
- Example
- -------
- A composite definition line::
-
- CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;
-
- will be represented as::
-
- composites['Aacute'] = [CompositePart(name='A', dx=0, dy=0),
- CompositePart(name='acute', dx=160, dy=170)]
-
- """
- composites = {}
- for line in fh:
- line = line.rstrip()
- if not line:
- continue
- if line.startswith(b'EndComposites'):
- return composites
- vals = line.split(b';')
- cc = vals[0].split()
- name, numParts = cc[1], _to_int(cc[2])
- pccParts = []
- for s in vals[1:-1]:
- pcc = s.split()
- part = CompositePart(pcc[1], _to_float(pcc[2]), _to_float(pcc[3]))
- pccParts.append(part)
- composites[name] = pccParts
-
- raise RuntimeError('Bad composites parse')
-
-
def _parse_optional(fh):
    """
    Read the remainder of *fh*, parsing the optional kern pair and
    composites sections where present.

    Lines that do not start one of these sections are ignored.

    Returns
    -------
    kern_data : dict
        A dict containing kerning information. May be empty.
        See `._parse_kern_pairs`.
    composites : dict
        A dict containing composite information. May be empty.
        See `._parse_composites`.
    """
    section_parsers = {
        b'StartKernData': _parse_kern_pairs,
        b'StartComposites': _parse_composites,
    }
    # Every section defaults to an empty dict until it is seen in the file.
    sections = {keyword: {} for keyword in section_parsers}

    for raw in fh:
        tokens = raw.split()
        if not tokens:
            continue
        keyword = tokens[0]
        parser = section_parsers.get(keyword)
        if parser is not None:
            # The section parser consumes the lines of its section from fh.
            sections[keyword] = parser(fh)

    return sections[b'StartKernData'], sections[b'StartComposites']
-
-
@deprecated("3.0", "Use the class AFM instead.")
def parse_afm(fh):
    """Deprecated public wrapper that forwards *fh* to `_parse_afm`."""
    parsed = _parse_afm(fh)
    return parsed
-
-
def _parse_afm(fh):
    """
    Parse the Adobe Font Metrics file in file handle *fh*.

    The file is first checked with :func:`_sanity_check`; then the header,
    the character metrics and the optional sections are read in that
    order.

    Returns
    -------
    header : dict
        A header dict. See :func:`_parse_header`.
    cmetrics_by_ascii : dict
        From :func:`_parse_char_metrics`.
    cmetrics_by_name : dict
        From :func:`_parse_char_metrics`.
    kernpairs : dict
        From :func:`_parse_kern_pairs`.
    composites : dict
        From :func:`_parse_composites`

    """
    _sanity_check(fh)
    header = _parse_header(fh)
    by_code, by_name = _parse_char_metrics(fh)
    kern, composite = _parse_optional(fh)
    return header, by_code, by_name, kern, composite
-
-
class AFM(object):
    """
    Font metrics read from an Adobe Font Metrics file.

    The instance stores the header dict, the character metrics (keyed by
    character code and by character name), the kern pairs and the
    composites as returned by `_parse_afm`.
    """

    def __init__(self, fh):
        """Parse the AFM file in file object *fh*."""
        (self._header,
         self._metrics,
         self._metrics_by_name,
         self._kern,
         self._composite) = _parse_afm(fh)

    def get_bbox_char(self, c, isord=False):
        """
        Return the bbox stored for character *c*.

        *c* is a single character, or its integer code if *isord* is true.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.

        Newline characters are ignored.  ``(0, 0)`` is returned if *s* is
        empty or consists of newlines only.
        """
        if not len(s):
            return 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        found = False
        for c in s:
            if c == '\n':
                continue
            found = True
            wx, name, bbox = self._metrics[ord(c)]

            total_width += wx + self._kern.get((namelast, name), 0)
            # NOTE(review): CharMetrics documents bbox as
            # (llx, lly, urx, ury), yet the fourth entry is added to the
            # lower edge here as if it were a height.  Kept as-is to
            # preserve existing results -- confirm the intent.
            _, b, _, h = bbox
            miny = min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name

        if not found:
            # Only newlines: do not let the 1e9 sentinel leak into the
            # result.
            return 0, 0
        return total_width, maxy - miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box and the maximal descent.

        The result is the tuple (*left*, *miny*, *total_width*, *height*,
        *descent*) with *descent* equal to ``-miny``.  *s* may be a str or
        UTF-8 encoded bytes.  Newline characters are ignored; characters
        without metrics are measured as 'question'.  All zeros are
        returned if *s* is empty or consists of newlines only.
        """
        if not len(s):
            return 0, 0, 0, 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        left = 0
        found = False
        if not isinstance(s, str):
            s = _to_str(s)
        for c in s:
            if c == '\n':
                continue
            found = True
            name = uni2type1.get(ord(c), 'question')
            try:
                wx, _, bbox = self._metrics_by_name[name]
            except KeyError:
                # The font has no glyph of that name; fall back to the
                # question mark.
                name = 'question'
                wx, _, bbox = self._metrics_by_name[name]
            total_width += wx + self._kern.get((namelast, name), 0)
            # NOTE(review): same treatment of the fourth bbox entry as in
            # string_width_height -- confirm the intent.
            llx, b, _, h = bbox
            left = min(left, llx)
            miny = min(miny, b)
            maxy = max(maxy, b + h)

            namelast = name

        if not found:
            # Only newlines: do not let the 1e9 sentinel leak into the
            # result.
            return 0, 0, 0, 0, 0
        return left, miny, total_width, maxy - miny, -miny

    def get_str_bbox(self, s):
        """Return the string bounding box."""
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """Get the name of the character, i.e., ';' is 'semicolon'."""
        if not isord:
            c = ord(c)
        return self._metrics[c].name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX field.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].width

    def get_width_from_char_name(self, name):
        """Get the width of the character from a type1 character name."""
        return self._metrics_by_name[name].width

    def get_height_char(self, c, isord=False):
        """
        Return the last entry of the bbox of character *c*.

        *c* is a single character, or its integer code if *isord* is true.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1* and *c2*.
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*.
        """
        return self._kern.get((name1, name2), 0)

    def get_fontname(self):
        """Return the font name, e.g., 'Times-Roman'."""
        return self._header[b'FontName']

    def get_fullname(self):
        """Return the font full name, e.g., 'Times-Roman'."""
        name = self._header.get(b'FullName')
        if name is None:  # use FontName as a substitute
            name = self._header[b'FontName']
        return name

    def get_familyname(self):
        """Return the font family name, e.g., 'Times'."""
        name = self._header.get(b'FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess
        name = self.get_fullname()
        extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|'
                  r'light|ultralight|extra|condensed))+$')
        return re.sub(extras, '', name)

    @property
    def family_name(self):
        """The font family name, e.g., 'Times'."""
        return self.get_familyname()

    def get_weight(self):
        """Return the font weight, e.g., 'Bold' or 'Roman'."""
        return self._header[b'Weight']

    def get_angle(self):
        """Return the fontangle as float."""
        return self._header[b'ItalicAngle']

    def get_capheight(self):
        """
        Return the cap height as float.

        The header parser also accepts the spelling 'Capheight'; it is
        used here when 'CapHeight' is absent.
        """
        header = self._header
        if b'CapHeight' not in header and b'Capheight' in header:
            return header[b'Capheight']
        return header[b'CapHeight']

    def get_xheight(self):
        """Return the xheight as float."""
        return self._header[b'XHeight']

    def get_underline_thickness(self):
        """Return the underline thickness as stored in the header."""
        return self._header[b'UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdVW', None)
|