12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228 |
- # -*- coding: utf-8 -*-
- from fontTools.misc import sstruct
- from fontTools.misc.textTools import (
- bytechr,
- byteord,
- bytesjoin,
- strjoin,
- tobytes,
- tostr,
- safeEval,
- )
- from fontTools.misc.encodingTools import getEncoding
- from fontTools.ttLib import newTable
- from fontTools.ttLib.ttVisitor import TTVisitor
- from fontTools import ttLib
- import fontTools.ttLib.tables.otTables as otTables
- from fontTools.ttLib.tables import C_P_A_L_
- from . import DefaultTable
- import struct
- import logging
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# sstruct description of a single name record: six big-endian unsigned
# 16-bit fields. 'length' and 'offset' locate the record's string inside the
# string storage that follows the record array.
nameRecordFormat = """
    >  # big endian
    platformID: H
    platEncID:  H
    langID:     H
    nameID:     H
    length:     H
    offset:     H
"""

# Size in bytes of one packed name record.
nameRecordSize = sstruct.calcsize(nameRecordFormat)
class table__n_a_m_e(DefaultTable.DefaultTable):
    """The font 'name' table, held as a list of NameRecord objects.

    The records live in ``self.names``. That attribute is created by
    decompile(), fromXML(), compile() and the set/add methods; the lookup
    methods (getName(), getDebugName(), toXML(), ...) iterate it directly and
    therefore expect it to exist already.
    """

    dependencies = ["ltag"]

    def decompile(self, data, ttFont):
        """Parse the binary table 'data' into ``self.names``.

        The 6-byte header carries the table format, the record count and the
        offset of the string storage. An unexpected string offset is logged
        but still used. Records that are truncated, or whose string would
        extend past the end of the data, are logged and skipped.
        'ttFont' is not used.
        """
        _format, n, stringOffset = struct.unpack(b">HHH", data[:6])
        expectedStringOffset = 6 + n * nameRecordSize
        if stringOffset != expectedStringOffset:
            log.error(
                "'name' table stringOffset incorrect. Expected: %s; Actual: %s",
                expectedStringOffset,
                stringOffset,
            )
        stringData = data[stringOffset:]
        data = data[6:]
        self.names = []
        for i in range(n):
            if len(data) < nameRecordSize:
                # not enough bytes left for another record
                log.error("skipping malformed name record #%d", i)
                continue
            name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
            name.string = stringData[name.offset : name.offset + name.length]
            if name.offset + name.length > len(stringData):
                log.error("skipping malformed name record #%d", i)
                continue
            assert len(name.string) == name.length
            # offset/length only describe the binary layout; drop them so the
            # record carries just its IDs and its string
            del name.offset, name.length
            self.names.append(name)

    def compile(self, ttFont):
        """Return the binary representation of the table.

        ``self.names`` is sorted in place (see NameRecord.__lt__()) and each
        record gets 'offset' and 'length' attributes assigned as a side
        effect. Records with identical encoded strings share one copy in the
        string storage. 'ttFont' is not used.
        """
        if not hasattr(self, "names"):
            # only happens when there are NO name table entries read
            # from the TTX file
            self.names = []
        names = self.names
        names.sort()  # sort according to the spec; see NameRecord.__lt__()
        stringOffset = 6 + len(names) * nameRecordSize
        # format 0 header, followed by the records, followed by the strings
        pieces = [struct.pack(b">HHH", 0, len(names), stringOffset)]
        strings = []
        storageSize = 0
        done = {}  # encoded string -> (offset, length), to reuse the "pointers"
        for name in names:
            string = name.toBytes()
            if string not in done:
                done[string] = (storageSize, len(string))
                strings.append(string)
                storageSize += len(string)
            name.offset, name.length = done[string]
            pieces.append(sstruct.pack(nameRecordFormat, name))
        return bytesjoin(pieces + strings)

    def toXML(self, writer, ttFont):
        """Write every name record to 'writer', in list order."""
        for name in self.names:
            name.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Append a NameRecord built from a 'namerecord' XML element.

        Elements with any other tag name are ignored.
        """
        if name != "namerecord":
            return  # ignore unknown tags
        if not hasattr(self, "names"):
            self.names = []
        record = NameRecord()
        self.names.append(record)
        record.fromXML(name, attrs, content, ttFont)

    def getName(self, nameID, platformID, platEncID, langID=None):
        """Return the first record matching the given IDs, or None.

        When 'langID' is None, the record's language is not considered.
        """
        for namerecord in self.names:
            if (
                namerecord.nameID == nameID
                and namerecord.platformID == platformID
                and namerecord.platEncID == platEncID
            ):
                if langID is None or namerecord.langID == langID:
                    return namerecord
        return None  # not found

    def getDebugName(self, nameID):
        """Return a decoded string for 'nameID', preferring English, or None.

        Records that fail to decode are skipped. The first record whose
        (platformID, langID) is (1, 0) or (3, 0x409) is taken as English;
        failing that, the last decodable record examined is used. An empty
        string counts as "no name".
        """
        englishName = someName = None
        for name in self.names:
            if name.nameID != nameID:
                continue
            try:
                unistr = name.toUnicode()
            except UnicodeDecodeError:
                continue
            someName = unistr
            if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
                englishName = unistr
                break
        return englishName or someName or None

    def getFirstDebugName(self, nameIDs):
        """Return getDebugName() of the first ID in 'nameIDs' that has one."""
        for nameID in nameIDs:
            name = self.getDebugName(nameID)
            if name is not None:
                return name
        return None

    def getBestFamilyName(self):
        """Return the family name, trying name IDs 21, 16 and 1 in that order."""
        # 21 = WWS Family Name
        # 16 = Typographic Family Name
        # 1 = Family Name
        return self.getFirstDebugName((21, 16, 1))

    def getBestSubFamilyName(self):
        """Return the subfamily name, trying name IDs 22, 17 and 2 in that order."""
        # 22 = WWS SubFamily Name
        # 17 = Typographic SubFamily Name
        # 2 = SubFamily Name
        return self.getFirstDebugName((22, 17, 2))

    def getBestFullName(self):
        """Return the best available full name, or None.

        Family/subfamily pairs (21, 22), (16, 17) and (1, 2) are tried first
        and joined with a space; a subfamily equal to "regular" (ignoring
        case) is left out. If no complete pair exists, name ID 4 and then
        name ID 6 are used.
        """
        # 4 = Full Name
        # 6 = PostScript Name
        for nameIDs in ((21, 22), (16, 17), (1, 2), (4,), (6,)):
            if len(nameIDs) == 2:
                name_fam = self.getDebugName(nameIDs[0])
                name_subfam = self.getDebugName(nameIDs[1])
                if name_fam is None or name_subfam is None:
                    continue  # if any is None, skip
                if name_subfam.lower() == "regular":
                    return f"{name_fam}"
                return f"{name_fam} {name_subfam}"
            name = self.getDebugName(nameIDs[0])
            if name is not None:
                return name
        return None

    def setName(self, string, nameID, platformID, platEncID, langID):
        """Set the 'string' for the name record identified by 'nameID', 'platformID',
        'platEncID' and 'langID'. If a record with those IDs doesn't exist, create it
        and append it to the name table.

        'string' can be of type `str` or `bytes`. In the latter case, it is
        assumed to be already encoded with the correct platform-specific
        encoding identified by the (platformID, platEncID, langID) triplet,
        and a warning is logged. Any other type raises TypeError.
        """
        if not hasattr(self, "names"):
            self.names = []
        if not isinstance(string, str):
            if isinstance(string, bytes):
                log.warning(
                    "name string is bytes, ensure it's correctly encoded: %r", string
                )
            else:
                raise TypeError(
                    "expected unicode or bytes, found %s: %r"
                    % (type(string).__name__, string)
                )
        namerecord = self.getName(nameID, platformID, platEncID, langID)
        if namerecord is not None:
            namerecord.string = string
        else:
            self.names.append(makeName(string, nameID, platformID, platEncID, langID))

    def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
        """Remove any name records identified by the given combination of 'nameID',
        'platformID', 'platEncID' and 'langID'.

        Arguments left as None are not used for matching; a record is removed
        only if it matches every argument that was given. With no arguments
        at all, nothing is removed.
        """
        args = {
            argName: argValue
            for argName, argValue in (
                ("nameID", nameID),
                ("platformID", platformID),
                ("platEncID", platEncID),
                ("langID", langID),
            )
            if argValue is not None
        }
        if not args:
            # no arguments, nothing to do
            return
        # keep the records that differ from at least one of the criteria
        self.names = [
            rec
            for rec in self.names
            if any(
                argValue != getattr(rec, argName) for argName, argValue in args.items()
            )
        ]

    @staticmethod
    def removeUnusedNames(ttFont):
        """Remove any name records which are not in NameID range 0-255 and not utilized
        within the font itself.

        Usage is determined by running a NameRecordVisitor over 'ttFont'.
        Returns the set of name IDs that were removed.
        """
        visitor = NameRecordVisitor()
        visitor.visit(ttFont)
        toDelete = set()
        for record in ttFont["name"].names:
            # Name IDs 26 to 255, inclusive, are reserved for future standard names.
            # https://learn.microsoft.com/en-us/typography/opentype/spec/name#name-ids
            if record.nameID < 256:
                continue
            if record.nameID not in visitor.seen:
                toDelete.add(record.nameID)
        for nameID in toDelete:
            ttFont["name"].removeNames(nameID)
        return toDelete

    def _findUnusedNameID(self, minNameID=256):
        """Finds an unused name id.

        The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
        following the last nameID in the name table. Raises ValueError when
        that would exceed 32767.
        """
        names = getattr(self, "names", [])
        nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
        if nameID > 32767:
            raise ValueError("nameID must be less than 32768")
        return nameID

    def findMultilingualName(
        self, names, windows=True, mac=True, minNameID=0, ttFont=None
    ):
        """Return the name ID of an existing multilingual name that
        matches the 'names' dictionary, or None if not found.

        'names' is a dictionary with the name in multiple languages,
        such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
        The keys can be arbitrary IETF BCP 47 language codes;
        the values are Unicode strings.

        If 'windows' is True, the returned name ID is guaranteed to
        exist for all requested languages for platformID=3 and
        platEncID=1.
        If 'mac' is True, the returned name ID is guaranteed to exist
        for all requested languages for platformID=1 and platEncID=0.

        The returned name ID will not be less than the 'minNameID'
        argument.
        """
        # Gather the set of requested
        # (string, platformID, platEncID, langID)
        # tuples
        reqNameSet = set()
        for lang, name in sorted(names.items()):
            wanted = []
            if windows:
                wanted.append(_makeWindowsName(name, None, lang))
            if mac:
                wanted.append(_makeMacName(name, None, lang, ttFont))
            for rec in wanted:
                if rec is not None:
                    reqNameSet.add(
                        (rec.string, rec.platformID, rec.platEncID, rec.langID)
                    )

        # Collect matching name IDs
        matchingNames = dict()
        for name in self.names:
            try:
                key = (name.toUnicode(), name.platformID, name.platEncID, name.langID)
            except UnicodeDecodeError:
                continue
            if key in reqNameSet and name.nameID >= minNameID:
                nameSet = matchingNames.setdefault(name.nameID, set())
                nameSet.add(key)

        # Return the first name ID that defines all requested strings
        for nameID, nameSet in sorted(matchingNames.items()):
            if nameSet == reqNameSet:
                return nameID
        return None  # not found

    def addMultilingualName(
        self, names, ttFont=None, nameID=None, windows=True, mac=True, minNameID=0
    ):
        """Add a multilingual name, returning its name ID

        'names' is a dictionary with the name in multiple languages,
        such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
        The keys can be arbitrary IETF BCP 47 language codes;
        the values are Unicode strings.

        'ttFont' is the TTFont to which the names are added, or None.
        If present, the font's 'ltag' table can get populated
        to store exotic language codes, which allows encoding
        names that otherwise cannot get encoded at all.

        'nameID' is the name ID to be used, or None to let the library
        find an existing set of name records that match, or pick an
        unused name ID.

        If 'windows' is True, a platformID=3 name record will be added.
        If 'mac' is True, a platformID=1 name record will be added.
        When a Windows record was requested but cannot be made for a
        language, a Mac record is attempted for that language instead,
        even if 'mac' is False.

        If the 'nameID' argument is None, the created nameID will not
        be less than the 'minNameID' argument (and never less than 256).
        """
        if not hasattr(self, "names"):
            self.names = []
        if nameID is None:
            # Reuse nameID if possible
            nameID = self.findMultilingualName(
                names, windows=windows, mac=mac, minNameID=minNameID, ttFont=ttFont
            )
            if nameID is not None:
                return nameID
            # Honour 'minNameID' for freshly allocated IDs as well, while
            # keeping 256 as the floor used when no minimum is requested.
            nameID = self._findUnusedNameID(max(minNameID, 256))
        # TODO: Should minimize BCP 47 language codes.
        # https://github.com/fonttools/fonttools/issues/930
        for lang, name in sorted(names.items()):
            windowsName = None
            if windows:
                windowsName = _makeWindowsName(name, nameID, lang)
                if windowsName is not None:
                    self.names.append(windowsName)
            # If we cannot make a Windows name, make sure we add a Mac name
            # as a fallback. This can happen for exotic BCP47 language tags
            # that have no Windows language code. The fallback is decided per
            # language so that it does not switch Mac names on for the
            # languages that follow.
            if mac or (windows and windowsName is None):
                macName = _makeMacName(name, nameID, lang, ttFont)
                if macName is not None:
                    self.names.append(macName)
        return nameID

    def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
        """Add a new name record containing 'string' for each (platformID, platEncID,
        langID) tuple specified in the 'platforms' list.

        The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
        following the last nameID in the name table.
        If no 'platforms' are specified, two English name records are added, one for the
        Macintosh (platformID=1), and one for the Windows platform (3).

        The 'string' must be a Unicode string, so it can be encoded with different,
        platform-specific encodings; anything else raises TypeError.

        Return the new nameID.
        """
        assert (
            len(platforms) > 0
        ), "'platforms' must contain at least one (platformID, platEncID, langID) tuple"
        if not hasattr(self, "names"):
            self.names = []
        if not isinstance(string, str):
            raise TypeError(
                "expected str, found %s: %r" % (type(string).__name__, string)
            )
        nameID = self._findUnusedNameID(minNameID + 1)
        for platformID, platEncID, langID in platforms:
            self.names.append(makeName(string, nameID, platformID, platEncID, langID))
        return nameID
def makeName(string, nameID, platformID, platEncID, langID):
    """Return a new NameRecord carrying the given string and identifiers."""
    record = NameRecord()
    record.string = string
    record.nameID = nameID
    record.platformID = platformID
    record.platEncID = platEncID
    record.langID = langID
    return record
def _makeWindowsName(name, nameID, language):
    """Create a NameRecord for the Microsoft Windows platform

    'language' is an arbitrary IETF BCP 47 language identifier such
    as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. It is looked up
    case-insensitively. If Microsoft Windows does not support the
    desired language, a warning is logged and the result will be None.

    The record is created with platformID=3 and platEncID=1.

    Future versions of fonttools might return a NameRecord for the
    OpenType 'name' table format 1, but this is not implemented yet.
    """
    langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
    if langID is None:
        log.warning(
            "cannot add Windows name in language %s "
            "because fonttools does not yet support "
            "name table format 1",
            language,
        )
        return None
    return makeName(name, nameID, 3, 1, langID)
def _makeMacName(name, nameID, language, font=None):
    """Create a NameRecord for Apple platforms

    'language' is an arbitrary IETF BCP 47 language identifier such
    as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
    create a Macintosh NameRecord that is understood by old applications
    (platform ID 1 and an old-style Macintosh language enum). If this
    is not possible, we create a Unicode NameRecord (platform ID 0)
    whose language points to the font’s 'ltag' table. The latter
    can encode any string in any language, but legacy applications
    might not recognize the format (in which case they will ignore
    those names).

    'font' should be the TTFont for which you want to create a name.
    If 'font' is None, we only return NameRecords for legacy Macintosh;
    in that case, a warning is logged and the result will be None for
    names that need to be encoded with an 'ltag' table. If 'font' has
    no 'ltag' table yet, one is created and added to it.

    See the section “The language identifier” in Apple’s specification:
    https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
    """
    macLang = _MAC_LANGUAGE_CODES.get(language.lower())
    macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
    if macLang is not None and macScript is not None:
        encoding = getEncoding(1, macScript, macLang, default="ascii")
        # Check if we can actually encode this name. If we can't,
        # for example because we have no support for the legacy
        # encoding, or because the name string contains Unicode
        # characters that the legacy encoding cannot represent,
        # we fall back to encoding the name in Unicode and put
        # the language tag into the ltag table.
        try:
            tobytes(name, encoding, errors="strict")
        except UnicodeEncodeError:
            pass
        else:
            return makeName(name, nameID, 1, macScript, macLang)
    if font is None:
        log.warning(
            "cannot store language %s into 'ltag' table "
            "without having access to the TTFont object",
            language,
        )
        return None
    ltag = font.tables.get("ltag")
    if ltag is None:
        ltag = font["ltag"] = newTable("ltag")
    # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
    # “The preferred platform-specific code for Unicode would be 3 or 4.”
    # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
    return makeName(name, nameID, 0, 4, ltag.addTag(language))
class NameRecord(object):
    """A single entry of the 'name' table.

    The class defines no __init__: the 'nameID', 'platformID', 'platEncID',
    'langID' and 'string' attributes read by the methods below have to be
    assigned by whoever creates the record. 'string' may hold either `str`
    or `bytes`.
    """

    def getEncoding(self, default="ascii"):
        """Returns the Python encoding name for this name entry based on its platformID,
        platEncID, and langID. If encoding for these values is not known, by default
        'ascii' is returned. That can be overridden by passing a value to the default
        argument.
        """
        return getEncoding(self.platformID, self.platEncID, self.langID, default)

    def encodingIsUnicodeCompatible(self):
        """Return True if this record's encoding is one of UTF-16BE, UCS-2BE,
        ASCII or Latin-1. An unknown encoding yields False."""
        return self.getEncoding(None) in ["utf_16_be", "ucs2be", "ascii", "latin1"]

    def __str__(self):
        # never raises on undecodable bytes: they are rendered as escapes
        return self.toStr(errors="backslashreplace")

    def isUnicode(self):
        """Return True for platformID 0, or platformID 3 with platEncID 0, 1 or 10."""
        return self.platformID == 0 or (
            self.platformID == 3 and self.platEncID in [0, 1, 10]
        )

    def toUnicode(self, errors="strict"):
        """
        If self.string is a Unicode string, return it; otherwise try decoding the
        bytes in self.string to a Unicode string using the encoding of this
        entry as returned by self.getEncoding(); Note that self.getEncoding()
        returns 'ascii' if the encoding is unknown to the library.

        Certain heuristics are performed to recover data from bytes that are
        ill-formed in the chosen encoding, or that otherwise look misencoded
        (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
        but marked otherwise). If the bytes are ill-formed and the heuristics fail,
        the error is handled according to the errors parameter to this function, which is
        passed to the underlying decode() function; by default it throws a
        UnicodeDecodeError exception.

        Note: The mentioned heuristics mean that roundtripping a font to XML and back
        to binary might recover some misencoded data whereas just loading the font
        and saving it back will not change them.
        """

        def isascii(b):
            # printable ASCII, or tab / line feed / carriage return
            return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]

        encoding = self.getEncoding()
        string = self.string

        if (
            isinstance(string, bytes)
            and encoding == "utf_16_be"
            and len(string) % 2 == 1
        ):
            # Recover badly encoded UTF-16 strings that have an odd number of bytes:
            # - If the last byte is zero, drop it.  Otherwise,
            # - If all the odd bytes are zero and all the even bytes are ASCII,
            #   prepend one zero byte.  Otherwise,
            # - If first byte is zero and all other bytes are ASCII, insert zero
            #   bytes between consecutive ASCII bytes.
            #
            # (Yes, I've seen all of these in the wild... sigh)
            if byteord(string[-1]) == 0:
                string = string[:-1]
            elif all(
                byteord(b) == 0 if i % 2 else isascii(byteord(b))
                for i, b in enumerate(string)
            ):
                string = b"\0" + string
            elif byteord(string[0]) == 0 and all(
                isascii(byteord(b)) for b in string[1:]
            ):
                string = bytesjoin(b"\0" + bytechr(byteord(b)) for b in string[1:])

        string = tostr(string, encoding=encoding, errors=errors)

        # If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
        # Fix it up.
        if all(
            ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i, c in enumerate(string)
        ):
            # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
            # narrow it down.
            string = string[1::2]

        return string

    def toBytes(self, errors="strict"):
        """If self.string is a bytes object, return it; otherwise try encoding
        the Unicode string in self.string to bytes using the encoding of this
        entry as returned by self.getEncoding(); Note that self.getEncoding()
        returns 'ascii' if the encoding is unknown to the library.

        If the Unicode string cannot be encoded to bytes in the chosen encoding,
        the error is handled according to the errors parameter to this function,
        which is passed to the underlying encode() function; by default it throws a
        UnicodeEncodeError exception.
        """
        return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

    # alias: toStr() and toUnicode() are the same function
    toStr = toUnicode

    def toXML(self, writer, ttFont):
        """Write this record as a 'namerecord' element.

        The text is written decoded when toUnicode() succeeds, otherwise as
        raw 8-bit data. A 'unicode' attribute recording which of the two was
        written is added when decoding failed or when the encoding is not
        Unicode-compatible.
        """
        try:
            unistr = self.toUnicode()
        except UnicodeDecodeError:
            unistr = None
        attrs = [
            ("nameID", self.nameID),
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("langID", hex(self.langID)),
        ]

        if unistr is None or not self.encodingIsUnicodeCompatible():
            attrs.append(("unicode", unistr is not None))

        writer.begintag("namerecord", attrs)
        writer.newline()
        if unistr is not None:
            writer.write(unistr)
        else:
            writer.write8bit(self.string)
        writer.newline()
        writer.endtag("namerecord")
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Populate this record from a 'namerecord' element.

        The IDs come from 'attrs'; the stripped text content is stored in
        self.string as bytes. 'name' and 'ttFont' are not used.
        """
        self.nameID = safeEval(attrs["nameID"])
        self.platformID = safeEval(attrs["platformID"])
        self.platEncID = safeEval(attrs["platEncID"])
        self.langID = safeEval(attrs["langID"])
        s = strjoin(content).strip()
        encoding = self.getEncoding()
        if self.encodingIsUnicodeCompatible() or safeEval(
            attrs.get("unicode", "False")
        ):
            self.string = s.encode(encoding)
        else:
            # This is the inverse of write8bit...
            self.string = s.encode("latin1")

    def __lt__(self, other):
        """Order records by (platformID, platEncID, langID, nameID, bytes).

        Returns NotImplemented when 'other' is not of exactly the same type
        or when either record is missing one of its ID attributes.
        """
        if type(self) is not type(other):
            return NotImplemented

        try:
            selfTuple = (
                self.platformID,
                self.platEncID,
                self.langID,
                self.nameID,
            )
            otherTuple = (
                other.platformID,
                other.platEncID,
                other.langID,
                other.nameID,
            )
        except AttributeError:
            # This can only happen for
            # 1) an object that is not a NameRecord, or
            # 2) an unlikely incomplete NameRecord object which has not been
            #    fully populated
            return NotImplemented

        try:
            # Include the actual NameRecord string in the comparison tuples
            selfTuple = selfTuple + (self.toBytes(),)
            otherTuple = otherTuple + (other.toBytes(),)
        except UnicodeEncodeError as e:
            # toBytes caused an encoding error in either of the two, so
            # fall back to whatever the tuples hold at this point
            log.error("NameRecord sorting failed to encode: %s", e)

        # Implemented so that list.sort() sorts according to the spec by using
        # the order of the tuple items and their comparison
        return selfTuple < otherTuple

    def __repr__(self):
        return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
            self.nameID,
            self.platformID,
            self.langID,
        )
# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
#
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
    0x0436: "af",
    0x041C: "sq",
    0x0484: "gsw",
    0x045E: "am",
    0x1401: "ar-DZ",
    0x3C01: "ar-BH",
    0x0C01: "ar",
    0x0801: "ar-IQ",
    0x2C01: "ar-JO",
    0x3401: "ar-KW",
    0x3001: "ar-LB",
    0x1001: "ar-LY",
    0x1801: "ary",
    0x2001: "ar-OM",
    0x4001: "ar-QA",
    0x0401: "ar-SA",
    0x2801: "ar-SY",
    0x1C01: "aeb",
    0x3801: "ar-AE",
    0x2401: "ar-YE",
    0x042B: "hy",
    0x044D: "as",
    0x082C: "az-Cyrl",
    0x042C: "az",
    0x046D: "ba",
    0x042D: "eu",
    0x0423: "be",
    0x0845: "bn",
    0x0445: "bn-IN",
    0x201A: "bs-Cyrl",
    0x141A: "bs",
    0x047E: "br",
    0x0402: "bg",
    0x0403: "ca",
    0x0C04: "zh-HK",
    0x1404: "zh-MO",
    0x0804: "zh",
    0x1004: "zh-SG",
    0x0404: "zh-TW",
    0x0483: "co",
    0x041A: "hr",
    0x101A: "hr-BA",
    0x0405: "cs",
    0x0406: "da",
    0x048C: "prs",
    0x0465: "dv",
    0x0813: "nl-BE",
    0x0413: "nl",
    0x0C09: "en-AU",
    0x2809: "en-BZ",
    0x1009: "en-CA",
    0x2409: "en-029",
    0x4009: "en-IN",
    0x1809: "en-IE",
    0x2009: "en-JM",
    0x4409: "en-MY",
    0x1409: "en-NZ",
    0x3409: "en-PH",
    0x4809: "en-SG",
    0x1C09: "en-ZA",
    0x2C09: "en-TT",
    0x0809: "en-GB",
    0x0409: "en",
    0x3009: "en-ZW",
    0x0425: "et",
    0x0438: "fo",
    0x0464: "fil",
    0x040B: "fi",
    0x080C: "fr-BE",
    0x0C0C: "fr-CA",
    0x040C: "fr",
    0x140C: "fr-LU",
    0x180C: "fr-MC",
    0x100C: "fr-CH",
    0x0462: "fy",
    0x0456: "gl",
    0x0437: "ka",
    0x0C07: "de-AT",
    0x0407: "de",
    0x1407: "de-LI",
    0x1007: "de-LU",
    0x0807: "de-CH",
    0x0408: "el",
    0x046F: "kl",
    0x0447: "gu",
    0x0468: "ha",
    0x040D: "he",
    0x0439: "hi",
    0x040E: "hu",
    0x040F: "is",
    0x0470: "ig",
    0x0421: "id",
    0x045D: "iu",
    0x085D: "iu-Latn",
    0x083C: "ga",
    0x0434: "xh",
    0x0435: "zu",
    0x0410: "it",
    0x0810: "it-CH",
    0x0411: "ja",
    0x044B: "kn",
    0x043F: "kk",
    0x0453: "km",
    0x0486: "quc",
    0x0487: "rw",
    0x0441: "sw",
    0x0457: "kok",
    0x0412: "ko",
    0x0440: "ky",
    0x0454: "lo",
    0x0426: "lv",
    0x0427: "lt",
    0x082E: "dsb",
    0x046E: "lb",
    0x042F: "mk",
    0x083E: "ms-BN",
    0x043E: "ms",
    0x044C: "ml",
    0x043A: "mt",
    0x0481: "mi",
    0x047A: "arn",
    0x044E: "mr",
    0x047C: "moh",
    0x0450: "mn",
    0x0850: "mn-CN",
    0x0461: "ne",
    0x0414: "nb",
    0x0814: "nn",
    0x0482: "oc",
    0x0448: "or",
    0x0463: "ps",
    0x0415: "pl",
    0x0416: "pt",
    0x0816: "pt-PT",
    0x0446: "pa",
    0x046B: "qu-BO",
    0x086B: "qu-EC",
    0x0C6B: "qu",
    0x0418: "ro",
    0x0417: "rm",
    0x0419: "ru",
    0x243B: "smn",
    0x103B: "smj-NO",
    0x143B: "smj",
    0x0C3B: "se-FI",
    0x043B: "se",
    0x083B: "se-SE",
    0x203B: "sms",
    0x183B: "sma-NO",
    # 0x1C3B is Southern Sami as used in Sweden; it follows the same
    # pattern as Lule Sami above (0x103B "smj-NO" / 0x143B "smj").
    0x1C3B: "sma",
    0x044F: "sa",
    0x1C1A: "sr-Cyrl-BA",
    0x0C1A: "sr",
    0x181A: "sr-Latn-BA",
    0x081A: "sr-Latn",
    0x046C: "nso",
    0x0432: "tn",
    0x045B: "si",
    0x041B: "sk",
    0x0424: "sl",
    0x2C0A: "es-AR",
    0x400A: "es-BO",
    0x340A: "es-CL",
    0x240A: "es-CO",
    0x140A: "es-CR",
    0x1C0A: "es-DO",
    0x300A: "es-EC",
    0x440A: "es-SV",
    0x100A: "es-GT",
    0x480A: "es-HN",
    0x080A: "es-MX",
    0x4C0A: "es-NI",
    0x180A: "es-PA",
    0x3C0A: "es-PY",
    0x280A: "es-PE",
    0x500A: "es-PR",
    # Microsoft has defined two different language codes for
    # “Spanish with modern sorting” and “Spanish with traditional
    # sorting”. This makes sense for collation APIs, and it would be
    # possible to express this in BCP 47 language tags via Unicode
    # extensions (eg., “es-u-co-trad” is “Spanish with traditional
    # sorting”). However, for storing names in fonts, this distinction
    # does not make sense, so we use “es” in both cases.
    0x0C0A: "es",
    0x040A: "es",
    0x540A: "es-US",
    0x380A: "es-UY",
    0x200A: "es-VE",
    0x081D: "sv-FI",
    0x041D: "sv",
    0x045A: "syr",
    0x0428: "tg",
    0x085F: "tzm",
    0x0449: "ta",
    0x0444: "tt",
    0x044A: "te",
    0x041E: "th",
    0x0451: "bo",
    0x041F: "tr",
    0x0442: "tk",
    0x0480: "ug",
    0x0422: "uk",
    0x042E: "hsb",
    0x0420: "ur",
    0x0843: "uz-Cyrl",
    0x0443: "uz",
    0x042A: "vi",
    0x0452: "cy",
    0x0488: "wo",
    0x0485: "sah",
    0x0478: "ii",
    0x046A: "yo",
}
# MacOS language ID → IETF BCP-47 language tag
#
# The lower-cased tags are inverted into a tag → ID lookup elsewhere in this
# module, so a tag that appears more than once resolves to the entry that
# comes last in this table.
_MAC_LANGUAGES = {
    0: "en",
    1: "fr",
    2: "de",
    3: "it",
    4: "nl",
    5: "sv",
    6: "es",
    7: "da",
    8: "pt",
    9: "no",
    10: "he",
    11: "ja",
    12: "ar",
    13: "fi",
    14: "el",
    15: "is",
    16: "mt",
    17: "tr",
    18: "hr",
    19: "zh-Hant",
    20: "ur",
    21: "hi",
    22: "th",
    23: "ko",
    24: "lt",
    25: "pl",
    26: "hu",
    # 27 is Estonian, not Spanish. Tagging it "es" also made the inverted
    # lookup send Spanish to ID 27 instead of ID 6.
    27: "et",
    28: "lv",
    29: "se",
    30: "fo",
    31: "fa",
    32: "ru",
    33: "zh",
    34: "nl-BE",
    35: "ga",
    36: "sq",
    37: "ro",
    38: "cs",  # Czech; the BCP 47 language code is "cs", not "cz"
    39: "sk",
    40: "sl",
    41: "yi",
    42: "sr",
    43: "mk",
    44: "bg",
    45: "uk",
    46: "be",
    47: "uz",
    48: "kk",
    49: "az-Cyrl",
    50: "az-Arab",
    51: "hy",
    52: "ka",
    53: "mo",
    54: "ky",
    55: "tg",
    56: "tk",
    57: "mn-CN",
    58: "mn",
    59: "ps",
    60: "ks",
    61: "ku",
    62: "sd",
    63: "bo",
    64: "ne",
    65: "sa",
    66: "mr",
    67: "bn",
    68: "as",
    69: "gu",
    70: "pa",
    71: "or",
    72: "ml",
    73: "kn",
    74: "ta",
    75: "te",
    76: "si",
    77: "my",
    78: "km",
    79: "lo",
    80: "vi",
    81: "id",
    82: "tl",
    83: "ms",
    84: "ms-Arab",
    85: "am",
    86: "ti",
    87: "om",
    88: "so",
    89: "sw",
    90: "rw",
    91: "rn",
    92: "ny",
    93: "mg",
    94: "eo",
    128: "cy",
    129: "eu",
    130: "ca",
    131: "la",
    132: "qu",
    133: "gn",
    134: "ay",
    135: "tt",
    136: "ug",
    137: "dz",
    138: "jv",
    139: "su",
    140: "gl",
    141: "af",
    142: "br",
    143: "iu",
    144: "gd",
    145: "gv",
    # NOTE(review): "ga" is also the tag of ID 35, so the inverted lookup
    # resolves "ga" to 146 — confirm that this is intended.
    146: "ga",
    147: "to",
    148: "el-polyton",
    149: "kl",
    150: "az",
    151: "nn",
}
# Inverted lookups: lower-cased BCP 47 tag → platform language ID. When a tag
# occurs several times in the forward table, the last occurrence wins.
_WINDOWS_LANGUAGE_CODES = dict(
    (tag.lower(), langID) for langID, tag in _WINDOWS_LANGUAGES.items()
)
_MAC_LANGUAGE_CODES = dict(
    (tag.lower(), langID) for langID, tag in _MAC_LANGUAGES.items()
)
- # MacOS language ID → MacOS script ID
- #
- # Note that the script ID is not sufficient to determine what encoding
- # to use in TrueType files. For some languages, MacOS used a modification
- # of a mainstream script. For example, an Icelandic name would be stored
- # with smRoman in the TrueType naming table, but the actual encoding
- # is a special Icelandic version of the normal Macintosh Roman encoding.
- # As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
- # Syllabics, but MacOS had run out of available script codes, so this was
- # done as a (pretty radical) “modification” of Ethiopic.
- #
- # http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
- _MAC_LANGUAGE_TO_SCRIPT = {
- # Keys are MacOS language IDs, values are MacOS script IDs; the trailing
- # comment on each entry names the corresponding lang*/sm* constants.
- # "(modified)" marks languages whose encoding is a language-specific
- # variant of the named script rather than the plain script encoding.
- 0: 0, # langEnglish → smRoman
- 1: 0, # langFrench → smRoman
- 2: 0, # langGerman → smRoman
- 3: 0, # langItalian → smRoman
- 4: 0, # langDutch → smRoman
- 5: 0, # langSwedish → smRoman
- 6: 0, # langSpanish → smRoman
- 7: 0, # langDanish → smRoman
- 8: 0, # langPortuguese → smRoman
- 9: 0, # langNorwegian → smRoman
- 10: 5, # langHebrew → smHebrew
- 11: 1, # langJapanese → smJapanese
- 12: 4, # langArabic → smArabic
- 13: 0, # langFinnish → smRoman
- 14: 6, # langGreek → smGreek
- 15: 0, # langIcelandic → smRoman (modified)
- 16: 0, # langMaltese → smRoman
- 17: 0, # langTurkish → smRoman (modified)
- 18: 0, # langCroatian → smRoman (modified)
- 19: 2, # langTradChinese → smTradChinese
- 20: 4, # langUrdu → smArabic
- 21: 9, # langHindi → smDevanagari
- 22: 21, # langThai → smThai
- 23: 3, # langKorean → smKorean
- 24: 29, # langLithuanian → smCentralEuroRoman
- 25: 29, # langPolish → smCentralEuroRoman
- 26: 29, # langHungarian → smCentralEuroRoman
- 27: 29, # langEstonian → smCentralEuroRoman
- 28: 29, # langLatvian → smCentralEuroRoman
- 29: 0, # langSami → smRoman
- 30: 0, # langFaroese → smRoman (modified)
- 31: 4, # langFarsi → smArabic (modified)
- 32: 7, # langRussian → smCyrillic
- 33: 25, # langSimpChinese → smSimpChinese
- 34: 0, # langFlemish → smRoman
- 35: 0, # langIrishGaelic → smRoman (modified)
- 36: 0, # langAlbanian → smRoman
- 37: 0, # langRomanian → smRoman (modified)
- 38: 29, # langCzech → smCentralEuroRoman
- 39: 29, # langSlovak → smCentralEuroRoman
- 40: 0, # langSlovenian → smRoman (modified)
- 41: 5, # langYiddish → smHebrew
- 42: 7, # langSerbian → smCyrillic
- 43: 7, # langMacedonian → smCyrillic
- 44: 7, # langBulgarian → smCyrillic
- 45: 7, # langUkrainian → smCyrillic (modified)
- 46: 7, # langByelorussian → smCyrillic
- 47: 7, # langUzbek → smCyrillic
- 48: 7, # langKazakh → smCyrillic
- 49: 7, # langAzerbaijani → smCyrillic
- 50: 4, # langAzerbaijanAr → smArabic
- 51: 24, # langArmenian → smArmenian
- 52: 23, # langGeorgian → smGeorgian
- 53: 7, # langMoldavian → smCyrillic
- 54: 7, # langKirghiz → smCyrillic
- 55: 7, # langTajiki → smCyrillic
- 56: 7, # langTurkmen → smCyrillic
- 57: 27, # langMongolian → smMongolian
- 58: 7, # langMongolianCyr → smCyrillic
- 59: 4, # langPashto → smArabic
- 60: 4, # langKurdish → smArabic
- 61: 4, # langKashmiri → smArabic
- 62: 4, # langSindhi → smArabic
- 63: 26, # langTibetan → smTibetan
- 64: 9, # langNepali → smDevanagari
- 65: 9, # langSanskrit → smDevanagari
- 66: 9, # langMarathi → smDevanagari
- 67: 13, # langBengali → smBengali
- 68: 13, # langAssamese → smBengali
- 69: 11, # langGujarati → smGujarati
- 70: 10, # langPunjabi → smGurmukhi
- 71: 12, # langOriya → smOriya
- 72: 17, # langMalayalam → smMalayalam
- 73: 16, # langKannada → smKannada
- 74: 14, # langTamil → smTamil
- 75: 15, # langTelugu → smTelugu
- 76: 18, # langSinhalese → smSinhalese
- 77: 19, # langBurmese → smBurmese
- 78: 20, # langKhmer → smKhmer
- 79: 22, # langLao → smLao
- 80: 30, # langVietnamese → smVietnamese
- 81: 0, # langIndonesian → smRoman
- 82: 0, # langTagalog → smRoman
- 83: 0, # langMalayRoman → smRoman
- 84: 4, # langMalayArabic → smArabic
- 85: 28, # langAmharic → smEthiopic
- 86: 28, # langTigrinya → smEthiopic
- 87: 28, # langOromo → smEthiopic
- 88: 0, # langSomali → smRoman
- 89: 0, # langSwahili → smRoman
- 90: 0, # langKinyarwanda → smRoman
- 91: 0, # langRundi → smRoman
- 92: 0, # langNyanja → smRoman
- 93: 0, # langMalagasy → smRoman
- 94: 0, # langEsperanto → smRoman
- # This table has no entries for language IDs 95–127.
- 128: 0, # langWelsh → smRoman (modified)
- 129: 0, # langBasque → smRoman
- 130: 0, # langCatalan → smRoman
- 131: 0, # langLatin → smRoman
- 132: 0, # langQuechua → smRoman
- 133: 0, # langGuarani → smRoman
- 134: 0, # langAymara → smRoman
- 135: 7, # langTatar → smCyrillic
- 136: 4, # langUighur → smArabic
- 137: 26, # langDzongkha → smTibetan
- 138: 0, # langJavaneseRom → smRoman
- 139: 0, # langSundaneseRom → smRoman
- 140: 0, # langGalician → smRoman
- 141: 0, # langAfrikaans → smRoman
- 142: 0, # langBreton → smRoman (modified)
- 143: 28, # langInuktitut → smEthiopic (modified)
- 144: 0, # langScottishGaelic → smRoman (modified)
- 145: 0, # langManxGaelic → smRoman (modified)
- 146: 0, # langIrishGaelicScript → smRoman (modified)
- 147: 0, # langTongan → smRoman
- 148: 6, # langGreekAncient → smGreek
- 149: 0, # langGreenlandic → smRoman
- 150: 0, # langAzerbaijanRoman → smRoman
- 151: 0, # langNynorsk → smRoman
- }
class NameRecordVisitor(TTVisitor):
    """Visitor that accumulates name IDs into the ``seen`` set.

    ``TABLES`` lists the tags of the font tables that are inspected for
    name IDs.
    """

    # Tags of the font tables that carry NameIDs worth collecting.
    TABLES = ("GSUB", "GPOS", "fvar", "CPAL", "STAT")

    def __init__(self):
        # Name IDs collected so far; starts out empty.
        self.seen = set()
@NameRecordVisitor.register_attrs(
    (
        # Each entry pairs an otTables class with the names of its
        # attributes whose values are to be recorded.
        #
        # NOTE(review): "SubfamilyID" looks like the 'size' feature's
        # subfamily identifier rather than a name ID — confirm against the
        # OpenType spec whether it belongs in this list.
        (otTables.FeatureParamsSize, ("SubfamilyID", "SubfamilyNameID")),
        (otTables.FeatureParamsStylisticSet, ("UINameID",)),
        (
            otTables.FeatureParamsCharacterVariants,
            (
                "FeatUILabelNameID",
                "FeatUITooltipTextNameID",
                "SampleTextNameID",
                "FirstParamUILabelNameID",
            ),
        ),
        (otTables.STAT, ("ElidedFallbackNameID",)),
        (otTables.AxisRecord, ("AxisNameID",)),
        (otTables.AxisValue, ("ValueNameID",)),
        (otTables.FeatureName, ("FeatureNameID",)),
        (otTables.Setting, ("SettingNameID",)),
    )
)
def visit(visitor, obj, attr, value):
    """Add *value*, the content of a registered attribute, to ``visitor.seen``.

    *obj* and *attr* are accepted but not used here.
    """
    collected = visitor.seen
    collected.add(value)
@NameRecordVisitor.register(ttLib.getTableClass("fvar"))
def visit(visitor, obj):
    """Collect the name IDs referenced by an ``fvar`` table.

    For every instance the ``subfamilyNameID`` is recorded, and the
    ``postscriptNameID`` too unless it equals 0xFFFF (presumably the
    "no name" sentinel — confirm against the fvar spec). Every axis
    contributes its ``axisNameID``.
    """
    collected = visitor.seen

    for instance in obj.instances:
        ps_name_id = instance.postscriptNameID
        if ps_name_id != 0xFFFF:
            collected.add(ps_name_id)
        collected.add(instance.subfamilyNameID)

    collected.update(axis.axisNameID for axis in obj.axes)
@NameRecordVisitor.register(ttLib.getTableClass("CPAL"))
def visit(visitor, obj):
    """Collect the palette and palette-entry label name IDs of a ``CPAL`` table.

    Only tables whose ``version`` is 1 are inspected; any other version
    contributes nothing.
    """
    if obj.version != 1:
        return

    collected = visitor.seen
    collected.update(obj.paletteLabels)
    collected.update(obj.paletteEntryLabels)
@NameRecordVisitor.register(ttLib.TTFont)
def visit(visitor, font, *args, **kwargs):
    """Visit the tables of *font* that are listed in ``visitor.TABLES``.

    Each listed table that is present in the font is passed to
    ``visitor.visit`` together with any extra positional and keyword
    arguments. ``visitor.font`` is set for the duration of the traversal and
    doubles as a re-entrancy guard: if it is already set, nothing is visited.

    Always returns False (presumably telling the visitor framework not to
    traverse the font object any further — confirm against TTVisitor).
    """
    if hasattr(visitor, "font"):
        return False

    visitor.font = font
    try:
        for tag in visitor.TABLES:
            if tag in font:
                visitor.visit(font[tag], *args, **kwargs)
    finally:
        # Clear the guard even when a table visitor raises; otherwise every
        # later visit with this visitor would hit the guard above and
        # silently do nothing.
        del visitor.font
    return False
|