O_S_2f_2.py 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746
  1. from fontTools.misc import sstruct
  2. from fontTools.misc.roundTools import otRound
  3. from fontTools.misc.textTools import safeEval, num2binary, binary2num
  4. from fontTools.ttLib.tables import DefaultTable
  5. import bisect
  6. import logging
  7. log = logging.getLogger(__name__)
# panose classification
# sstruct format listing the ten PANOSE fields, each declared with the "B"
# format code. It is used to pack/unpack the 10-byte 'panose' field of
# OS2_format_0 and to enumerate the attribute names of a Panose object.
panoseFormat = """
bFamilyType: B
bSerifStyle: B
bWeight: B
bProportion: B
bContrast: B
bStrokeVariation: B
bArmStyle: B
bLetterForm: B
bMidline: B
bXHeight: B
"""
  21. class Panose(object):
  22. def __init__(self, **kwargs):
  23. _, names, _ = sstruct.getformat(panoseFormat)
  24. for name in names:
  25. setattr(self, name, kwargs.pop(name, 0))
  26. for k in kwargs:
  27. raise TypeError(f"Panose() got an unexpected keyword argument {k!r}")
  28. def toXML(self, writer, ttFont):
  29. formatstring, names, fixes = sstruct.getformat(panoseFormat)
  30. for name in names:
  31. writer.simpletag(name, value=getattr(self, name))
  32. writer.newline()
  33. def fromXML(self, name, attrs, content, ttFont):
  34. setattr(self, name, safeEval(attrs["value"]))
# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'

# sstruct layout of the fields that decompile() unpacks first for every
# table version; the version-specific additions below follow these fields.
OS2_format_0 = """
> # big endian
version: H # version
xAvgCharWidth: h # average character width
usWeightClass: H # degree of thickness of strokes
usWidthClass: H # aspect ratio
fsType: H # type flags
ySubscriptXSize: h # subscript horizontal font size
ySubscriptYSize: h # subscript vertical font size
ySubscriptXOffset: h # subscript x offset
ySubscriptYOffset: h # subscript y offset
ySuperscriptXSize: h # superscript horizontal font size
ySuperscriptYSize: h # superscript vertical font size
ySuperscriptXOffset: h # superscript x offset
ySuperscriptYOffset: h # superscript y offset
yStrikeoutSize: h # strikeout size
yStrikeoutPosition: h # strikeout position
sFamilyClass: h # font family class and subclass
panose: 10s # panose classification number
ulUnicodeRange1: L # character range
ulUnicodeRange2: L # character range
ulUnicodeRange3: L # character range
ulUnicodeRange4: L # character range
achVendID: 4s # font vendor identification
fsSelection: H # font selection flags
usFirstCharIndex: H # first unicode character index
usLastCharIndex: H # last unicode character index
sTypoAscender: h # typographic ascender
sTypoDescender: h # typographic descender
sTypoLineGap: h # typographic line gap
usWinAscent: H # Windows ascender
usWinDescent: H # Windows descender
"""

# Extra fields read/written when version == 1.
OS2_format_1_addition = """
ulCodePageRange1: L
ulCodePageRange2: L
"""

# Extra fields for versions 2, 3 and 4: the version 1 fields plus five more.
OS2_format_2_addition = (
    OS2_format_1_addition
    + """
sxHeight: h
sCapHeight: h
usDefaultChar: H
usBreakChar: H
usMaxContext: H
"""
)

# Extra fields for version 5: the version 2 fields plus the optical sizes.
OS2_format_5_addition = (
    OS2_format_2_addition
    + """
usLowerOpticalPointSize: H
usUpperOpticalPointSize: H
"""
)

# Byte-order line to prepend to an addition when it is used on its own.
bigendian = " > # big endian\n"

# Complete per-version layouts, used by compile() and toXML(). They are built
# BEFORE the additions get their own byte-order line below, because
# OS2_format_0 already starts with one.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition

# From here on the *_addition names are standalone formats: decompile()
# unpacks them separately from the data left over after OS2_format_0.
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition
class table_O_S_2f_2(DefaultTable.DefaultTable):
    """the OS/2 table

    Table versions 0 through 5 are handled. Every version shares the
    ``OS2_format_0`` fields; versions 1, 2-4 and 5 additionally carry the
    fields of the matching ``OS2_format_*_addition`` layout.
    """

    # compile() reads ttFont["head"].macStyle to cross-check fsSelection.
    dependencies = ["head"]
  100. def decompile(self, data, ttFont):
  101. dummy, data = sstruct.unpack2(OS2_format_0, data, self)
  102. if self.version == 1:
  103. dummy, data = sstruct.unpack2(OS2_format_1_addition, data, self)
  104. elif self.version in (2, 3, 4):
  105. dummy, data = sstruct.unpack2(OS2_format_2_addition, data, self)
  106. elif self.version == 5:
  107. dummy, data = sstruct.unpack2(OS2_format_5_addition, data, self)
  108. self.usLowerOpticalPointSize /= 20
  109. self.usUpperOpticalPointSize /= 20
  110. elif self.version != 0:
  111. from fontTools import ttLib
  112. raise ttLib.TTLibError(
  113. "unknown format for OS/2 table: version %s" % self.version
  114. )
  115. if len(data):
  116. log.warning("too much 'OS/2' table data")
  117. self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())
  118. def compile(self, ttFont):
  119. self.updateFirstAndLastCharIndex(ttFont)
  120. panose = self.panose
  121. head = ttFont["head"]
  122. if (self.fsSelection & 1) and not (head.macStyle & 1 << 1):
  123. log.warning(
  124. "fsSelection bit 0 (italic) and "
  125. "head table macStyle bit 1 (italic) should match"
  126. )
  127. if (self.fsSelection & 1 << 5) and not (head.macStyle & 1):
  128. log.warning(
  129. "fsSelection bit 5 (bold) and "
  130. "head table macStyle bit 0 (bold) should match"
  131. )
  132. if (self.fsSelection & 1 << 6) and (self.fsSelection & 1 + (1 << 5)):
  133. log.warning(
  134. "fsSelection bit 6 (regular) is set, "
  135. "bits 0 (italic) and 5 (bold) must be clear"
  136. )
  137. if self.version < 4 and self.fsSelection & 0b1110000000:
  138. log.warning(
  139. "fsSelection bits 7, 8 and 9 are only defined in "
  140. "OS/2 table version 4 and up: version %s",
  141. self.version,
  142. )
  143. self.panose = sstruct.pack(panoseFormat, self.panose)
  144. if self.version == 0:
  145. data = sstruct.pack(OS2_format_0, self)
  146. elif self.version == 1:
  147. data = sstruct.pack(OS2_format_1, self)
  148. elif self.version in (2, 3, 4):
  149. data = sstruct.pack(OS2_format_2, self)
  150. elif self.version == 5:
  151. d = self.__dict__.copy()
  152. d["usLowerOpticalPointSize"] = round(self.usLowerOpticalPointSize * 20)
  153. d["usUpperOpticalPointSize"] = round(self.usUpperOpticalPointSize * 20)
  154. data = sstruct.pack(OS2_format_5, d)
  155. else:
  156. from fontTools import ttLib
  157. raise ttLib.TTLibError(
  158. "unknown format for OS/2 table: version %s" % self.version
  159. )
  160. self.panose = panose
  161. return data
  162. def toXML(self, writer, ttFont):
  163. writer.comment(
  164. "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
  165. "will be recalculated by the compiler"
  166. )
  167. writer.newline()
  168. if self.version == 1:
  169. format = OS2_format_1
  170. elif self.version in (2, 3, 4):
  171. format = OS2_format_2
  172. elif self.version == 5:
  173. format = OS2_format_5
  174. else:
  175. format = OS2_format_0
  176. formatstring, names, fixes = sstruct.getformat(format)
  177. for name in names:
  178. value = getattr(self, name)
  179. if name == "panose":
  180. writer.begintag("panose")
  181. writer.newline()
  182. value.toXML(writer, ttFont)
  183. writer.endtag("panose")
  184. elif name in (
  185. "ulUnicodeRange1",
  186. "ulUnicodeRange2",
  187. "ulUnicodeRange3",
  188. "ulUnicodeRange4",
  189. "ulCodePageRange1",
  190. "ulCodePageRange2",
  191. ):
  192. writer.simpletag(name, value=num2binary(value))
  193. elif name in ("fsType", "fsSelection"):
  194. writer.simpletag(name, value=num2binary(value, 16))
  195. elif name == "achVendID":
  196. writer.simpletag(name, value=repr(value)[1:-1])
  197. else:
  198. writer.simpletag(name, value=value)
  199. writer.newline()
  200. def fromXML(self, name, attrs, content, ttFont):
  201. if name == "panose":
  202. self.panose = panose = Panose()
  203. for element in content:
  204. if isinstance(element, tuple):
  205. name, attrs, content = element
  206. panose.fromXML(name, attrs, content, ttFont)
  207. elif name in (
  208. "ulUnicodeRange1",
  209. "ulUnicodeRange2",
  210. "ulUnicodeRange3",
  211. "ulUnicodeRange4",
  212. "ulCodePageRange1",
  213. "ulCodePageRange2",
  214. "fsType",
  215. "fsSelection",
  216. ):
  217. setattr(self, name, binary2num(attrs["value"]))
  218. elif name == "achVendID":
  219. setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
  220. else:
  221. setattr(self, name, safeEval(attrs["value"]))
  222. def updateFirstAndLastCharIndex(self, ttFont):
  223. if "cmap" not in ttFont:
  224. return
  225. codes = set()
  226. for table in getattr(ttFont["cmap"], "tables", []):
  227. if table.isUnicode():
  228. codes.update(table.cmap.keys())
  229. if codes:
  230. minCode = min(codes)
  231. maxCode = max(codes)
  232. # USHORT cannot hold codepoints greater than 0xFFFF
  233. self.usFirstCharIndex = min(0xFFFF, minCode)
  234. self.usLastCharIndex = min(0xFFFF, maxCode)
    # misspelled attributes kept for legacy reasons
    # Each property below simply forwards reads and writes to the correctly
    # spelled attribute, so both names always refer to the same value.

    @property
    def usMaxContex(self):
        """Legacy alias of 'usMaxContext'."""
        return self.usMaxContext

    @usMaxContex.setter
    def usMaxContex(self, value):
        self.usMaxContext = value

    @property
    def fsFirstCharIndex(self):
        """Legacy alias of 'usFirstCharIndex'."""
        return self.usFirstCharIndex

    @fsFirstCharIndex.setter
    def fsFirstCharIndex(self, value):
        self.usFirstCharIndex = value

    @property
    def fsLastCharIndex(self):
        """Legacy alias of 'usLastCharIndex'."""
        return self.usLastCharIndex

    @fsLastCharIndex.setter
    def fsLastCharIndex(self, value):
        self.usLastCharIndex = value
  254. def getUnicodeRanges(self):
  255. """Return the set of 'ulUnicodeRange*' bits currently enabled."""
  256. bits = set()
  257. ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
  258. ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
  259. for i in range(32):
  260. if ul1 & (1 << i):
  261. bits.add(i)
  262. if ul2 & (1 << i):
  263. bits.add(i + 32)
  264. if ul3 & (1 << i):
  265. bits.add(i + 64)
  266. if ul4 & (1 << i):
  267. bits.add(i + 96)
  268. return bits
  269. def setUnicodeRanges(self, bits):
  270. """Set the 'ulUnicodeRange*' fields to the specified 'bits'."""
  271. ul1, ul2, ul3, ul4 = 0, 0, 0, 0
  272. for bit in bits:
  273. if 0 <= bit < 32:
  274. ul1 |= 1 << bit
  275. elif 32 <= bit < 64:
  276. ul2 |= 1 << (bit - 32)
  277. elif 64 <= bit < 96:
  278. ul3 |= 1 << (bit - 64)
  279. elif 96 <= bit < 123:
  280. ul4 |= 1 << (bit - 96)
  281. else:
  282. raise ValueError("expected 0 <= int <= 122, found: %r" % bit)
  283. self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
  284. self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4
  285. def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
  286. """Intersect the codepoints in the font's Unicode cmap subtables with
  287. the Unicode block ranges defined in the OpenType specification (v1.7),
  288. and set the respective 'ulUnicodeRange*' bits if there is at least ONE
  289. intersection.
  290. If 'pruneOnly' is True, only clear unused bits with NO intersection.
  291. """
  292. unicodes = set()
  293. for table in ttFont["cmap"].tables:
  294. if table.isUnicode():
  295. unicodes.update(table.cmap.keys())
  296. if pruneOnly:
  297. empty = intersectUnicodeRanges(unicodes, inverse=True)
  298. bits = self.getUnicodeRanges() - empty
  299. else:
  300. bits = intersectUnicodeRanges(unicodes)
  301. self.setUnicodeRanges(bits)
  302. return bits
  303. def getCodePageRanges(self):
  304. """Return the set of 'ulCodePageRange*' bits currently enabled."""
  305. bits = set()
  306. if self.version < 1:
  307. return bits
  308. ul1, ul2 = self.ulCodePageRange1, self.ulCodePageRange2
  309. for i in range(32):
  310. if ul1 & (1 << i):
  311. bits.add(i)
  312. if ul2 & (1 << i):
  313. bits.add(i + 32)
  314. return bits
  315. def setCodePageRanges(self, bits):
  316. """Set the 'ulCodePageRange*' fields to the specified 'bits'."""
  317. ul1, ul2 = 0, 0
  318. for bit in bits:
  319. if 0 <= bit < 32:
  320. ul1 |= 1 << bit
  321. elif 32 <= bit < 64:
  322. ul2 |= 1 << (bit - 32)
  323. else:
  324. raise ValueError(f"expected 0 <= int <= 63, found: {bit:r}")
  325. if self.version < 1:
  326. self.version = 1
  327. self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2
  328. def recalcCodePageRanges(self, ttFont, pruneOnly=False):
  329. unicodes = set()
  330. for table in ttFont["cmap"].tables:
  331. if table.isUnicode():
  332. unicodes.update(table.cmap.keys())
  333. bits = calcCodePageRanges(unicodes)
  334. if pruneOnly:
  335. bits &= self.getCodePageRanges()
  336. # when no codepage ranges can be enabled, fall back to enabling bit 0
  337. # (Latin 1) so that the font works in MS Word:
  338. # https://github.com/googlei18n/fontmake/issues/468
  339. if not bits:
  340. bits = {0}
  341. self.setCodePageRanges(bits)
  342. return bits
  343. def recalcAvgCharWidth(self, ttFont):
  344. """Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.
  345. Set it to 0 if the unlikely event 'hmtx' table is not found.
  346. """
  347. avg_width = 0
  348. hmtx = ttFont.get("hmtx")
  349. if hmtx is not None:
  350. widths = [width for width, _ in hmtx.metrics.values() if width > 0]
  351. if widths:
  352. avg_width = otRound(sum(widths) / len(widths))
  353. self.xAvgCharWidth = avg_width
  354. return avg_width
# Unicode ranges data from the OpenType OS/2 table specification v1.7
#
# The position of each top-level entry is its 'ulUnicodeRange' bit number
# (the tuple is enumerated to derive the bit). Each entry is a tuple of
# (block name, (first codepoint, last codepoint)) pairs; the last codepoint
# is inclusive. One bit can cover several blocks.
OS2_UNICODE_RANGES = (
    (("Basic Latin", (0x0000, 0x007F)),),
    (("Latin-1 Supplement", (0x0080, 0x00FF)),),
    (("Latin Extended-A", (0x0100, 0x017F)),),
    (("Latin Extended-B", (0x0180, 0x024F)),),
    (
        ("IPA Extensions", (0x0250, 0x02AF)),
        ("Phonetic Extensions", (0x1D00, 0x1D7F)),
        ("Phonetic Extensions Supplement", (0x1D80, 0x1DBF)),
    ),
    (
        ("Spacing Modifier Letters", (0x02B0, 0x02FF)),
        ("Modifier Tone Letters", (0xA700, 0xA71F)),
    ),
    (
        ("Combining Diacritical Marks", (0x0300, 0x036F)),
        ("Combining Diacritical Marks Supplement", (0x1DC0, 0x1DFF)),
    ),
    (("Greek and Coptic", (0x0370, 0x03FF)),),
    (("Coptic", (0x2C80, 0x2CFF)),),
    (
        ("Cyrillic", (0x0400, 0x04FF)),
        ("Cyrillic Supplement", (0x0500, 0x052F)),
        ("Cyrillic Extended-A", (0x2DE0, 0x2DFF)),
        ("Cyrillic Extended-B", (0xA640, 0xA69F)),
    ),
    (("Armenian", (0x0530, 0x058F)),),
    (("Hebrew", (0x0590, 0x05FF)),),
    (("Vai", (0xA500, 0xA63F)),),
    (("Arabic", (0x0600, 0x06FF)), ("Arabic Supplement", (0x0750, 0x077F))),
    (("NKo", (0x07C0, 0x07FF)),),
    (("Devanagari", (0x0900, 0x097F)),),
    (("Bengali", (0x0980, 0x09FF)),),
    (("Gurmukhi", (0x0A00, 0x0A7F)),),
    (("Gujarati", (0x0A80, 0x0AFF)),),
    (("Oriya", (0x0B00, 0x0B7F)),),
    (("Tamil", (0x0B80, 0x0BFF)),),
    (("Telugu", (0x0C00, 0x0C7F)),),
    (("Kannada", (0x0C80, 0x0CFF)),),
    (("Malayalam", (0x0D00, 0x0D7F)),),
    (("Thai", (0x0E00, 0x0E7F)),),
    (("Lao", (0x0E80, 0x0EFF)),),
    (("Georgian", (0x10A0, 0x10FF)), ("Georgian Supplement", (0x2D00, 0x2D2F))),
    (("Balinese", (0x1B00, 0x1B7F)),),
    (("Hangul Jamo", (0x1100, 0x11FF)),),
    (
        ("Latin Extended Additional", (0x1E00, 0x1EFF)),
        ("Latin Extended-C", (0x2C60, 0x2C7F)),
        ("Latin Extended-D", (0xA720, 0xA7FF)),
    ),
    (("Greek Extended", (0x1F00, 0x1FFF)),),
    (
        ("General Punctuation", (0x2000, 0x206F)),
        ("Supplemental Punctuation", (0x2E00, 0x2E7F)),
    ),
    (("Superscripts And Subscripts", (0x2070, 0x209F)),),
    (("Currency Symbols", (0x20A0, 0x20CF)),),
    (("Combining Diacritical Marks For Symbols", (0x20D0, 0x20FF)),),
    (("Letterlike Symbols", (0x2100, 0x214F)),),
    (("Number Forms", (0x2150, 0x218F)),),
    (
        ("Arrows", (0x2190, 0x21FF)),
        ("Supplemental Arrows-A", (0x27F0, 0x27FF)),
        ("Supplemental Arrows-B", (0x2900, 0x297F)),
        ("Miscellaneous Symbols and Arrows", (0x2B00, 0x2BFF)),
    ),
    (
        ("Mathematical Operators", (0x2200, 0x22FF)),
        ("Supplemental Mathematical Operators", (0x2A00, 0x2AFF)),
        ("Miscellaneous Mathematical Symbols-A", (0x27C0, 0x27EF)),
        ("Miscellaneous Mathematical Symbols-B", (0x2980, 0x29FF)),
    ),
    (("Miscellaneous Technical", (0x2300, 0x23FF)),),
    (("Control Pictures", (0x2400, 0x243F)),),
    (("Optical Character Recognition", (0x2440, 0x245F)),),
    (("Enclosed Alphanumerics", (0x2460, 0x24FF)),),
    (("Box Drawing", (0x2500, 0x257F)),),
    (("Block Elements", (0x2580, 0x259F)),),
    (("Geometric Shapes", (0x25A0, 0x25FF)),),
    (("Miscellaneous Symbols", (0x2600, 0x26FF)),),
    (("Dingbats", (0x2700, 0x27BF)),),
    (("CJK Symbols And Punctuation", (0x3000, 0x303F)),),
    (("Hiragana", (0x3040, 0x309F)),),
    (
        ("Katakana", (0x30A0, 0x30FF)),
        ("Katakana Phonetic Extensions", (0x31F0, 0x31FF)),
    ),
    (("Bopomofo", (0x3100, 0x312F)), ("Bopomofo Extended", (0x31A0, 0x31BF))),
    (("Hangul Compatibility Jamo", (0x3130, 0x318F)),),
    (("Phags-pa", (0xA840, 0xA87F)),),
    (("Enclosed CJK Letters And Months", (0x3200, 0x32FF)),),
    (("CJK Compatibility", (0x3300, 0x33FF)),),
    (("Hangul Syllables", (0xAC00, 0xD7AF)),),
    (("Non-Plane 0 *", (0xD800, 0xDFFF)),),
    (("Phoenician", (0x10900, 0x1091F)),),
    (
        ("CJK Unified Ideographs", (0x4E00, 0x9FFF)),
        ("CJK Radicals Supplement", (0x2E80, 0x2EFF)),
        ("Kangxi Radicals", (0x2F00, 0x2FDF)),
        ("Ideographic Description Characters", (0x2FF0, 0x2FFF)),
        ("CJK Unified Ideographs Extension A", (0x3400, 0x4DBF)),
        ("CJK Unified Ideographs Extension B", (0x20000, 0x2A6DF)),
        ("Kanbun", (0x3190, 0x319F)),
    ),
    (("Private Use Area (plane 0)", (0xE000, 0xF8FF)),),
    (
        ("CJK Strokes", (0x31C0, 0x31EF)),
        ("CJK Compatibility Ideographs", (0xF900, 0xFAFF)),
        ("CJK Compatibility Ideographs Supplement", (0x2F800, 0x2FA1F)),
    ),
    (("Alphabetic Presentation Forms", (0xFB00, 0xFB4F)),),
    (("Arabic Presentation Forms-A", (0xFB50, 0xFDFF)),),
    (("Combining Half Marks", (0xFE20, 0xFE2F)),),
    (
        ("Vertical Forms", (0xFE10, 0xFE1F)),
        ("CJK Compatibility Forms", (0xFE30, 0xFE4F)),
    ),
    (("Small Form Variants", (0xFE50, 0xFE6F)),),
    (("Arabic Presentation Forms-B", (0xFE70, 0xFEFF)),),
    (("Halfwidth And Fullwidth Forms", (0xFF00, 0xFFEF)),),
    (("Specials", (0xFFF0, 0xFFFF)),),
    (("Tibetan", (0x0F00, 0x0FFF)),),
    (("Syriac", (0x0700, 0x074F)),),
    (("Thaana", (0x0780, 0x07BF)),),
    (("Sinhala", (0x0D80, 0x0DFF)),),
    (("Myanmar", (0x1000, 0x109F)),),
    (
        ("Ethiopic", (0x1200, 0x137F)),
        ("Ethiopic Supplement", (0x1380, 0x139F)),
        ("Ethiopic Extended", (0x2D80, 0x2DDF)),
    ),
    (("Cherokee", (0x13A0, 0x13FF)),),
    (("Unified Canadian Aboriginal Syllabics", (0x1400, 0x167F)),),
    (("Ogham", (0x1680, 0x169F)),),
    (("Runic", (0x16A0, 0x16FF)),),
    (("Khmer", (0x1780, 0x17FF)), ("Khmer Symbols", (0x19E0, 0x19FF))),
    (("Mongolian", (0x1800, 0x18AF)),),
    (("Braille Patterns", (0x2800, 0x28FF)),),
    (("Yi Syllables", (0xA000, 0xA48F)), ("Yi Radicals", (0xA490, 0xA4CF))),
    (
        ("Tagalog", (0x1700, 0x171F)),
        ("Hanunoo", (0x1720, 0x173F)),
        ("Buhid", (0x1740, 0x175F)),
        ("Tagbanwa", (0x1760, 0x177F)),
    ),
    (("Old Italic", (0x10300, 0x1032F)),),
    (("Gothic", (0x10330, 0x1034F)),),
    (("Deseret", (0x10400, 0x1044F)),),
    (
        ("Byzantine Musical Symbols", (0x1D000, 0x1D0FF)),
        ("Musical Symbols", (0x1D100, 0x1D1FF)),
        ("Ancient Greek Musical Notation", (0x1D200, 0x1D24F)),
    ),
    (("Mathematical Alphanumeric Symbols", (0x1D400, 0x1D7FF)),),
    (
        ("Private Use (plane 15)", (0xF0000, 0xFFFFD)),
        ("Private Use (plane 16)", (0x100000, 0x10FFFD)),
    ),
    (
        ("Variation Selectors", (0xFE00, 0xFE0F)),
        ("Variation Selectors Supplement", (0xE0100, 0xE01EF)),
    ),
    (("Tags", (0xE0000, 0xE007F)),),
    (("Limbu", (0x1900, 0x194F)),),
    (("Tai Le", (0x1950, 0x197F)),),
    (("New Tai Lue", (0x1980, 0x19DF)),),
    (("Buginese", (0x1A00, 0x1A1F)),),
    (("Glagolitic", (0x2C00, 0x2C5F)),),
    (("Tifinagh", (0x2D30, 0x2D7F)),),
    (("Yijing Hexagram Symbols", (0x4DC0, 0x4DFF)),),
    (("Syloti Nagri", (0xA800, 0xA82F)),),
    (
        ("Linear B Syllabary", (0x10000, 0x1007F)),
        ("Linear B Ideograms", (0x10080, 0x100FF)),
        ("Aegean Numbers", (0x10100, 0x1013F)),
    ),
    (("Ancient Greek Numbers", (0x10140, 0x1018F)),),
    (("Ugaritic", (0x10380, 0x1039F)),),
    (("Old Persian", (0x103A0, 0x103DF)),),
    (("Shavian", (0x10450, 0x1047F)),),
    (("Osmanya", (0x10480, 0x104AF)),),
    (("Cypriot Syllabary", (0x10800, 0x1083F)),),
    (("Kharoshthi", (0x10A00, 0x10A5F)),),
    (("Tai Xuan Jing Symbols", (0x1D300, 0x1D35F)),),
    (
        ("Cuneiform", (0x12000, 0x123FF)),
        ("Cuneiform Numbers and Punctuation", (0x12400, 0x1247F)),
    ),
    (("Counting Rod Numerals", (0x1D360, 0x1D37F)),),
    (("Sundanese", (0x1B80, 0x1BBF)),),
    (("Lepcha", (0x1C00, 0x1C4F)),),
    (("Ol Chiki", (0x1C50, 0x1C7F)),),
    (("Saurashtra", (0xA880, 0xA8DF)),),
    (("Kayah Li", (0xA900, 0xA92F)),),
    (("Rejang", (0xA930, 0xA95F)),),
    (("Cham", (0xAA00, 0xAA5F)),),
    (("Ancient Symbols", (0x10190, 0x101CF)),),
    (("Phaistos Disc", (0x101D0, 0x101FF)),),
    (
        ("Carian", (0x102A0, 0x102DF)),
        ("Lycian", (0x10280, 0x1029F)),
        ("Lydian", (0x10920, 0x1093F)),
    ),
    (("Domino Tiles", (0x1F030, 0x1F09F)), ("Mahjong Tiles", (0x1F000, 0x1F02F))),
)
  561. _unicodeStarts = []
  562. _unicodeValues = [None]
  563. def _getUnicodeRanges():
  564. # build the ranges of codepoints for each unicode range bit, and cache result
  565. if not _unicodeStarts:
  566. unicodeRanges = [
  567. (start, (stop, bit))
  568. for bit, blocks in enumerate(OS2_UNICODE_RANGES)
  569. for _, (start, stop) in blocks
  570. ]
  571. for start, (stop, bit) in sorted(unicodeRanges):
  572. _unicodeStarts.append(start)
  573. _unicodeValues.append((stop, bit))
  574. return _unicodeStarts, _unicodeValues
  575. def intersectUnicodeRanges(unicodes, inverse=False):
  576. """Intersect a sequence of (int) Unicode codepoints with the Unicode block
  577. ranges defined in the OpenType specification v1.7, and return the set of
  578. 'ulUnicodeRanges' bits for which there is at least ONE intersection.
  579. If 'inverse' is True, return the the bits for which there is NO intersection.
  580. >>> intersectUnicodeRanges([0x0410]) == {9}
  581. True
  582. >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
  583. True
  584. >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
  585. ... set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
  586. True
  587. """
  588. unicodes = set(unicodes)
  589. unicodestarts, unicodevalues = _getUnicodeRanges()
  590. bits = set()
  591. for code in unicodes:
  592. stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)]
  593. if code <= stop:
  594. bits.add(bit)
  595. # The spec says that bit 57 ("Non Plane 0") implies that there's
  596. # at least one codepoint beyond the BMP; so I also include all
  597. # the non-BMP codepoints here
  598. if any(0x10000 <= code < 0x110000 for code in unicodes):
  599. bits.add(57)
  600. return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits.

    Each code page bit is enabled when a specific marker character is present
    in 'unicodes', in several cases only together with the ASCII range and/or
    further marker characters. The result is returned as a set of bit numbers.

    This is a direct translation of FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158
    """
    bits = set()
    # range() excludes its end, so the required codepoints are 0x20..0x7D.
    hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
    # Presence of this box-drawing character gates most of the bits >= 48.
    hasLineart = ord("┤") in unicodes

    for uni in unicodes:
        if uni == ord("Þ") and hasAscii:
            bits.add(0)  # Latin 1
        elif uni == ord("Ľ") and hasAscii:
            bits.add(1)  # Latin 2: Eastern Europe
            if hasLineart:
                bits.add(58)  # Latin 2
        elif uni == ord("Б"):
            bits.add(2)  # Cyrillic
            if ord("Ѕ") in unicodes and hasLineart:
                bits.add(57)  # IBM Cyrillic
            if ord("╜") in unicodes and hasLineart:
                bits.add(49)  # MS-DOS Russian
        elif uni == ord("Ά"):
            bits.add(3)  # Greek
            if hasLineart and ord("½") in unicodes:
                bits.add(48)  # IBM Greek
            if hasLineart and ord("√") in unicodes:
                bits.add(60)  # Greek, former 437 G
        elif uni == ord("İ") and hasAscii:
            bits.add(4)  # Turkish
            if hasLineart:
                bits.add(56)  # IBM turkish
        elif uni == ord("א"):
            bits.add(5)  # Hebrew
            if hasLineart and ord("√") in unicodes:
                bits.add(53)  # Hebrew
        elif uni == ord("ر"):
            bits.add(6)  # Arabic
            if ord("√") in unicodes:
                bits.add(51)  # Arabic
            if hasLineart:
                bits.add(61)  # Arabic; ASMO 708
        elif uni == ord("ŗ") and hasAscii:
            bits.add(7)  # Windows Baltic
            if hasLineart:
                bits.add(59)  # MS-DOS Baltic
        elif uni == ord("₫") and hasAscii:
            bits.add(8)  # Vietnamese
        elif uni == ord("ๅ"):
            bits.add(16)  # Thai
        elif uni == ord("エ"):
            bits.add(17)  # JIS/Japan
        elif uni == ord("ㄅ"):
            bits.add(18)  # Chinese: Simplified
        elif uni == ord("ㄱ"):
            bits.add(19)  # Korean wansung
        elif uni == ord("央"):
            bits.add(20)  # Chinese: Traditional
        elif uni == ord("곴"):
            bits.add(21)  # Korean Johab
        elif uni == ord("♥") and hasAscii:
            bits.add(30)  # OEM Character Set
        # TODO: Symbol bit has a special meaning (check the spec), we need
        # to confirm if this is wanted by default.
        # elif chr(0xF000) <= char <= chr(0xF0FF):
        # codepageRanges.add(31) # Symbol Character Set
        elif uni == ord("þ") and hasAscii and hasLineart:
            bits.add(54)  # MS-DOS Icelandic
        elif uni == ord("╚") and hasAscii:
            bits.add(62)  # WE/Latin 1
            bits.add(63)  # US
        # Catch-all branch: reached for every codepoint not handled above,
        # so it must stay last in the chain.
        elif hasAscii and hasLineart and ord("√") in unicodes:
            if uni == ord("Å"):
                bits.add(50)  # MS-DOS Nordic
            elif uni == ord("é"):
                bits.add(52)  # MS-DOS Canadian French
            elif uni == ord("õ"):
                bits.add(55)  # MS-DOS Portuguese

    # Checked once, outside the loop: this bit depends on a combination of
    # characters rather than on a single marker codepoint.
    if hasAscii and ord("‰") in unicodes and ord("∑") in unicodes:
        bits.add(29)  # Macintosh Character Set (US Roman)

    return bits
  682. if __name__ == "__main__":
  683. import doctest, sys
  684. sys.exit(doctest.testmod().failed)