quality_unicode.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. import re
  2. import fnmatch
  3. message_unicode_B = \
  4. "File contains a unicode character : %s, line %s. " \
  5. "But not in the whitelist. " \
  6. "Add the file to the whitelist in " + __file__
  7. message_unicode_D = \
  8. "File does not contain a unicode character : %s." \
  9. "but is in the whitelist. " \
  10. "Remove the file from the whitelist in " + __file__
  11. encoding_header_re = re.compile(
  12. r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
  13. # Whitelist pattern for files which can have unicode.
  14. unicode_whitelist = [
  15. # Author names can include non-ASCII characters
  16. r'*/bin/authors_update.py',
  17. r'*/bin/mailmap_check.py',
  18. # These files have functions and test functions for unicode input and
  19. # output.
  20. r'*/sympy/testing/tests/test_code_quality.py',
  21. r'*/sympy/physics/vector/tests/test_printing.py',
  22. r'*/physics/quantum/tests/test_printing.py',
  23. r'*/sympy/vector/tests/test_printing.py',
  24. r'*/sympy/parsing/tests/test_sympy_parser.py',
  25. r'*/sympy/printing/pretty/tests/test_pretty.py',
  26. r'*/sympy/printing/tests/test_conventions.py',
  27. r'*/sympy/printing/tests/test_preview.py',
  28. r'*/liealgebras/type_g.py',
  29. r'*/liealgebras/weyl_group.py',
  30. r'*/liealgebras/tests/test_type_G.py',
  31. # wigner.py and polarization.py have unicode doctests. These probably
  32. # don't need to be there but some of the examples that are there are
  33. # pretty ugly without use_unicode (matrices need to be wrapped across
  34. # multiple lines etc)
  35. r'*/sympy/physics/wigner.py',
  36. r'*/sympy/physics/optics/polarization.py',
  37. # joint.py uses some unicode for variable names in the docstrings
  38. r'*/sympy/physics/mechanics/joint.py',
  39. # lll method has unicode in docstring references and author name
  40. r'*/sympy/polys/matrices/domainmatrix.py',
  41. ]
  42. unicode_strict_whitelist = [
  43. r'*/sympy/parsing/latex/_antlr/__init__.py',
  44. # test_mathematica.py uses some unicode for testing Greek characters are working #24055
  45. r'*/sympy/parsing/tests/test_mathematica.py',
  46. ]
  47. def _test_this_file_encoding(
  48. fname, test_file,
  49. unicode_whitelist=unicode_whitelist,
  50. unicode_strict_whitelist=unicode_strict_whitelist):
  51. """Test helper function for unicode test
  52. The test may have to operate on filewise manner, so it had moved
  53. to a separate process.
  54. """
  55. has_unicode = False
  56. is_in_whitelist = False
  57. is_in_strict_whitelist = False
  58. for patt in unicode_whitelist:
  59. if fnmatch.fnmatch(fname, patt):
  60. is_in_whitelist = True
  61. break
  62. for patt in unicode_strict_whitelist:
  63. if fnmatch.fnmatch(fname, patt):
  64. is_in_strict_whitelist = True
  65. is_in_whitelist = True
  66. break
  67. if is_in_whitelist:
  68. for idx, line in enumerate(test_file):
  69. try:
  70. line.encode(encoding='ascii')
  71. except (UnicodeEncodeError, UnicodeDecodeError):
  72. has_unicode = True
  73. if not has_unicode and not is_in_strict_whitelist:
  74. assert False, message_unicode_D % fname
  75. else:
  76. for idx, line in enumerate(test_file):
  77. try:
  78. line.encode(encoding='ascii')
  79. except (UnicodeEncodeError, UnicodeDecodeError):
  80. assert False, message_unicode_B % (fname, idx + 1)