Generateurv2/backend/env/lib/python3.10/site-packages/sympy/testing/quality_unicode.py
2022-06-24 17:14:37 +02:00

137 lines
4.7 KiB
Python

import re
import fnmatch
# XXX Python 2 unicode import test.
# May remove after deprecating python 2.7.
#
# Failure messages for the encoding-header check.  Messages A, B and C are
# %-formatted with ``(file name, line number)``; messages D and E are
# %-formatted with the file name only.

# Non-ASCII character found, file not whitelisted, no encoding header.
message_unicode_A = (
    "File contains a unicode character : %s, line %s. "
    "But with no encoding header. "
    "See https://www.python.org/dev/peps/pep-0263/ "
    "and add '# coding=utf-8'"
)

# Non-ASCII character found, encoding header present, file not whitelisted.
message_unicode_B = (
    "File contains a unicode character : %s, line %s. "
    "But not in the whitelist. "
    "Add the file to the whitelist in " + __file__
)

# Non-ASCII character found, file whitelisted, but no encoding header.
message_unicode_C = (
    "File contains a unicode character : %s, line %s. "
    "And is in the whitelist, but without the encoding header. "
    "See https://www.python.org/dev/peps/pep-0263/ "
    "and add '# coding=utf-8'."
)

# File is whitelisted but contains only ASCII.
# (The fragments are joined with ", " / ". " so the rendered sentence does
# not run the file name and the following words together.)
message_unicode_D = (
    "File does not contain a unicode character : %s, "
    "but is in the whitelist. "
    "Remove the file from the whitelist in " + __file__
)

# File contains only ASCII but still carries an encoding header.
message_unicode_E = (
    "File does not contain a unicode character : %s, "
    "but contains the header '# coding=utf-8' or equivalent. "
    "Remove the header."
)
# Recognises a source-encoding declaration comment (the pattern given in
# PEP 263); group 1 captures the declared codec name.
encoding_header_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')

# fnmatch-style patterns for files that are allowed to contain non-ASCII
# characters.
unicode_whitelist = [
    # Contributor names are not always plain ASCII.
    r'*/bin/authors_update.py',

    # Code and tests that exercise unicode input and output.
    r'*/sympy/testing/tests/test_code_quality.py',
    r'*/sympy/physics/vector/tests/test_printing.py',
    r'*/physics/quantum/tests/test_printing.py',
    r'*/sympy/vector/tests/test_printing.py',
    r'*/sympy/parsing/tests/test_sympy_parser.py',
    r'*/sympy/printing/pretty/tests/test_pretty.py',
    r'*/sympy/printing/tests/test_conventions.py',
    r'*/sympy/printing/tests/test_preview.py',
    r'*/liealgebras/type_g.py',
    r'*/liealgebras/weyl_group.py',
    r'*/liealgebras/tests/test_type_G.py',

    # wigner.py and polarization.py carry unicode doctests.  They are
    # probably not strictly necessary, but several of the examples look
    # rather ugly without use_unicode (e.g. matrices have to be wrapped
    # over multiple lines).
    r'*/sympy/physics/wigner.py',
    r'*/sympy/physics/optics/polarization.py',

    # Docstrings in joint.py use unicode variable names.
    r'*/sympy/physics/mechanics/joint.py',
]

# fnmatch-style patterns for files that are treated as whitelisted but are
# not reported when they turn out to contain only ASCII.
unicode_strict_whitelist = [
    r'*/sympy/parsing/latex/_antlr/__init__.py',
]
def _test_this_file_encoding(
        fname, test_file,
        unicode_whitelist=unicode_whitelist,
        unicode_strict_whitelist=unicode_strict_whitelist):
    """Test helper function for python 2 importability test

    Checks that a file which contains a non-ASCII character also has a

        # coding=utf-8

    or

        # -*- coding: utf-8 -*-

    line, and that the presence of non-ASCII characters agrees with the
    whitelists.

    The test may have to operate on filewise manner, so it had moved
    to a separate process.

    May remove after deprecating python 2.7.

    Parameters
    ==========

    fname : str
        File name; matched against the whitelist patterns with
        ``fnmatch.fnmatch`` and interpolated into failure messages.
    test_file : iterable of str
        The lines of the file, in order.  It is iterated exactly once.
    unicode_whitelist : list of str
        ``fnmatch`` patterns of files that may contain non-ASCII characters.
    unicode_strict_whitelist : list of str
        ``fnmatch`` patterns of files that count as whitelisted but are not
        reported when they contain only ASCII.

    Raises
    ======

    AssertionError
        * a non-ASCII line is found in a non-whitelisted file (message A
          without a utf-8 header seen so far, message B with one);
        * a non-ASCII line is found in a whitelisted file before any utf-8
          header was seen (message C);
        * a whitelisted, non-strict file contains only ASCII (message D);
        * a non-whitelisted, ASCII-only file has a utf-8 header (message E).
    """
    # The whitelist arguments are only read, never mutated, so sharing the
    # module-level lists as defaults is safe.
    is_in_strict_whitelist = any(
        fnmatch.fnmatch(fname, patt) for patt in unicode_strict_whitelist)
    # A strict-whitelist match implies membership of the ordinary whitelist.
    is_in_whitelist = is_in_strict_whitelist or any(
        fnmatch.fnmatch(fname, patt) for patt in unicode_whitelist)

    has_coding_utf8 = False
    has_unicode = False

    for idx, line in enumerate(test_file):
        # An encoding declaration is only looked for on the first two lines.
        if idx in (0, 1):
            match = encoding_header_re.match(line)
            if match and match.group(1).lower() == 'utf-8':
                has_coding_utf8 = True

        try:
            line.encode(encoding='ascii')
        except (UnicodeEncodeError, UnicodeDecodeError):
            has_unicode = True
            # Failures are raised explicitly rather than via ``assert False``
            # so that the check is not stripped when running under ``-O``.
            if not is_in_whitelist:
                if has_coding_utf8:
                    message = message_unicode_B
                else:
                    message = message_unicode_A
                raise AssertionError(message % (fname, idx + 1))
            if not has_coding_utf8:
                raise AssertionError(message_unicode_C % (fname, idx + 1))
            # Whitelisted file with a header: keep scanning the other lines.

    if has_unicode:
        return

    # From here on the file is known to be pure ASCII.
    if is_in_whitelist:
        if not is_in_strict_whitelist:
            raise AssertionError(message_unicode_D % fname)
    elif has_coding_utf8:
        raise AssertionError(message_unicode_E % fname)