fonttools · mikekap · Mar 8, 2021 · Mar 8, 2021 · Mar 8, 2021 · Mar 8, 2021
diff --git a/.travis.yml b/.travis.yml
@@ -56,6 +56,9 @@ matrix:
     - os: linux
       env:
         - MB_PYTHON_VERSION=3.8
+    - os: linux
+      env:
+        - MB_PYTHON_VERSION=3.9
     - os: linux
       env:
         - MB_PYTHON_VERSION=3.7
@@ -80,6 +83,10 @@ matrix:
       language: generic
       env:
         - MB_PYTHON_VERSION=3.8
+    - os: osx
+      language: generic
+      env:
+        - MB_PYTHON_VERSION=3.9
 
 before_install:
   - source multibuild/common_utils.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 13.0.0-3
+ - Port the fix for Python issue 29456 in `normalize('NFC', ...)` (https://github.com/python/cpython/commit/d134809cd3764c6a634eab7bb8995e3e2eff14d5) to unicodedata2
+ - Port is_normalized to unicodedata2 (https://github.com/python/cpython/commit/2810dd7be9876236f74ac80716d113572c9098dd & https://github.com/python/cpython/commit/2f09413947d1ce0043de62ed2346f9a2b4e5880b)
+ - Port https://github.com/python/cpython/commit/c8c4200b65b2159bbb13cee10d67dfb3676fef26 and related commits so that module initialization follows PEP 489
+
 ## 13.0.0
  - Upgrade to Unicode 13.0.0
 

diff --git a/LICENSE b/LICENSE
@@ -186,7 +186,9 @@ Apache License
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright {yyyy} {name of copyright owner}
+   Copyright 2021 unicodedata2 contributors
+   Copyright © 2001-2021 Python Software Foundation; All Rights
+      Reserved
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/appveyor.yml b/appveyor.yml
@@ -41,6 +41,14 @@ environment:
       PYTHON_VERSION: "3.8.x"
       PYTHON_ARCH: "64"
 
+    - PYTHON: "C:\\Python39"
+      PYTHON_VERSION: "3.9.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python39-x64"
+      PYTHON_VERSION: "3.9.x"
+      PYTHON_ARCH: "64"
+
 matrix:
   fast_finish: true
 

diff --git a/multibuild b/multibuild
diff --git a/setup.py b/setup.py
@@ -26,7 +26,7 @@
 
 setup(
     name="unicodedata2",
-    version="13.0.0-2",
+    version="13.0.0-3",
     description="Unicodedata backport for Python 2/3 updated to the latest Unicode version.",
     long_description=long_description,
     long_description_content_type="text/markdown",

diff --git a/tests/test_normalization.py b/tests/test_normalization.py
@@ -0,0 +1,128 @@
+"""
+Taken from cpython test_normalization.py.
+(c) 2021 PSF
+"""
+
+try:
+    from urllib.request import urlretrieve
+except ImportError:
+    # Python 2 has no urllib.request; urlretrieve lives in urllib itself.
+    from urllib import urlretrieve
+import unittest
+
+import sys
+from unicodedata2 import normalize, is_normalized, unidata_version
+
+TESTDATAFILE = "NormalizationTest.txt"
+TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE
+
+def check_version(testfile):
+    """Return True if the first line of *testfile* contains unidata_version.
+
+    Consumes exactly one line from *testfile* via readline().
+    """
+    header = testfile.readline()
+    matches = unidata_version in header
+    return matches
+
+class RangeError(Exception):
+    """Raised by unistr() for a code point greater than sys.maxunicode."""
+
+def NFC(str):
+    """Return *str* normalized with the "NFC" form of normalize()."""
+    form = "NFC"
+    return normalize(form, str)
+
+def NFKC(str):
+    """Return *str* normalized with the "NFKC" form of normalize()."""
+    form = "NFKC"
+    return normalize(form, str)
+
+def NFD(str):
+    """Return *str* normalized with the "NFD" form of normalize()."""
+    form = "NFD"
+    return normalize(form, str)
+
+def NFKD(str):
+    """Return *str* normalized with the "NFKD" form of normalize()."""
+    form = "NFKD"
+    return normalize(form, str)
+
+# Use unichr on Python 2 so that chr() builds text characters for any
+# code point, as the built-in chr() already does on Python 3.
+chr = chr if sys.version_info[0] >= 3 else unichr
+
+def unistr(data):
+    """Build a string from space-separated hexadecimal code points.
+
+    Raises RangeError if any code point is greater than sys.maxunicode.
+    All fields are parsed before the range check, so a malformed field
+    raises ValueError first.
+    """
+    code_points = [int(field, 16) for field in data.split(" ")]
+    if any(cp > sys.maxunicode for cp in code_points):
+        raise RangeError
+    return "".join(chr(cp) for cp in code_points)
+
+class NormalizationTest(unittest.TestCase):
+    """Check normalize() and is_normalized() against NormalizationTest.txt.
+
+    Taken from cpython test_normalization.py. The data file is downloaded
+    from TESTDATAURL when test_main runs.
+    """
+
+    def test_main(self):
+        kwargs = {}
+        if sys.version_info[0] >= 3:
+            # Only Python 3's open() accepts an encoding argument.
+            kwargs['encoding'] = "utf-8"
+
+        # Hit the exception early
+        try:
+            filename, _ = urlretrieve(TESTDATAURL)
+            testdata = open(filename, **kwargs)
+        except (IOError, OSError):
+            # On Python 2 a failed download raises IOError, which is not a
+            # subclass of OSError there; on Python 3 both names are the same.
+            self.fail("Could not retrieve {TESTDATAURL}".format(**globals()))
+
+        # Validate the header inside the with block so the file is closed
+        # even when the data turns out to be for another Unicode version.
+        with testdata:
+            if not check_version(testdata):
+                raise ValueError('Bad test data file')
+            self.run_normalization_tests(testdata)
+
+    def run_normalization_tests(self, testdata):
+        """Run the conformance checks over the lines of *testdata*.
+
+        Each data line is split on ';' into five columns c1..c5 (the part
+        after the last ';' is ignored); each column is a run of
+        space-separated hexadecimal code points. Anything after a '#' is
+        dropped, and lines starting with "@Part" select the current part.
+        After the data lines, every character that was not seen as c1 in
+        @Part1 must be left unchanged by all four normalization forms.
+        """
+        part = None
+        part1_chars = set()
+
+        for line in testdata:
+            # Drop the trailing comment, if any, and surrounding whitespace.
+            line = line.split('#')[0].strip()
+            if not line:
+                continue
+            if line.startswith("@Part"):
+                part = line.split()[0]
+                continue
+            try:
+                c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
+            except RangeError:
+                # Skip unsupported characters;
+                # try at least adding c1 if we are in part1
+                if part == "@Part1":
+                    try:
+                        c1 = unistr(line.split(';')[0])
+                    except RangeError:
+                        pass
+                    else:
+                        part1_chars.add(c1)
+                continue
+
+            # Perform tests
+            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
+            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
+            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
+            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
+            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
+                            NFKC(c3) == NFKC(c4) == NFKC(c5),
+                            line)
+            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
+                            NFKD(c3) == NFKD(c4) == NFKD(c5),
+                            line)
+
+            # Pass the data line as the message so a failure identifies the
+            # offending record, as the normalize() checks above do.
+            self.assertTrue(is_normalized("NFC", c2), line)
+            self.assertTrue(is_normalized("NFC", c4), line)
+
+            self.assertTrue(is_normalized("NFD", c3), line)
+            self.assertTrue(is_normalized("NFD", c5), line)
+
+            self.assertTrue(is_normalized("NFKC", c4), line)
+            self.assertTrue(is_normalized("NFKD", c5), line)
+
+            # Record part 1 data
+            if part == "@Part1":
+                part1_chars.add(c1)
+
+        # Perform tests for all other data
+        for c in range(sys.maxunicode + 1):
+            X = chr(c)
+            if X in part1_chars:
+                continue
+            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
+
+    def test_bug_834676(self):
+        # Check for bug 834676
+        normalize('NFC', u'\ud55c\uae00')
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_unicodedata2.py b/tests/test_unicodedata2.py
@@ -199,6 +199,9 @@ def test_issue10254(self):
         b = 'C\u0338' * 20  + '\xC7'
         self.assertEqual(self.db.normalize('NFC', a), b)
 
+    # For tests of unicodedata.is_normalized / self.db.is_normalized,
+    # see test_normalization.py.
+
     def test_east_asian_width(self):
         eaw = self.db.east_asian_width
         self.assertRaises(TypeError, eaw, b'a')
@@ -217,6 +220,20 @@ def test_east_asian_width_9_0_changes(self):
         self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
         self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
 
+    # Taken from https://github.com/python/cpython/commit/d134809cd3764c6a634eab7bb8995e3e2eff14d5
+    def test_issue29456(self):
+        # Fix #29456
+        # Each pair is (input, expected result of NFC normalization).
+        cases = [
+            ('\u1100\u1176\u11a8', '\u1100\u1176\u11a8'),
+            ('\u1100\u1175\u11a7', '\uae30\u11a7'),
+            ('\u1100\u1175\u11c3', '\uae30\u11c3'),
+        ]
+        for source, expected in cases:
+            self.assertEqual(self.db.normalize('NFC', source), expected)
+
+
 class UnicodeMiscTest(UnicodeDatabaseTest):
 
     def test_decimal_numeric_consistent(self):
+3 −3		.appveyor.yml
+24 −6		.travis.yml
+1 −1		LICENSE
+93 −31		README.rst
+85 −23		common_utils.sh
+23 −10		configure_build.sh
+3 −1		docker_build_wrap.sh
+72 −28		library_builders.sh
+17 −3		manylinux_utils.sh
+131 −14		osx_utils.sh
+3 −0		tests/config.sh
+11 −0		tests/test_common_utils.sh
+3 −1		tests/test_fill_pyver.sh
+6 −5		tests/test_library_builders.sh
+21 −7		tests/test_multibuild.sh
+7 −1		tests/test_osx_utils.sh
+15 −5		tests/test_python_install.sh
+1 −3		travis_linux_steps.sh
+14 −0		travis_osx_steps.sh
+1 −1		travis_steps.sh