Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port several things from cpython here #39

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
12f3380
Port python/cpython#d134809cd3764c6a634eab7bb8995e3e2eff14d5
mikekap Mar 8, 2021
2052436
Tests and changelog entry
mikekap Mar 8, 2021
f5bbe50
Add support for is_normalized in py3
mikekap Mar 8, 2021
67d029b
more changelog
mikekap Mar 8, 2021
223f8e8
Port 530f506ac91338b55cf2be71b1cdf50cb077512f
mikekap Mar 8, 2021
af1801e
Port 2f09413947d1ce0043de62ed2346f9a2b4e5880b
mikekap Mar 8, 2021
d87910c
more changelog
mikekap Mar 8, 2021
63c6c44
Port 7669cb8b21c7c9cef758609c44017c09d1ce4658
mikekap Mar 8, 2021
923127a
Port 2ec70102066fe5534f1a62e8f496d2005e1697db
mikekap Mar 8, 2021
846d5e3
Port 982307b9cceef36e30ac43b13032d68c3b921adc
mikekap Mar 8, 2021
26151d1
Port cd8295ff758891f21084a6a5ad3403d35dda38f7
mikekap Mar 8, 2021
d8877c8
Port ddc0dd001a4224274ba6f83568b45a1dd88c6fc6
mikekap Mar 8, 2021
0caffa2
Port e6b8c5263a7fcf5b95d0fd4c900e5949eeb6630d
mikekap Mar 8, 2021
7a14bc7
Port 47e1afd2a1793b5818a16c41307a4ce976331649
mikekap Mar 8, 2021
f48c57a
Port 920cb647ba23feab7987d0dac1bd63bfc2ffc4c0
mikekap Mar 8, 2021
ac518ac
Port c8c4200b65b2159bbb13cee10d67dfb3676fef26
mikekap Mar 8, 2021
6928be4
Py2 ports & fixes
mikekap Mar 8, 2021
7d93965
Add the proper license
mikekap Mar 8, 2021
79333a1
that was really annoying
mikekap Mar 8, 2021
eb87bc1
what year is it - declarations at the top?
mikekap Mar 8, 2021
6f331e0
no builtins
mikekap Mar 8, 2021
c5d8bfb
Maybe try updating multibuild
mikekap Mar 8, 2021
9a47644
Add python 3.9 targets
mikekap Mar 8, 2021
848233f
huh
mikekap Mar 8, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ matrix:
- os: linux
env:
- MB_PYTHON_VERSION=3.8
- os: linux
env:
- MB_PYTHON_VERSION=3.9
- os: linux
env:
- MB_PYTHON_VERSION=3.7
Expand All @@ -80,6 +83,10 @@ matrix:
language: generic
env:
- MB_PYTHON_VERSION=3.8
- os: osx
language: generic
env:
- MB_PYTHON_VERSION=3.9

before_install:
- source multibuild/common_utils.sh
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 13.0.0-3
- Port https://github.com/python/cpython/commit/d134809cd3764c6a634eab7bb8995e3e2eff14d5 to unicodedata2
- Port is_normalized to unicodedata2 (https://github.com/python/cpython/commit/2810dd7be9876236f74ac80716d113572c9098dd & https://github.com/python/cpython/commit/2f09413947d1ce0043de62ed2346f9a2b4e5880b)
- Port https://github.com/python/cpython/commit/c8c4200b65b2159bbb13cee10d67dfb3676fef26 and related commits to follow PEP 489 (multi-phase extension module initialization)

## 13.0.0
- Upgrade to Unicode 13.0.0

Expand Down
4 changes: 3 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,9 @@ Apache License
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright {yyyy} {name of copyright owner}
Copyright 2021 unicodedata2 contributors
Copyright © 2001-2021 Python Software Foundation; All Rights
Reserved

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
8 changes: 8 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ environment:
PYTHON_VERSION: "3.8.x"
PYTHON_ARCH: "64"

- PYTHON: "C:\\Python39"
PYTHON_VERSION: "3.9.x"
PYTHON_ARCH: "32"

- PYTHON: "C:\\Python39-x64"
PYTHON_VERSION: "3.9.x"
PYTHON_ARCH: "64"

matrix:
fast_finish: true

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setup(
name="unicodedata2",
version="13.0.0-2",
version="13.0.0-3",
description="Unicodedata backport for Python 2/3 updated to the latest Unicode version.",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
128 changes: 128 additions & 0 deletions tests/test_normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""
Taken from cpython test_normalization.py.
(c) 2021 PSF
"""

try:
    # Python 3 location.
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 exposes urlretrieve directly on the urllib module.
    # Only ImportError is caught so unrelated failures (including
    # KeyboardInterrupt) are not silently swallowed.
    from urllib import urlretrieve
import unittest

import sys
from unicodedata2 import normalize, is_normalized, unidata_version

# Name of the normalization test data file that this module downloads.
TESTDATAFILE = "NormalizationTest.txt"
# Download URL; the directory component is the Unicode version string
# exported by unicodedata2 (unidata_version).
TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE

def check_version(testfile):
    """Return True if the first line of *testfile* mentions ``unidata_version``.

    Consumes exactly one line from *testfile* (via ``readline()``), so the
    caller continues reading from the second line onwards.
    """
    return unidata_version in testfile.readline()

class RangeError(Exception):
    """Raised by unistr() when a code point exceeds ``sys.maxunicode``."""

def NFC(str):
    """Return *str* normalized to form NFC."""
    form = "NFC"
    return normalize(form, str)

def NFKC(str):
    """Return *str* normalized to form NFKC."""
    form = "NFKC"
    return normalize(form, str)

def NFD(str):
    """Return *str* normalized to form NFD."""
    form = "NFD"
    return normalize(form, str)

def NFKD(str):
    """Return *str* normalized to form NFKD."""
    form = "NFKD"
    return normalize(form, str)

# Module-level ``chr`` that always yields a text character: the builtin on
# Python 3, ``unichr`` on Python 2.
if sys.version_info[0] >= 3:
    chr = chr
else:
    chr = unichr

def unistr(data):
    """Convert space-separated hexadecimal code points into a string.

    *data* is split on single spaces and every field is parsed as a base-16
    integer.  Raises RangeError if any parsed value is greater than
    ``sys.maxunicode``; otherwise returns the characters joined together.
    """
    code_points = [int(field, 16) for field in data.split(" ")]
    if any(cp > sys.maxunicode for cp in code_points):
        raise RangeError
    return "".join(map(chr, code_points))

class NormalizationTest(unittest.TestCase):
    """Normalization tests driven by the downloaded TESTDATAFILE data."""

    def test_main(self):
        """Fetch the test data file and run every data line through the checks.

        Fails the test if the file cannot be retrieved or opened, and raises
        ValueError if its first line does not mention ``unidata_version``.
        """
        open_kwargs = {}
        if sys.version_info[0] >= 3:
            # Only passed on Python 3; Python 2's open() has no such parameter.
            open_kwargs['encoding'] = "utf-8"

        # Hit the exception early
        try:
            filename, _ = urlretrieve(TESTDATAURL)
            testdata = open(filename, **open_kwargs)
        except (IOError, OSError):
            # Python 2's urlretrieve/open raise IOError, which is not an
            # OSError subclass there; on Python 3 the two names are aliases.
            self.fail("Could not retrieve {TESTDATAURL}".format(**globals()))

        # The version check happens inside the ``with`` block so the file is
        # closed even when the check rejects the data.
        with testdata:
            if not check_version(testdata):
                raise ValueError('Bad test data file')
            self.run_normalization_tests(testdata)

    def run_normalization_tests(self, testdata):
        """Check every data line of *testdata*, then all remaining code points.

        *testdata* is an iterable of text lines.  Text after ``#`` is
        discarded, blank lines are skipped and ``@Part...`` lines switch the
        current part.  Every other line supplies five ``;``-separated columns
        of hexadecimal code points (c1..c5) that are checked against
        normalize() and is_normalized().  Finally, every code point that did
        not appear as c1 in ``@Part1`` must be unchanged by all four forms.
        """
        part = None
        part1_seen = set()

        for line in testdata:
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            try:
                c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(line.split(';')[0])
                    except RangeError:
                        pass
                    else:
                        part1_seen.add(c1)
                continue

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                            NFKC(c3) == NFKC(c4) == NFKC(c5),
                            line)
            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                            NFKD(c3) == NFKD(c4) == NFKD(c5),
                            line)

            # Pass the data line as the message so a failure identifies it.
            self.assertTrue(is_normalized("NFC", c2), line)
            self.assertTrue(is_normalized("NFC", c4), line)

            self.assertTrue(is_normalized("NFD", c3), line)
            self.assertTrue(is_normalized("NFD", c5), line)

            self.assertTrue(is_normalized("NFKC", c4), line)
            self.assertTrue(is_normalized("NFKD", c5), line)

            # Record part 1 data
            if part == "@Part1":
                part1_seen.add(c1)

        # Perform tests for all other data
        for c in range(sys.maxunicode + 1):
            X = chr(c)
            if X in part1_seen:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Check for bug 834676: this normalize() call must not raise."""
        normalize('NFC', u'\ud55c\uae00')


if __name__ == "__main__":
    # Executed as a script: let unittest discover and run this module's tests.
    unittest.main()
17 changes: 17 additions & 0 deletions tests/test_unicodedata2.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ def test_issue10254(self):
b = 'C\u0338' * 20 + '\xC7'
self.assertEqual(self.db.normalize('NFC', a), b)

# For tests of unicodedata.is_normalized / self.db.is_normalized,
# see test_normalization.py.

def test_east_asian_width(self):
eaw = self.db.east_asian_width
self.assertRaises(TypeError, eaw, b'a')
Expand All @@ -217,6 +220,20 @@ def test_east_asian_width_9_0_changes(self):
self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
self.assertEqual(self.db.east_asian_width('\u231a'), 'W')

# Taken from https://github.com/python/cpython/commit/d134809cd3764c6a634eab7bb8995e3e2eff14d5
def test_issue29456(self):
    """Regression test for issue #29456 (NFC normalization).

    Each case pairs an input with the NFC result expected from
    ``self.db.normalize``: the first input must come back unchanged, while
    in the other two the leading two code points become U+AE30 and the
    trailing code point is kept as-is.
    """
    # Fix #29456
    nfc_cases = (
        ('\u1100\u1176\u11a8', '\u1100\u1176\u11a8'),
        ('\u1100\u1175\u11a7', '\uae30\u11a7'),
        ('\u1100\u1175\u11c3', '\uae30\u11c3'),
    )
    for source, expected in nfc_cases:
        self.assertEqual(self.db.normalize('NFC', source), expected)


class UnicodeMiscTest(UnicodeDatabaseTest):

def test_decimal_numeric_consistent(self):
Expand Down
Loading