Synchronize with CPython 3.7.14 C-API usage, backport #58

SnoopJ · 2022-09-23T18:41:24Z

This PR updates C API usage in unicodedata2 to better match CPython upstream. See #56.
Also includes a handful of bugfixes from later versions of CPython.

More specifically, this changeset aims for a best-effort match to C API usage as of CPython v3.7.14; see the expandable section below for a diff of the residual differences.

This changeset passes the test suite (including new tests) on CPython 3.7-3.10 and PyPy 3.7-3.9. I would be happy to update tox.ini for testing against these versions if desired. The tests also pass on CPython 3.6 (but not on PyPy 3.6).

Expand for diff of `unicodedata.c` vs v3.7.14

diff --git a/unicodedata2/unicodedata.c b/home/jgerity/repos/cpython/Modules/unicodedata.c
index ebb0b7e..e8788f5 100644
--- a/unicodedata2/unicodedata.c
+++ b/home/jgerity/repos/cpython/Modules/unicodedata.c
@@ -16,18 +16,8 @@
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
-#ifndef PYPY_VERSION
-#if PY_MINOR_VERSION < 10
 #include "ucnhash.h"
-#else
-#define Py_BUILD_CORE
-#include "internal/pycore_ucnhash.h"
-#endif
-#endif
 #include "structmember.h"
-#include "unicodectype.h"
-
-#include "_unicodedata2_compat.h"
 
 /*[clinic input]
 module unicodedata
@@ -84,7 +74,7 @@ typedef struct previous_version {
     Py_UCS4 (*normalization)(Py_UCS4);
 } PreviousDBVersion;
 
-#include "unicodedata.c.h"
+#include "clinic/unicodedata.c.h"
 
 #define get_old_record(self, v)    ((((PreviousDBVersion*)self)->getrecord)(v))
 
@@ -152,7 +142,7 @@ unicodedata_UCD_decimal_impl(PyObject *self, int chr,
     }
 
     if (!have_old)
-        rc = _PyUnicode2_ToDecimalDigit(c);
+        rc = Py_UNICODE_TODECIMAL(c);
     if (rc < 0) {
         if (default_value == NULL) {
             PyErr_SetString(PyExc_ValueError,
@@ -188,7 +178,7 @@ unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value)
 {
     long rc;
     Py_UCS4 c = (Py_UCS4)chr;
-    rc = _PyUnicode2_ToDigit(c);
+    rc = Py_UNICODE_TODIGIT(c);
     if (rc < 0) {
         if (default_value == NULL) {
             PyErr_SetString(PyExc_ValueError, "not a digit");
@@ -240,7 +230,7 @@ unicodedata_UCD_numeric_impl(PyObject *self, int chr,
     }
 
     if (!have_old)
-        rc = _PyUnicode2_ToNumeric(c);
+        rc = Py_UNICODE_TONUMERIC(c);
     if (rc == -1.0) {
         if (default_value == NULL) {
             PyErr_SetString(PyExc_ValueError, "not a numeric character");
@@ -934,15 +924,13 @@ static int
 is_unified_ideograph(Py_UCS4 code)
 {
     return
-        (0x3400 <= code && code <= 0x4DBF)   || /* CJK Ideograph Extension A */
-        (0x4E00 <= code && code <= 0x9FFF)   || /* CJK Ideograph */
-        (0x20000 <= code && code <= 0x2A6DF) || /* CJK Ideograph Extension B */
-        (0x2A700 <= code && code <= 0x2B739) || /* CJK Ideograph Extension C */
+        (0x3400 <= code && code <= 0x4DB5)   || /* CJK Ideograph Extension A */
+        (0x4E00 <= code && code <= 0x9FEF)   || /* CJK Ideograph */
+        (0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
+        (0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
         (0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
         (0x2B820 <= code && code <= 0x2CEA1) || /* CJK Ideograph Extension E */
-        (0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
-        (0x30000 <= code && code <= 0x3134A) || /* CJK Ideograph Extension G */
-        (0x31350 <= code && code <= 0x323AF);   /* CJK Ideograph Extension H */
+        (0x2CEB0 <= code && code <= 0x2EBEF);   /* CJK Ideograph Extension F */
 }
 
 /* macros used to determine if the given code point is in the PUA range that
@@ -961,7 +949,7 @@ _getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
     int offset;
     int i;
     int word;
-    const unsigned char* w;
+    unsigned char* w;
 
     if (code >= 0x110000)
         return 0;
@@ -1183,14 +1171,14 @@ _getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
             incr = incr ^ code_poly;
     }
 }
-/*
+
 static const _PyUnicode_Name_CAPI hashAPI =
 {
     sizeof(_PyUnicode_Name_CAPI),
     _getucname,
     _getcode
 };
-*/
+
 /* -------------------------------------------------------------------- */
 /* Python bindings */
 
@@ -1291,7 +1279,7 @@ static PyTypeObject UCD_Type = {
         /* The ob_type field must be initialized in the module init function
          * to be portable to Windows without using C++. */
         PyVarObject_HEAD_INIT(NULL, 0)
-        "unicodedata2.UCD",              /*tp_name*/
+        "unicodedata.UCD",              /*tp_name*/
         sizeof(PreviousDBVersion),      /*tp_basicsize*/
         0,                      /*tp_itemsize*/
         /* methods */
@@ -1344,7 +1332,7 @@ UnicodeData File Format " UNIDATA_VERSION ".");
 
 static struct PyModuleDef unicodedatamodule = {
         PyModuleDef_HEAD_INIT,
-        "unicodedata2",
+        "unicodedata",
         unicodedata_docstring,
         -1,
         unicodedata_functions,
@@ -1355,7 +1343,7 @@ static struct PyModuleDef unicodedatamodule = {
 };
 
 PyMODINIT_FUNC
-PyInit_unicodedata2(void)
+PyInit_unicodedata(void)
 {
     PyObject *m, *v;
 
@@ -1375,9 +1363,9 @@ PyInit_unicodedata2(void)
         PyModule_AddObject(m, "ucd_3_2_0", v);
 
     /* Export C API */
-    // v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
-    // if (v != NULL)
-    //     PyModule_AddObject(m, "ucnhash_CAPI", v);
+    v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
+    if (v != NULL)
+        PyModule_AddObject(m, "ucnhash_CAPI", v);
     return m;
 }

This may duplicate some of what's in #39, but that looks like it's abandoned

Upstream changes from: python/cpython@d5890c8#diff-6b9cc290ba46b071368fc3669a3be47b96bacdd08e84e34ad234235e3b74d244R252-R266

Upstream changes from: python/cpython@279f446#diff-59c1e610d58bb87487b55fdaa508712daf753d5ee1b036f047b53c63721f9091

Upstream changes from: python/cpython@a5293b4#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Upstream changes from: python/cpython@d134809

Upstream changes from: python/cpython@90aa764#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Upstream changes from: python/cpython@6359641#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Upstream changes from: python/cpython@f8d7d41#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Upstream changes from: python/cpython@2d9f252#diff-17ec60c9bf5044a66fffc2f5bd5b6c4a6df192332bd2762ddf4336a80f8f8f25

Upstream changes from: python/cpython@93cbca3#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue3811 ) python/cpython@1b08b30#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue5828 ) python/cpython@71efeb7#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue4971 ) python/cpython@806d8cf#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue7643 )

Upstream changes from: python/cpython@9c197bc

SnoopJ · 2022-09-23T18:47:08Z

When I initially set out to write this PR, I was hoping that I could achieve better synchronization with the leading-edge upstream changes to unicodedata.c.h, where there have been some substantial changes (see diff below) to the code generated by CPython's Argument Clinic. These changes aren't horribly difficult to support with the compatibility header for CPython, but they involve enough new symbols that PyPy does not provide that this should be handled as a separate issue.

Expand for a diff of `unicodedata.c.h` against v3.10.7

diff --git a/unicodedata2/unicodedata.c.h b/home/jgerity/repos/cpython/Modules/clinic/unicodedata.c.h
index 524505d..4251db2 100644
--- a/unicodedata2/unicodedata.c.h
+++ b/home/jgerity/repos/cpython/Modules/clinic/unicodedata.c.h
@@ -13,23 +13,39 @@ PyDoc_STRVAR(unicodedata_UCD_decimal__doc__,
 "ValueError is raised.");
 
 #define UNICODEDATA_UCD_DECIMAL_METHODDEF    \
-    {"decimal", (PyCFunction)unicodedata_UCD_decimal, METH_VARARGS, unicodedata_UCD_decimal__doc__},
+    {"decimal", (PyCFunction)(void(*)(void))unicodedata_UCD_decimal, METH_FASTCALL, unicodedata_UCD_decimal__doc__},
 
 static PyObject *
 unicodedata_UCD_decimal_impl(PyObject *self, int chr,
                              PyObject *default_value);
 
 static PyObject *
-unicodedata_UCD_decimal(PyObject *self, PyObject *args)
+unicodedata_UCD_decimal(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
     int chr;
     PyObject *default_value = NULL;
 
-    if (!PyArg_ParseTuple(args, "C|O:decimal",
-        &chr, &default_value)) {
+    if (!_PyArg_CheckPositional("decimal", nargs, 1, 2)) {
         goto exit;
     }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("decimal", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[0])) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+        _PyArg_BadArgument("decimal", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(args[0], 0);
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    default_value = args[1];
+skip_optional:
     return_value = unicodedata_UCD_decimal_impl(self, chr, default_value);
 
 exit:
@@ -47,22 +63,38 @@ PyDoc_STRVAR(unicodedata_UCD_digit__doc__,
 "ValueError is raised.");
 
 #define UNICODEDATA_UCD_DIGIT_METHODDEF    \
-    {"digit", (PyCFunction)unicodedata_UCD_digit, METH_VARARGS, unicodedata_UCD_digit__doc__},
+    {"digit", (PyCFunction)(void(*)(void))unicodedata_UCD_digit, METH_FASTCALL, unicodedata_UCD_digit__doc__},
 
 static PyObject *
 unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value);
 
 static PyObject *
-unicodedata_UCD_digit(PyObject *self, PyObject *args)
+unicodedata_UCD_digit(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
     int chr;
     PyObject *default_value = NULL;
 
-    if (!PyArg_ParseTuple(args, "C|O:digit",
-        &chr, &default_value)) {
+    if (!_PyArg_CheckPositional("digit", nargs, 1, 2)) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("digit", "argument 1", "a unicode character", args[0]);
         goto exit;
     }
+    if (PyUnicode_READY(args[0])) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+        _PyArg_BadArgument("digit", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(args[0], 0);
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    default_value = args[1];
+skip_optional:
     return_value = unicodedata_UCD_digit_impl(self, chr, default_value);
 
 exit:
@@ -80,23 +112,39 @@ PyDoc_STRVAR(unicodedata_UCD_numeric__doc__,
 "ValueError is raised.");
 
 #define UNICODEDATA_UCD_NUMERIC_METHODDEF    \
-    {"numeric", (PyCFunction)unicodedata_UCD_numeric, METH_VARARGS, unicodedata_UCD_numeric__doc__},
+    {"numeric", (PyCFunction)(void(*)(void))unicodedata_UCD_numeric, METH_FASTCALL, unicodedata_UCD_numeric__doc__},
 
 static PyObject *
 unicodedata_UCD_numeric_impl(PyObject *self, int chr,
                              PyObject *default_value);
 
 static PyObject *
-unicodedata_UCD_numeric(PyObject *self, PyObject *args)
+unicodedata_UCD_numeric(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
     int chr;
     PyObject *default_value = NULL;
 
-    if (!PyArg_ParseTuple(args, "C|O:numeric",
-        &chr, &default_value)) {
+    if (!_PyArg_CheckPositional("numeric", nargs, 1, 2)) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("numeric", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[0])) {
         goto exit;
     }
+    if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+        _PyArg_BadArgument("numeric", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(args[0], 0);
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    default_value = args[1];
+skip_optional:
     return_value = unicodedata_UCD_numeric_impl(self, chr, default_value);
 
 exit:
@@ -121,9 +169,18 @@ unicodedata_UCD_category(PyObject *self, PyObject *arg)
     PyObject *return_value = NULL;
     int chr;
 
-    if (!PyArg_Parse(arg, "C:category", &chr)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("category", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    if (PyUnicode_READY(arg)) {
         goto exit;
     }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        _PyArg_BadArgument("category", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(arg, 0);
     return_value = unicodedata_UCD_category_impl(self, chr);
 
 exit:
@@ -150,9 +207,18 @@ unicodedata_UCD_bidirectional(PyObject *self, PyObject *arg)
     PyObject *return_value = NULL;
     int chr;
 
-    if (!PyArg_Parse(arg, "C:bidirectional", &chr)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("bidirectional", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    if (PyUnicode_READY(arg)) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        _PyArg_BadArgument("bidirectional", "argument", "a unicode character", arg);
         goto exit;
     }
+    chr = PyUnicode_READ_CHAR(arg, 0);
     return_value = unicodedata_UCD_bidirectional_impl(self, chr);
 
 exit:
@@ -180,9 +246,18 @@ unicodedata_UCD_combining(PyObject *self, PyObject *arg)
     int chr;
     int _return_value;
 
-    if (!PyArg_Parse(arg, "C:combining", &chr)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("combining", "argument", "a unicode character", arg);
         goto exit;
     }
+    if (PyUnicode_READY(arg)) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        _PyArg_BadArgument("combining", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(arg, 0);
     _return_value = unicodedata_UCD_combining_impl(self, chr);
     if ((_return_value == -1) && PyErr_Occurred()) {
         goto exit;
@@ -215,9 +290,18 @@ unicodedata_UCD_mirrored(PyObject *self, PyObject *arg)
     int chr;
     int _return_value;
 
-    if (!PyArg_Parse(arg, "C:mirrored", &chr)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("mirrored", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    if (PyUnicode_READY(arg)) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        _PyArg_BadArgument("mirrored", "argument", "a unicode character", arg);
         goto exit;
     }
+    chr = PyUnicode_READ_CHAR(arg, 0);
     _return_value = unicodedata_UCD_mirrored_impl(self, chr);
     if ((_return_value == -1) && PyErr_Occurred()) {
         goto exit;
@@ -246,9 +330,18 @@ unicodedata_UCD_east_asian_width(PyObject *self, PyObject *arg)
     PyObject *return_value = NULL;
     int chr;
 
-    if (!PyArg_Parse(arg, "C:east_asian_width", &chr)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("east_asian_width", "argument", "a unicode character", arg);
         goto exit;
     }
+    if (PyUnicode_READY(arg)) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        _PyArg_BadArgument("east_asian_width", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(arg, 0);
     return_value = unicodedata_UCD_east_asian_width_impl(self, chr);
 
 exit:
@@ -275,15 +368,71 @@ unicodedata_UCD_decomposition(PyObject *self, PyObject *arg)
     PyObject *return_value = NULL;
     int chr;
 
-    if (!PyArg_Parse(arg, "C:decomposition", &chr)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("decomposition", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    if (PyUnicode_READY(arg)) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        _PyArg_BadArgument("decomposition", "argument", "a unicode character", arg);
         goto exit;
     }
+    chr = PyUnicode_READ_CHAR(arg, 0);
     return_value = unicodedata_UCD_decomposition_impl(self, chr);
 
 exit:
     return return_value;
 }
 
+PyDoc_STRVAR(unicodedata_UCD_is_normalized__doc__,
+"is_normalized($self, form, unistr, /)\n"
+"--\n"
+"\n"
+"Return whether the Unicode string unistr is in the normal form \'form\'.\n"
+"\n"
+"Valid values for form are \'NFC\', \'NFKC\', \'NFD\', and \'NFKD\'.");
+
+#define UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF    \
+    {"is_normalized", (PyCFunction)(void(*)(void))unicodedata_UCD_is_normalized, METH_FASTCALL, unicodedata_UCD_is_normalized__doc__},
+
+static PyObject *
+unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
+                                   PyObject *input);
+
+static PyObject *
+unicodedata_UCD_is_normalized(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    PyObject *form;
+    PyObject *input;
+
+    if (!_PyArg_CheckPositional("is_normalized", nargs, 2, 2)) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("is_normalized", "argument 1", "str", args[0]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[0]) == -1) {
+        goto exit;
+    }
+    form = args[0];
+    if (!PyUnicode_Check(args[1])) {
+        _PyArg_BadArgument("is_normalized", "argument 2", "str", args[1]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[1]) == -1) {
+        goto exit;
+    }
+    input = args[1];
+    return_value = unicodedata_UCD_is_normalized_impl(self, form, input);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(unicodedata_UCD_normalize__doc__,
 "normalize($self, form, unistr, /)\n"
 "--\n"
@@ -293,23 +442,38 @@ PyDoc_STRVAR(unicodedata_UCD_normalize__doc__,
 "Valid values for form are \'NFC\', \'NFKC\', \'NFD\', and \'NFKD\'.");
 
 #define UNICODEDATA_UCD_NORMALIZE_METHODDEF    \
-    {"normalize", (PyCFunction)unicodedata_UCD_normalize, METH_VARARGS, unicodedata_UCD_normalize__doc__},
+    {"normalize", (PyCFunction)(void(*)(void))unicodedata_UCD_normalize, METH_FASTCALL, unicodedata_UCD_normalize__doc__},
 
 static PyObject *
-unicodedata_UCD_normalize_impl(PyObject *self, const char *form,
+unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
                                PyObject *input);
 
 static PyObject *
-unicodedata_UCD_normalize(PyObject *self, PyObject *args)
+unicodedata_UCD_normalize(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
-    const char *form;
+    PyObject *form;
     PyObject *input;
 
-    if (!PyArg_ParseTuple(args, "sO!:normalize",
-        &form, &PyUnicode_Type, &input)) {
+    if (!_PyArg_CheckPositional("normalize", nargs, 2, 2)) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("normalize", "argument 1", "str", args[0]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[0]) == -1) {
+        goto exit;
+    }
+    form = args[0];
+    if (!PyUnicode_Check(args[1])) {
+        _PyArg_BadArgument("normalize", "argument 2", "str", args[1]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[1]) == -1) {
         goto exit;
     }
+    input = args[1];
     return_value = unicodedata_UCD_normalize_impl(self, form, input);
 
 exit:
@@ -326,22 +490,38 @@ PyDoc_STRVAR(unicodedata_UCD_name__doc__,
 "ValueError is raised.");
 
 #define UNICODEDATA_UCD_NAME_METHODDEF    \
-    {"name", (PyCFunction)unicodedata_UCD_name, METH_VARARGS, unicodedata_UCD_name__doc__},
+    {"name", (PyCFunction)(void(*)(void))unicodedata_UCD_name, METH_FASTCALL, unicodedata_UCD_name__doc__},
 
 static PyObject *
 unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value);
 
 static PyObject *
-unicodedata_UCD_name(PyObject *self, PyObject *args)
+unicodedata_UCD_name(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
     int chr;
     PyObject *default_value = NULL;
 
-    if (!PyArg_ParseTuple(args, "C|O:name",
-        &chr, &default_value)) {
+    if (!_PyArg_CheckPositional("name", nargs, 1, 2)) {
         goto exit;
     }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("name", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    if (PyUnicode_READY(args[0])) {
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+        _PyArg_BadArgument("name", "argument 1", "a unicode character", args[0]);
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(args[0], 0);
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    default_value = args[1];
+skip_optional:
     return_value = unicodedata_UCD_name_impl(self, chr, default_value);
 
 exit:
@@ -362,14 +542,14 @@ PyDoc_STRVAR(unicodedata_UCD_lookup__doc__,
 
 static PyObject *
 unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
-                            Py_ssize_t name_length);
+                            Py_ssize_clean_t name_length);
 
 static PyObject *
 unicodedata_UCD_lookup(PyObject *self, PyObject *arg)
 {
     PyObject *return_value = NULL;
     const char *name;
-    Py_ssize_t name_length;
+    Py_ssize_clean_t name_length;
 
     if (!PyArg_Parse(arg, "s#:lookup", &name, &name_length)) {
         goto exit;
@@ -379,5 +559,4 @@ unicodedata_UCD_lookup(PyObject *self, PyObject *arg)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=78d7a7ae57014502 input=a9049054013a1b77]*/
-
+/*[clinic end generated code: output=10c23477dbe8a202 input=a9049054013a1b77]*/

SnoopJ · 2022-09-23T18:52:11Z

CI failures appear to be specific to 3.11, more specifically a failure in the module initialization code which is still using the older style in this changeset:

    × python setup.py bdist_wheel did not run successfully.
    │ exit code: 1
    ╰─> [13 lines of output]
        running bdist_wheel
        running build
        running build_ext
        building 'unicodedata2' extension
        creating build/temp.linux-x86_64-cpython-311
        creating build/temp.linux-x86_64-cpython-311/unicodedata2
        gcc -pthread -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -fPIC -I./unicodedata2/ -I/opt/_internal/cpython-3.11.0rc2/include/python3.11 -c ./unicodedata2/unicodectype.c -o build/temp.linux-x86_64-cpython-311/./unicodedata2/unicodectype.o
        gcc -pthread -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -fPIC -I./unicodedata2/ -I/opt/_internal/cpython-3.11.0rc2/include/python3.11 -c ./unicodedata2/unicodedata.c -o build/temp.linux-x86_64-cpython-311/./unicodedata2/unicodedata.o
        ./unicodedata2/unicodedata.c: In function ‘PyInit_unicodedata2’:
        ./unicodedata2/unicodedata.c:1362:24: error: lvalue required as left operand of assignment
         1362 |     Py_TYPE(&UCD_Type) = &PyType_Type;
              |                        ^
        error: command '/opt/rh/devtoolset-10/root/usr/bin/gcc' failed with exit code 1
        [end of output]

This reverts commit 63f6c6a.

SnoopJ · 2022-09-23T22:05:14Z

Build is fine on 3.11 now after the revert.

Not sure about the remaining CI failures; it seems that the `test` module is not available in the PyPy the CI is using, although it is there for me when running the suite locally and appears to be in PyPy v7.3.9.

SnoopJ · 2022-09-23T22:50:41Z

The offending import wasn't being used for anything, so disabling it is no big deal. Not sure if this will crop up in the future for parts of the test suite that use more helpers, but as it stands right now I don't see compelling reasons to port those things to this library.

Looks like the CI environment doesn't have the `test` module because it's explicitly removed by the PyPA recipe (edit: this has been fixed upstream).

anthrotype · 2022-10-28T18:11:27Z

hey @SnoopJ sorry for the long wait. Is this PR ready for merge?

SnoopJ · 2022-10-28T18:51:55Z

> hey @SnoopJ sorry for the long wait. Is this PR ready for merge?

@anthrotype No apology necessary, thanks for the ping 😁. I don't recall there being anything outstanding with this PR and it passes the test suite for py{36, 37, 38, 39, 310, 311}, pypy{37,38,39} (I was thinking about opening another PR about adding those targets to tox.ini, incidentally)

So, yep, this is ready to go as far as I'm concerned!

anthrotype · 2022-10-28T19:02:01Z

OK, I'll merge this then

> I was thinking about opening another PR about adding those targets to tox.ini, incidentally

please do, thank you!

SnoopJ added 16 commits September 23, 2022 13:21

Add _PyUnicode2_ToFoldedFull()

bbe96cb

Upstream changes from: python/cpython@d5890c8#diff-6b9cc290ba46b071368fc3669a3be47b96bacdd08e84e34ad234235e3b74d244R252-R266

Fix incorrect docstrings

0eb2c53

Upstream changes from: python/cpython@279f446#diff-59c1e610d58bb87487b55fdaa508712daf753d5ee1b036f047b53c63721f9091

Add compatibility header

b9522a0

Use proper name for ToLowerFull()

8cb2d39

Adjust spelling

5bab163

Upstream changes from: python/cpython@a5293b4#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Fix bugs in unicodedata2.normalize

b9e4125

Upstream changes from: python/cpython@d134809

Use renamed Py_TYPE macro

63f6c6a

Upstream changes from: python/cpython@90aa764#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Re-run clinic (v3.7.14) on unicodedata.c

a46af46

Upstream changes from: python/cpython@6359641#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Adjust Argument Clinic input, remove ready check

0ccd2f1

Upstream changes from: python/cpython@f8d7d41#diff-2cd482084cd623be20584ff84ef4b0ff781c0d955546048585a855156aedfa78

Synchronize makeunicodedata with CPython v3.10.7

2cc2e92

Clarify that static keyword is a deliberate difference

968386d

Backport fix for decomposition caching from 3.11

3289f9a

Upstream changes from: python/cpython@2d9f252#diff-17ec60c9bf5044a66fffc2f5bd5b6c4a6df192332bd2762ddf4336a80f8f8f25

Use correct bounds for CJK Ideograph Extensions C/H

d5f9d58

Re-run makeunicodedata.py

486a96e

Port east_asian_width() bugfix from CPython

a949b12

Upstream changes from: python/cpython@9c197bc

SnoopJ mentioned this pull request Sep 23, 2022

Synchronize with CPython #56

Closed

Revert "Use renamed Py_TYPE macro"

81ab7ae

This reverts commit 63f6c6a.

Comment-out unused import (prevents PyPy CI failures)

75cdcb9

SnoopJ mentioned this pull request Sep 23, 2022

The test module is available for CPython, but not PyPy pypa/manylinux#1382

Closed

anthrotype mentioned this pull request Oct 28, 2022

Doesn't build for Python 3.11 #52

Closed

anthrotype merged commit ed442aa into fonttools:master Oct 28, 2022

SnoopJ deleted the feature/sync-API-with-upstream branch October 28, 2022 19:02

anthrotype mentioned this pull request Oct 28, 2022

Port several things from cpython here #39

Closed

SnoopJ mentioned this pull request Oct 28, 2022

Declare explicit target environments for tox #59

Merged

SnoopJ mentioned this pull request Sep 19, 2023

Update for Unicode 15.1 #60

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Synchronize with CPython 3.7.14 C-API usage, backport #58

Synchronize with CPython 3.7.14 C-API usage, backport #58

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

anthrotype commented Oct 28, 2022

SnoopJ commented Oct 28, 2022

anthrotype commented Oct 28, 2022

Synchronize with CPython 3.7.14 C-API usage, backport #58

Synchronize with CPython 3.7.14 C-API usage, backport #58

Conversation

SnoopJ commented Sep 23, 2022 • edited Loading

SnoopJ commented Sep 23, 2022 • edited Loading

SnoopJ commented Sep 23, 2022 • edited Loading

SnoopJ commented Sep 23, 2022 • edited Loading

SnoopJ commented Sep 23, 2022 • edited Loading

anthrotype commented Oct 28, 2022

SnoopJ commented Oct 28, 2022

anthrotype commented Oct 28, 2022

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading

SnoopJ commented Sep 23, 2022 •

edited

Loading