-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Synchronize with CPython 3.7.14 C-API usage, backport #58
Synchronize with CPython 3.7.14 C-API usage, backport #58
Conversation
Upstream changes from: python/cpython@d134809
Upstream changes from:
- python/cpython@93cbca3#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue3811)
- python/cpython@1b08b30#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue5828)
- python/cpython@71efeb7#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue4971)
- python/cpython@806d8cf#diff-c31ff7b8fca97de6b4fdaca3e14da27ab3cac411653e9c510a5378b189f909ea (see https://bugs.python.org/issue7643)
Upstream changes from: python/cpython@9c197bc
When I initially set out to write this PR, I was hoping that I could achieve better synchronization with the leading-edge upstream changes to […]
Expand for a diff of `unicodedata.c.h` against v3.10.7:
diff --git a/unicodedata2/unicodedata.c.h b/home/jgerity/repos/cpython/Modules/clinic/unicodedata.c.h
index 524505d..4251db2 100644
--- a/unicodedata2/unicodedata.c.h
+++ b/home/jgerity/repos/cpython/Modules/clinic/unicodedata.c.h
@@ -13,23 +13,39 @@ PyDoc_STRVAR(unicodedata_UCD_decimal__doc__,
"ValueError is raised.");
#define UNICODEDATA_UCD_DECIMAL_METHODDEF \
- {"decimal", (PyCFunction)unicodedata_UCD_decimal, METH_VARARGS, unicodedata_UCD_decimal__doc__},
+ {"decimal", (PyCFunction)(void(*)(void))unicodedata_UCD_decimal, METH_FASTCALL, unicodedata_UCD_decimal__doc__},
static PyObject *
unicodedata_UCD_decimal_impl(PyObject *self, int chr,
PyObject *default_value);
static PyObject *
-unicodedata_UCD_decimal(PyObject *self, PyObject *args)
+unicodedata_UCD_decimal(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
int chr;
PyObject *default_value = NULL;
- if (!PyArg_ParseTuple(args, "C|O:decimal",
- &chr, &default_value)) {
+ if (!_PyArg_CheckPositional("decimal", nargs, 1, 2)) {
goto exit;
}
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("decimal", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[0])) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+ _PyArg_BadArgument("decimal", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(args[0], 0);
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ default_value = args[1];
+skip_optional:
return_value = unicodedata_UCD_decimal_impl(self, chr, default_value);
exit:
@@ -47,22 +63,38 @@ PyDoc_STRVAR(unicodedata_UCD_digit__doc__,
"ValueError is raised.");
#define UNICODEDATA_UCD_DIGIT_METHODDEF \
- {"digit", (PyCFunction)unicodedata_UCD_digit, METH_VARARGS, unicodedata_UCD_digit__doc__},
+ {"digit", (PyCFunction)(void(*)(void))unicodedata_UCD_digit, METH_FASTCALL, unicodedata_UCD_digit__doc__},
static PyObject *
unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value);
static PyObject *
-unicodedata_UCD_digit(PyObject *self, PyObject *args)
+unicodedata_UCD_digit(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
int chr;
PyObject *default_value = NULL;
- if (!PyArg_ParseTuple(args, "C|O:digit",
- &chr, &default_value)) {
+ if (!_PyArg_CheckPositional("digit", nargs, 1, 2)) {
+ goto exit;
+ }
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("digit", "argument 1", "a unicode character", args[0]);
goto exit;
}
+ if (PyUnicode_READY(args[0])) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+ _PyArg_BadArgument("digit", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(args[0], 0);
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ default_value = args[1];
+skip_optional:
return_value = unicodedata_UCD_digit_impl(self, chr, default_value);
exit:
@@ -80,23 +112,39 @@ PyDoc_STRVAR(unicodedata_UCD_numeric__doc__,
"ValueError is raised.");
#define UNICODEDATA_UCD_NUMERIC_METHODDEF \
- {"numeric", (PyCFunction)unicodedata_UCD_numeric, METH_VARARGS, unicodedata_UCD_numeric__doc__},
+ {"numeric", (PyCFunction)(void(*)(void))unicodedata_UCD_numeric, METH_FASTCALL, unicodedata_UCD_numeric__doc__},
static PyObject *
unicodedata_UCD_numeric_impl(PyObject *self, int chr,
PyObject *default_value);
static PyObject *
-unicodedata_UCD_numeric(PyObject *self, PyObject *args)
+unicodedata_UCD_numeric(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
int chr;
PyObject *default_value = NULL;
- if (!PyArg_ParseTuple(args, "C|O:numeric",
- &chr, &default_value)) {
+ if (!_PyArg_CheckPositional("numeric", nargs, 1, 2)) {
+ goto exit;
+ }
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("numeric", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[0])) {
goto exit;
}
+ if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+ _PyArg_BadArgument("numeric", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(args[0], 0);
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ default_value = args[1];
+skip_optional:
return_value = unicodedata_UCD_numeric_impl(self, chr, default_value);
exit:
@@ -121,9 +169,18 @@ unicodedata_UCD_category(PyObject *self, PyObject *arg)
PyObject *return_value = NULL;
int chr;
- if (!PyArg_Parse(arg, "C:category", &chr)) {
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("category", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ if (PyUnicode_READY(arg)) {
goto exit;
}
+ if (PyUnicode_GET_LENGTH(arg) != 1) {
+ _PyArg_BadArgument("category", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(arg, 0);
return_value = unicodedata_UCD_category_impl(self, chr);
exit:
@@ -150,9 +207,18 @@ unicodedata_UCD_bidirectional(PyObject *self, PyObject *arg)
PyObject *return_value = NULL;
int chr;
- if (!PyArg_Parse(arg, "C:bidirectional", &chr)) {
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("bidirectional", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ if (PyUnicode_READY(arg)) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(arg) != 1) {
+ _PyArg_BadArgument("bidirectional", "argument", "a unicode character", arg);
goto exit;
}
+ chr = PyUnicode_READ_CHAR(arg, 0);
return_value = unicodedata_UCD_bidirectional_impl(self, chr);
exit:
@@ -180,9 +246,18 @@ unicodedata_UCD_combining(PyObject *self, PyObject *arg)
int chr;
int _return_value;
- if (!PyArg_Parse(arg, "C:combining", &chr)) {
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("combining", "argument", "a unicode character", arg);
goto exit;
}
+ if (PyUnicode_READY(arg)) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(arg) != 1) {
+ _PyArg_BadArgument("combining", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(arg, 0);
_return_value = unicodedata_UCD_combining_impl(self, chr);
if ((_return_value == -1) && PyErr_Occurred()) {
goto exit;
@@ -215,9 +290,18 @@ unicodedata_UCD_mirrored(PyObject *self, PyObject *arg)
int chr;
int _return_value;
- if (!PyArg_Parse(arg, "C:mirrored", &chr)) {
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("mirrored", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ if (PyUnicode_READY(arg)) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(arg) != 1) {
+ _PyArg_BadArgument("mirrored", "argument", "a unicode character", arg);
goto exit;
}
+ chr = PyUnicode_READ_CHAR(arg, 0);
_return_value = unicodedata_UCD_mirrored_impl(self, chr);
if ((_return_value == -1) && PyErr_Occurred()) {
goto exit;
@@ -246,9 +330,18 @@ unicodedata_UCD_east_asian_width(PyObject *self, PyObject *arg)
PyObject *return_value = NULL;
int chr;
- if (!PyArg_Parse(arg, "C:east_asian_width", &chr)) {
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("east_asian_width", "argument", "a unicode character", arg);
goto exit;
}
+ if (PyUnicode_READY(arg)) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(arg) != 1) {
+ _PyArg_BadArgument("east_asian_width", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(arg, 0);
return_value = unicodedata_UCD_east_asian_width_impl(self, chr);
exit:
@@ -275,15 +368,71 @@ unicodedata_UCD_decomposition(PyObject *self, PyObject *arg)
PyObject *return_value = NULL;
int chr;
- if (!PyArg_Parse(arg, "C:decomposition", &chr)) {
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("decomposition", "argument", "a unicode character", arg);
+ goto exit;
+ }
+ if (PyUnicode_READY(arg)) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(arg) != 1) {
+ _PyArg_BadArgument("decomposition", "argument", "a unicode character", arg);
goto exit;
}
+ chr = PyUnicode_READ_CHAR(arg, 0);
return_value = unicodedata_UCD_decomposition_impl(self, chr);
exit:
return return_value;
}
+PyDoc_STRVAR(unicodedata_UCD_is_normalized__doc__,
+"is_normalized($self, form, unistr, /)\n"
+"--\n"
+"\n"
+"Return whether the Unicode string unistr is in the normal form \'form\'.\n"
+"\n"
+"Valid values for form are \'NFC\', \'NFKC\', \'NFD\', and \'NFKD\'.");
+
+#define UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF \
+ {"is_normalized", (PyCFunction)(void(*)(void))unicodedata_UCD_is_normalized, METH_FASTCALL, unicodedata_UCD_is_normalized__doc__},
+
+static PyObject *
+unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
+ PyObject *input);
+
+static PyObject *
+unicodedata_UCD_is_normalized(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyObject *form;
+ PyObject *input;
+
+ if (!_PyArg_CheckPositional("is_normalized", nargs, 2, 2)) {
+ goto exit;
+ }
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("is_normalized", "argument 1", "str", args[0]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[0]) == -1) {
+ goto exit;
+ }
+ form = args[0];
+ if (!PyUnicode_Check(args[1])) {
+ _PyArg_BadArgument("is_normalized", "argument 2", "str", args[1]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[1]) == -1) {
+ goto exit;
+ }
+ input = args[1];
+ return_value = unicodedata_UCD_is_normalized_impl(self, form, input);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(unicodedata_UCD_normalize__doc__,
"normalize($self, form, unistr, /)\n"
"--\n"
@@ -293,23 +442,38 @@ PyDoc_STRVAR(unicodedata_UCD_normalize__doc__,
"Valid values for form are \'NFC\', \'NFKC\', \'NFD\', and \'NFKD\'.");
#define UNICODEDATA_UCD_NORMALIZE_METHODDEF \
- {"normalize", (PyCFunction)unicodedata_UCD_normalize, METH_VARARGS, unicodedata_UCD_normalize__doc__},
+ {"normalize", (PyCFunction)(void(*)(void))unicodedata_UCD_normalize, METH_FASTCALL, unicodedata_UCD_normalize__doc__},
static PyObject *
-unicodedata_UCD_normalize_impl(PyObject *self, const char *form,
+unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
PyObject *input);
static PyObject *
-unicodedata_UCD_normalize(PyObject *self, PyObject *args)
+unicodedata_UCD_normalize(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
- const char *form;
+ PyObject *form;
PyObject *input;
- if (!PyArg_ParseTuple(args, "sO!:normalize",
- &form, &PyUnicode_Type, &input)) {
+ if (!_PyArg_CheckPositional("normalize", nargs, 2, 2)) {
+ goto exit;
+ }
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("normalize", "argument 1", "str", args[0]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[0]) == -1) {
+ goto exit;
+ }
+ form = args[0];
+ if (!PyUnicode_Check(args[1])) {
+ _PyArg_BadArgument("normalize", "argument 2", "str", args[1]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[1]) == -1) {
goto exit;
}
+ input = args[1];
return_value = unicodedata_UCD_normalize_impl(self, form, input);
exit:
@@ -326,22 +490,38 @@ PyDoc_STRVAR(unicodedata_UCD_name__doc__,
"ValueError is raised.");
#define UNICODEDATA_UCD_NAME_METHODDEF \
- {"name", (PyCFunction)unicodedata_UCD_name, METH_VARARGS, unicodedata_UCD_name__doc__},
+ {"name", (PyCFunction)(void(*)(void))unicodedata_UCD_name, METH_FASTCALL, unicodedata_UCD_name__doc__},
static PyObject *
unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value);
static PyObject *
-unicodedata_UCD_name(PyObject *self, PyObject *args)
+unicodedata_UCD_name(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
int chr;
PyObject *default_value = NULL;
- if (!PyArg_ParseTuple(args, "C|O:name",
- &chr, &default_value)) {
+ if (!_PyArg_CheckPositional("name", nargs, 1, 2)) {
goto exit;
}
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("name", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ if (PyUnicode_READY(args[0])) {
+ goto exit;
+ }
+ if (PyUnicode_GET_LENGTH(args[0]) != 1) {
+ _PyArg_BadArgument("name", "argument 1", "a unicode character", args[0]);
+ goto exit;
+ }
+ chr = PyUnicode_READ_CHAR(args[0], 0);
+ if (nargs < 2) {
+ goto skip_optional;
+ }
+ default_value = args[1];
+skip_optional:
return_value = unicodedata_UCD_name_impl(self, chr, default_value);
exit:
@@ -362,14 +542,14 @@ PyDoc_STRVAR(unicodedata_UCD_lookup__doc__,
static PyObject *
unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
- Py_ssize_t name_length);
+ Py_ssize_clean_t name_length);
static PyObject *
unicodedata_UCD_lookup(PyObject *self, PyObject *arg)
{
PyObject *return_value = NULL;
const char *name;
- Py_ssize_t name_length;
+ Py_ssize_clean_t name_length;
if (!PyArg_Parse(arg, "s#:lookup", &name, &name_length)) {
goto exit;
@@ -379,5 +559,4 @@ unicodedata_UCD_lookup(PyObject *self, PyObject *arg)
exit:
return return_value;
}
-/*[clinic end generated code: output=78d7a7ae57014502 input=a9049054013a1b77]*/
-
+/*[clinic end generated code: output=10c23477dbe8a202 input=a9049054013a1b77]*/
|
CI failures appear to be specific to 3.11, more specifically a failure in the module initialization code which is still using the older style in this changeset:
|
This reverts commit 63f6c6a.
Build is fine on 3.11 now after the revert. Not sure about the remaining CI failures, seems that the |
The offending import wasn't being used for anything, so disabling it is no big deal. Not sure if this will crop up in the future for parts of the test suite that use more helpers, but as it stands right now I don't see compelling reasons to port those things to this library. Looks like the CI environment doesn't have |
hey @SnoopJ sorry for the long wait. Is this PR ready for merge? |
@anthrotype No apology necessary, thanks for the ping 😁. I don't recall there being anything outstanding with this PR and it passes the test suite for So, yep, this is ready to go as far as I'm concerned! |
OK, I'll merge this then
please do thank you! |
This PR updates C API usage in `unicodedata2` to better match CPython upstream. See #56. Also included are a handful of bugfixes from later versions of CPython.
More specifically, this changeset aims for a best-effort match to C API usage as of CPython v3.7.14, see expandable section below for a diff of the residual differences.
This changeset passes the test suite (including new tests) on CPython 3.7-3.10 and PyPy 3.7-3.9. I would be happy to update `tox.ini` for testing against these versions if desired. The tests also pass on CPython 3.6 (but not on PyPy 3.6).
Expand for diff of `unicodedata.c` vs v3.7.14
This may duplicate some of what's in #39, but that PR looks like it has been abandoned.