Skip to content

Commit deb921f

Browse files
gh-117431: Adapt bytes and bytearray .find() and friends to Argument Clinic (#117502)
This change gives a significant speedup, as the METH_FASTCALL calling convention is now used.

The following bytes and bytearray methods are adapted:

- count()
- find()
- index()
- rfind()
- rindex()

Co-authored-by: Inada Naoki <[email protected]>
1 parent 49fc141 commit deb921f

File tree

8 files changed

+703
-164
lines changed

8 files changed

+703
-164
lines changed

Include/internal/pycore_bytes_methods.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,16 @@ extern void _Py_bytes_title(char *result, const char *s, Py_ssize_t len);
2626
extern void _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len);
2727
extern void _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len);
2828

29-
extern PyObject *_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args);
30-
extern PyObject *_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args);
31-
extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args);
32-
extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args);
33-
extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args);
29+
extern PyObject *_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub,
30+
Py_ssize_t start, Py_ssize_t end);
31+
extern PyObject *_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub,
32+
Py_ssize_t start, Py_ssize_t end);
33+
extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub,
34+
Py_ssize_t start, Py_ssize_t end);
35+
extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub,
36+
Py_ssize_t start, Py_ssize_t end);
37+
extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub,
38+
Py_ssize_t start, Py_ssize_t end);
3439
extern int _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg);
3540
extern PyObject *_Py_bytes_startswith(const char *str, Py_ssize_t len,
3641
PyObject *subobj, Py_ssize_t start,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Improve the performance of the following :class:`bytes` and
2+
:class:`bytearray` methods by adapting them to the :c:macro:`METH_FASTCALL`
3+
calling convention:
4+
5+
* :meth:`!count`
6+
* :meth:`!find`
7+
* :meth:`!index`
8+
* :meth:`!rfind`
9+
* :meth:`!rindex`

Objects/bytearrayobject.c

+76-17
Original file line numberDiff line numberDiff line change
@@ -1121,16 +1121,44 @@ bytearray_dealloc(PyByteArrayObject *self)
11211121
#include "stringlib/transmogrify.h"
11221122

11231123

1124+
/*[clinic input]
1125+
@text_signature "($self, sub[, start[, end]], /)"
1126+
bytearray.find
1127+
1128+
sub: object
1129+
start: slice_index(accept={int, NoneType}, c_default='0') = None
1130+
Optional start position. Default: start of the bytes.
1131+
end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
1132+
Optional stop position. Default: end of the bytes.
1133+
/
1134+
1135+
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1136+
1137+
Return -1 on failure.
1138+
[clinic start generated code]*/
1139+
11241140
static PyObject *
1125-
bytearray_find(PyByteArrayObject *self, PyObject *args)
1141+
bytearray_find_impl(PyByteArrayObject *self, PyObject *sub, Py_ssize_t start,
1142+
Py_ssize_t end)
1143+
/*[clinic end generated code: output=413e1cab2ae87da0 input=793dfad803e2952f]*/
11261144
{
1127-
return _Py_bytes_find(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
1145+
return _Py_bytes_find(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1146+
sub, start, end);
11281147
}
11291148

1149+
/*[clinic input]
1150+
bytearray.count = bytearray.find
1151+
1152+
Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
1153+
[clinic start generated code]*/
1154+
11301155
static PyObject *
1131-
bytearray_count(PyByteArrayObject *self, PyObject *args)
1156+
bytearray_count_impl(PyByteArrayObject *self, PyObject *sub,
1157+
Py_ssize_t start, Py_ssize_t end)
1158+
/*[clinic end generated code: output=a21ee2692e4f1233 input=4deb529db38deda8]*/
11321159
{
1133-
return _Py_bytes_count(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
1160+
return _Py_bytes_count(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1161+
sub, start, end);
11341162
}
11351163

11361164
/*[clinic input]
@@ -1162,22 +1190,55 @@ bytearray_copy_impl(PyByteArrayObject *self)
11621190
PyByteArray_GET_SIZE(self));
11631191
}
11641192

1193+
/*[clinic input]
1194+
bytearray.index = bytearray.find
1195+
1196+
Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1197+
1198+
Raise ValueError if the subsection is not found.
1199+
[clinic start generated code]*/
1200+
11651201
static PyObject *
1166-
bytearray_index(PyByteArrayObject *self, PyObject *args)
1202+
bytearray_index_impl(PyByteArrayObject *self, PyObject *sub,
1203+
Py_ssize_t start, Py_ssize_t end)
1204+
/*[clinic end generated code: output=067a1e78efc672a7 input=8cbaf6836dbd2a9a]*/
11671205
{
1168-
return _Py_bytes_index(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
1206+
return _Py_bytes_index(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1207+
sub, start, end);
11691208
}
11701209

1210+
/*[clinic input]
1211+
bytearray.rfind = bytearray.find
1212+
1213+
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1214+
1215+
Return -1 on failure.
1216+
[clinic start generated code]*/
1217+
11711218
static PyObject *
1172-
bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1219+
bytearray_rfind_impl(PyByteArrayObject *self, PyObject *sub,
1220+
Py_ssize_t start, Py_ssize_t end)
1221+
/*[clinic end generated code: output=51bf886f932b283c input=eaa107468a158423]*/
11731222
{
1174-
return _Py_bytes_rfind(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
1223+
return _Py_bytes_rfind(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1224+
sub, start, end);
11751225
}
11761226

1227+
/*[clinic input]
1228+
bytearray.rindex = bytearray.find
1229+
1230+
Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].
1231+
1232+
Raise ValueError if the subsection is not found.
1233+
[clinic start generated code]*/
1234+
11771235
static PyObject *
1178-
bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1236+
bytearray_rindex_impl(PyByteArrayObject *self, PyObject *sub,
1237+
Py_ssize_t start, Py_ssize_t end)
1238+
/*[clinic end generated code: output=38e1cf66bafb08b9 input=81cf49d0af4d5bd0]*/
11791239
{
1180-
return _Py_bytes_rindex(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
1240+
return _Py_bytes_rindex(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1241+
sub, start, end);
11811242
}
11821243

11831244
static int
@@ -2236,17 +2297,15 @@ bytearray_methods[] = {
22362297
STRINGLIB_CENTER_METHODDEF
22372298
BYTEARRAY_CLEAR_METHODDEF
22382299
BYTEARRAY_COPY_METHODDEF
2239-
{"count", (PyCFunction)bytearray_count, METH_VARARGS,
2240-
_Py_count__doc__},
2300+
BYTEARRAY_COUNT_METHODDEF
22412301
BYTEARRAY_DECODE_METHODDEF
22422302
BYTEARRAY_ENDSWITH_METHODDEF
22432303
STRINGLIB_EXPANDTABS_METHODDEF
22442304
BYTEARRAY_EXTEND_METHODDEF
2245-
{"find", (PyCFunction)bytearray_find, METH_VARARGS,
2246-
_Py_find__doc__},
2305+
BYTEARRAY_FIND_METHODDEF
22472306
BYTEARRAY_FROMHEX_METHODDEF
22482307
BYTEARRAY_HEX_METHODDEF
2249-
{"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__},
2308+
BYTEARRAY_INDEX_METHODDEF
22502309
BYTEARRAY_INSERT_METHODDEF
22512310
{"isalnum", stringlib_isalnum, METH_NOARGS,
22522311
_Py_isalnum__doc__},
@@ -2276,8 +2335,8 @@ bytearray_methods[] = {
22762335
BYTEARRAY_REMOVEPREFIX_METHODDEF
22772336
BYTEARRAY_REMOVESUFFIX_METHODDEF
22782337
BYTEARRAY_REVERSE_METHODDEF
2279-
{"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, _Py_rfind__doc__},
2280-
{"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, _Py_rindex__doc__},
2338+
BYTEARRAY_RFIND_METHODDEF
2339+
BYTEARRAY_RINDEX_METHODDEF
22812340
STRINGLIB_RJUST_METHODDEF
22822341
BYTEARRAY_RPARTITION_METHODDEF
22832342
BYTEARRAY_RSPLIT_METHODDEF

Objects/bytes_methods.c

+26-76
Original file line numberDiff line numberDiff line change
@@ -453,31 +453,21 @@ stringlib_parse_args_finds().
453453
*/
454454

455455
Py_LOCAL_INLINE(int)
456-
parse_args_finds_byte(const char *function_name, PyObject *args,
457-
PyObject **subobj, char *byte,
458-
Py_ssize_t *start, Py_ssize_t *end)
456+
parse_args_finds_byte(const char *function_name, PyObject **subobj, char *byte)
459457
{
460-
PyObject *tmp_subobj;
461-
Py_ssize_t ival;
462-
463-
if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
464-
start, end))
465-
return 0;
466-
467-
if (PyObject_CheckBuffer(tmp_subobj)) {
468-
*subobj = tmp_subobj;
458+
if (PyObject_CheckBuffer(*subobj)) {
469459
return 1;
470460
}
471461

472-
if (!_PyIndex_Check(tmp_subobj)) {
462+
if (!_PyIndex_Check(*subobj)) {
473463
PyErr_Format(PyExc_TypeError,
474464
"argument should be integer or bytes-like object, "
475465
"not '%.200s'",
476-
Py_TYPE(tmp_subobj)->tp_name);
466+
Py_TYPE(*subobj)->tp_name);
477467
return 0;
478468
}
479469

480-
ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
470+
Py_ssize_t ival = PyNumber_AsSsize_t(*subobj, NULL);
481471
if (ival == -1 && PyErr_Occurred()) {
482472
return 0;
483473
}
@@ -508,19 +498,19 @@ parse_args_finds_byte(const char *function_name, PyObject *args,
508498

509499
Py_LOCAL_INLINE(Py_ssize_t)
510500
find_internal(const char *str, Py_ssize_t len,
511-
const char *function_name, PyObject *args, int dir)
501+
const char *function_name, PyObject *subobj,
502+
Py_ssize_t start, Py_ssize_t end,
503+
int dir)
512504
{
513-
PyObject *subobj;
514505
char byte;
515506
Py_buffer subbuf;
516507
const char *sub;
517508
Py_ssize_t sub_len;
518-
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
519509
Py_ssize_t res;
520510

521-
if (!parse_args_finds_byte(function_name, args,
522-
&subobj, &byte, &start, &end))
511+
if (!parse_args_finds_byte(function_name, &subobj, &byte)) {
523512
return -2;
513+
}
524514

525515
if (subobj) {
526516
if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
@@ -566,37 +556,21 @@ find_internal(const char *str, Py_ssize_t len,
566556
return res;
567557
}
568558

569-
PyDoc_STRVAR_shared(_Py_find__doc__,
570-
"B.find(sub[, start[, end]]) -> int\n\
571-
\n\
572-
Return the lowest index in B where subsection sub is found,\n\
573-
such that sub is contained within B[start,end]. Optional\n\
574-
arguments start and end are interpreted as in slice notation.\n\
575-
\n\
576-
Return -1 on failure.");
577-
578559
PyObject *
579-
_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
560+
_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub,
561+
Py_ssize_t start, Py_ssize_t end)
580562
{
581-
Py_ssize_t result = find_internal(str, len, "find", args, +1);
563+
Py_ssize_t result = find_internal(str, len, "find", sub, start, end, +1);
582564
if (result == -2)
583565
return NULL;
584566
return PyLong_FromSsize_t(result);
585567
}
586568

587-
PyDoc_STRVAR_shared(_Py_index__doc__,
588-
"B.index(sub[, start[, end]]) -> int\n\
589-
\n\
590-
Return the lowest index in B where subsection sub is found,\n\
591-
such that sub is contained within B[start,end]. Optional\n\
592-
arguments start and end are interpreted as in slice notation.\n\
593-
\n\
594-
Raises ValueError when the subsection is not found.");
595-
596569
PyObject *
597-
_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
570+
_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub,
571+
Py_ssize_t start, Py_ssize_t end)
598572
{
599-
Py_ssize_t result = find_internal(str, len, "index", args, +1);
573+
Py_ssize_t result = find_internal(str, len, "index", sub, start, end, +1);
600574
if (result == -2)
601575
return NULL;
602576
if (result == -1) {
@@ -607,37 +581,21 @@ _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
607581
return PyLong_FromSsize_t(result);
608582
}
609583

610-
PyDoc_STRVAR_shared(_Py_rfind__doc__,
611-
"B.rfind(sub[, start[, end]]) -> int\n\
612-
\n\
613-
Return the highest index in B where subsection sub is found,\n\
614-
such that sub is contained within B[start,end]. Optional\n\
615-
arguments start and end are interpreted as in slice notation.\n\
616-
\n\
617-
Return -1 on failure.");
618-
619584
PyObject *
620-
_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
585+
_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub,
586+
Py_ssize_t start, Py_ssize_t end)
621587
{
622-
Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
588+
Py_ssize_t result = find_internal(str, len, "rfind", sub, start, end, -1);
623589
if (result == -2)
624590
return NULL;
625591
return PyLong_FromSsize_t(result);
626592
}
627593

628-
PyDoc_STRVAR_shared(_Py_rindex__doc__,
629-
"B.rindex(sub[, start[, end]]) -> int\n\
630-
\n\
631-
Return the highest index in B where subsection sub is found,\n\
632-
such that sub is contained within B[start,end]. Optional\n\
633-
arguments start and end are interpreted as in slice notation.\n\
634-
\n\
635-
Raise ValueError when the subsection is not found.");
636-
637594
PyObject *
638-
_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
595+
_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub,
596+
Py_ssize_t start, Py_ssize_t end)
639597
{
640-
Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
598+
Py_ssize_t result = find_internal(str, len, "rindex", sub, start, end, -1);
641599
if (result == -2)
642600
return NULL;
643601
if (result == -1) {
@@ -648,28 +606,20 @@ _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
648606
return PyLong_FromSsize_t(result);
649607
}
650608

651-
PyDoc_STRVAR_shared(_Py_count__doc__,
652-
"B.count(sub[, start[, end]]) -> int\n\
653-
\n\
654-
Return the number of non-overlapping occurrences of subsection sub in\n\
655-
bytes B[start:end]. Optional arguments start and end are interpreted\n\
656-
as in slice notation.");
657-
658609
PyObject *
659-
_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
610+
_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub_obj,
611+
Py_ssize_t start, Py_ssize_t end)
660612
{
661-
PyObject *sub_obj;
662613
const char *sub;
663614
Py_ssize_t sub_len;
664615
char byte;
665-
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
666616

667617
Py_buffer vsub;
668618
PyObject *count_obj;
669619

670-
if (!parse_args_finds_byte("count", args,
671-
&sub_obj, &byte, &start, &end))
620+
if (!parse_args_finds_byte("count", &sub_obj, &byte)) {
672621
return NULL;
622+
}
673623

674624
if (sub_obj) {
675625
if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)

0 commit comments

Comments
 (0)