Skip to content

Commit b3dec4a

Browse files
committed
Improve internal function argument parsing performance by reducing code bloat
The goal of this PR is to reduce some of the code bloat induced by fast-ZPP. Reduced code bloat results in fewer cache misses (and better DSB coverage), and fewer instructions executed. If we take a look at a simple function:

```c
PHP_FUNCTION(twice)
{
    zend_long foo;
    ZEND_PARSE_PARAMETERS_START(1, 1)
        Z_PARAM_LONG(foo)
    ZEND_PARSE_PARAMETERS_END();
    RETURN_LONG(foo * 2);
}
```

We obtain the following assembly on x86-64 in the non-cold blocks:

```s
<+0>: push %r12
<+2>: push %rbp
<+3>: push %rbx
<+4>: sub $0x10,%rsp
<+8>: mov %fs:0x28,%r12
<+17>: mov %r12,0x8(%rsp)
<+22>: mov 0x2c(%rdi),%r12d
<+26>: cmp $0x1,%r12d
<+30>: jne 0x22beaf <zif_twice-3212257>
<+36>: cmpb $0x4,0x58(%rdi)
<+40>: mov %rsi,%rbp
<+43>: jne 0x53c2f0 <zif_twice+96>
<+45>: mov 0x50(%rdi),%rax
<+49>: add %rax,%rax
<+52>: movl $0x4,0x8(%rbp)
<+59>: mov %rax,0x0(%rbp)
<+63>: mov 0x8(%rsp),%rax
<+68>: sub %fs:0x28,%rax
<+77>: jne 0x53c312 <zif_twice+130>
<+79>: add $0x10,%rsp
<+83>: pop %rbx
<+84>: pop %rbp
<+85>: pop %r12
<+87>: ret
<+88>: nopl 0x0(%rax,%rax,1)
<+96>: lea 0x50(%rdi),%rbx
<+100>: mov %rsp,%rsi
<+103>: mov $0x1,%edx
<+108>: mov %rbx,%rdi
<+111>: call 0x620240 <zend_parse_arg_long_slow>
<+116>: test %al,%al
<+118>: je 0x22be96 <zif_twice.cold>
<+124>: mov (%rsp),%rax
<+128>: jmp 0x53c2c1 <zif_twice+49>
<+130>: call 0x201050 <__stack_chk_fail@plt>
```

Notice how we get the stack protector overhead in this function and also have to reload the parsed value on the slow path. This happens because the parsed value is returned via a pointer. If instead we were to return a struct with a value pair (similar to optional in C++ / Option in Rust), then the values are returned via registers. This means that we no longer have stack protector overhead and we also don't need to reload a value, resulting in better register usage.
This is the resulting assembly for the sample function after this patch:

```s
<+0>: push %r12
<+2>: push %rbp
<+3>: push %rbx
<+4>: mov 0x2c(%rdi),%r12d
<+8>: cmp $0x1,%r12d
<+12>: jne 0x22d482 <zif_twice-3205454>
<+18>: cmpb $0x4,0x58(%rdi)
<+22>: mov %rsi,%rbp
<+25>: jne 0x53be08 <zif_twice+56>
<+27>: mov 0x50(%rdi),%rax
<+31>: add %rax,%rax
<+34>: movl $0x4,0x8(%rbp)
<+41>: mov %rax,0x0(%rbp)
<+45>: pop %rbx
<+46>: pop %rbp
<+47>: pop %r12
<+49>: ret
<+50>: nopw 0x0(%rax,%rax,1)
<+56>: lea 0x50(%rdi),%rbx
<+60>: mov $0x1,%esi
<+65>: mov %rbx,%rdi
<+68>: call 0x61e7b0 <zend_parse_arg_long_slow>
<+73>: test %dl,%dl
<+75>: je 0x22d46a <zif_twice.cold>
<+81>: jmp 0x53bdef <zif_twice+31>
```

The following uses the default benchmark programs we use in CI. Each program is run on php-cgi with the appropriate `-T` argument, then repeated 15 times. It shows a small performance improvement on Symfony both with and without JIT, and a small improvement on WordPress with JIT. For WordPress, the difference is small as my CPU is bottlenecked on some other stuff as well.

| Test                            | Old Mean | Old Stddev | New Mean | New Stddev |
|---------------------------------|----------|------------|----------|------------|
| Symfony, no JIT (-T10,50)       | 0.5324   | 0.0050     | 0.5272   | 0.0042     |
| Symfony, tracing JIT (-T10,50)  | 0.5301   | 0.0029     | 0.5264   | 0.0036     |
| WordPress, no JIT (-T5,25)      | 0.7408   | 0.0049     | 0.7404   | 0.0048     |
| WordPress, tracing JIT (-T5,25) | 0.6814   | 0.0052     | 0.6770   | 0.0055     |

I was not able to measure any meaningful difference for our micro benchmarks `Zend/bench.php` and `Zend/micro_bench.php`. The Valgrind instruction counts also show a decrease: -0.19% on Symfony without JIT, and -0.14% on WordPress without JIT (see CI).
1 parent c919ab4 commit b3dec4a

File tree

14 files changed

+237
-151
lines changed

14 files changed

+237
-151
lines changed

UPGRADING.INTERNALS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ PHP 8.5 INTERNALS UPGRADE NOTES
2727
runtime.
2828
. Removed the cache_slot argument of zend_check_user_type_slow() because
2929
now it only relies on the CE cache.
30+
. Changed several zend_parse_arg_*_{weak,slow} functions to no longer have an
31+
output pointer argument, but instead return a struct or a sentinel value.
3032

3133
========================
3234
2. Build system changes

Zend/zend_API.c

Lines changed: 90 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -523,71 +523,70 @@ static ZEND_COLD bool zend_null_arg_deprecated(const char *fallback_type, uint32
523523
return !EG(exception);
524524
}
525525

526-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_bool_weak(const zval *arg, bool *dest, uint32_t arg_num) /* {{{ */
526+
ZEND_API zend_opt_bool ZEND_FASTCALL zend_parse_arg_bool_weak(const zval *arg, uint32_t arg_num) /* {{{ */
527527
{
528528
if (EXPECTED(Z_TYPE_P(arg) <= IS_STRING)) {
529529
if (UNEXPECTED(Z_TYPE_P(arg) == IS_NULL) && !zend_null_arg_deprecated("bool", arg_num)) {
530-
return 0;
530+
return (zend_opt_bool){false, false};
531531
}
532-
*dest = zend_is_true(arg);
532+
return (zend_opt_bool){zend_is_true(arg), true};
533533
} else {
534-
return 0;
534+
return (zend_opt_bool){false, false};
535535
}
536-
return 1;
537536
}
538537
/* }}} */
539538

540-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_bool_slow(const zval *arg, bool *dest, uint32_t arg_num) /* {{{ */
539+
ZEND_API zend_opt_bool ZEND_FASTCALL zend_parse_arg_bool_slow(const zval *arg, uint32_t arg_num) /* {{{ */
541540
{
542541
if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
543-
return 0;
542+
return (zend_opt_bool){false, false};
544543
}
545-
return zend_parse_arg_bool_weak(arg, dest, arg_num);
544+
return zend_parse_arg_bool_weak(arg, arg_num);
546545
}
547546
/* }}} */
548547

549-
ZEND_API bool ZEND_FASTCALL zend_flf_parse_arg_bool_slow(const zval *arg, bool *dest, uint32_t arg_num)
548+
ZEND_API zend_opt_bool ZEND_FASTCALL zend_flf_parse_arg_bool_slow(const zval *arg, uint32_t arg_num)
550549
{
551550
if (UNEXPECTED(ZEND_FLF_ARG_USES_STRICT_TYPES())) {
552-
return 0;
551+
return (zend_opt_bool){false, false};
553552
}
554-
return zend_parse_arg_bool_weak(arg, dest, arg_num);
553+
return zend_parse_arg_bool_weak(arg, arg_num);
555554
}
556555

557-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_long_weak(const zval *arg, zend_long *dest, uint32_t arg_num) /* {{{ */
556+
ZEND_API zend_opt_long ZEND_FASTCALL zend_parse_arg_long_weak(const zval *arg, uint32_t arg_num) /* {{{ */
558557
{
559558
if (EXPECTED(Z_TYPE_P(arg) == IS_DOUBLE)) {
560559
if (UNEXPECTED(zend_isnan(Z_DVAL_P(arg)))) {
561-
return 0;
560+
goto fail;
562561
}
563562
if (UNEXPECTED(!ZEND_DOUBLE_FITS_LONG(Z_DVAL_P(arg)))) {
564-
return 0;
563+
goto fail;
565564
} else {
566565
zend_long lval = zend_dval_to_lval(Z_DVAL_P(arg));
567566
if (UNEXPECTED(!zend_is_long_compatible(Z_DVAL_P(arg), lval))) {
568567
/* Check arg_num is not (uint32_t)-1, as otherwise its called by
569568
* zend_verify_weak_scalar_type_hint_no_sideeffect() */
570569
if (arg_num != (uint32_t)-1) {
571570
zend_incompatible_double_to_long_error(Z_DVAL_P(arg));
572-
}
573-
if (UNEXPECTED(EG(exception))) {
574-
return 0;
571+
if (UNEXPECTED(EG(exception))) {
572+
goto fail;
573+
}
575574
}
576575
}
577-
*dest = lval;
576+
return (zend_opt_long){lval, true};
578577
}
579578
} else if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
580579
double d;
581580
uint8_t type;
581+
zend_long lval;
582582

583-
if (UNEXPECTED((type = is_numeric_str_function(Z_STR_P(arg), dest, &d)) != IS_LONG)) {
583+
if (UNEXPECTED((type = is_numeric_str_function(Z_STR_P(arg), &lval, &d)) != IS_LONG)) {
584584
if (EXPECTED(type != 0)) {
585-
zend_long lval;
586585
if (UNEXPECTED(zend_isnan(d))) {
587-
return 0;
586+
goto fail;
588587
}
589588
if (UNEXPECTED(!ZEND_DOUBLE_FITS_LONG(d))) {
590-
return 0;
589+
goto fail;
591590
}
592591

593592
lval = zend_dval_to_lval(d);
@@ -597,95 +596,101 @@ ZEND_API bool ZEND_FASTCALL zend_parse_arg_long_weak(const zval *arg, zend_long
597596
* zend_verify_weak_scalar_type_hint_no_sideeffect() */
598597
if (arg_num != (uint32_t)-1) {
599598
zend_incompatible_string_to_long_error(Z_STR_P(arg));
600-
}
601-
if (UNEXPECTED(EG(exception))) {
602-
return 0;
599+
if (UNEXPECTED(EG(exception))) {
600+
goto fail;
601+
}
603602
}
604603
}
605-
*dest = lval;
606604
} else {
607-
return 0;
605+
goto fail;
608606
}
609607
}
610-
if (UNEXPECTED(EG(exception))) {
611-
return 0;
612-
}
608+
return (zend_opt_long){lval, true};
613609
} else if (EXPECTED(Z_TYPE_P(arg) < IS_TRUE)) {
614610
if (UNEXPECTED(Z_TYPE_P(arg) == IS_NULL) && !zend_null_arg_deprecated("int", arg_num)) {
615-
return 0;
611+
goto fail;
616612
}
617-
*dest = 0;
613+
return (zend_opt_long){0, true};
618614
} else if (EXPECTED(Z_TYPE_P(arg) == IS_TRUE)) {
619-
*dest = 1;
620-
} else {
621-
return 0;
615+
return (zend_opt_long){1, true};
622616
}
623-
return 1;
617+
618+
fail:;
619+
zend_opt_long result;
620+
result.has_value = false;
621+
return result;
624622
}
625623
/* }}} */
626624

627-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_long_slow(const zval *arg, zend_long *dest, uint32_t arg_num) /* {{{ */
625+
ZEND_API zend_opt_long ZEND_FASTCALL zend_parse_arg_long_slow(const zval *arg, uint32_t arg_num) /* {{{ */
628626
{
629627
if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
630-
return 0;
628+
zend_opt_long result;
629+
result.has_value = false;
630+
return result;
631631
}
632-
return zend_parse_arg_long_weak(arg, dest, arg_num);
632+
return zend_parse_arg_long_weak(arg, arg_num);
633633
}
634634
/* }}} */
635635

636-
ZEND_API bool ZEND_FASTCALL zend_flf_parse_arg_long_slow(const zval *arg, zend_long *dest, uint32_t arg_num)
636+
ZEND_API zend_opt_long ZEND_FASTCALL zend_flf_parse_arg_long_slow(const zval *arg, uint32_t arg_num)
637637
{
638638
if (UNEXPECTED(ZEND_FLF_ARG_USES_STRICT_TYPES())) {
639-
return 0;
639+
zend_opt_long result;
640+
result.has_value = false;
641+
return result;
640642
}
641-
return zend_parse_arg_long_weak(arg, dest, arg_num);
643+
return zend_parse_arg_long_weak(arg, arg_num);
642644
}
643645

644-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_double_weak(const zval *arg, double *dest, uint32_t arg_num) /* {{{ */
646+
ZEND_API zend_opt_double ZEND_FASTCALL zend_parse_arg_double_weak(const zval *arg, uint32_t arg_num) /* {{{ */
645647
{
646648
if (EXPECTED(Z_TYPE_P(arg) == IS_LONG)) {
647-
*dest = (double)Z_LVAL_P(arg);
649+
return (zend_opt_double){(double)Z_LVAL_P(arg), true};
648650
} else if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
649651
zend_long l;
650652
uint8_t type;
653+
double d;
651654

652-
if (UNEXPECTED((type = is_numeric_str_function(Z_STR_P(arg), &l, dest)) != IS_DOUBLE)) {
655+
if (UNEXPECTED((type = is_numeric_str_function(Z_STR_P(arg), &l, &d)) != IS_DOUBLE)) {
653656
if (EXPECTED(type != 0)) {
654-
*dest = (double)(l);
655-
} else {
656-
return 0;
657+
return (zend_opt_double){(double)l, true};
657658
}
658-
}
659-
if (UNEXPECTED(EG(exception))) {
660-
return 0;
659+
goto fail;
660+
} else {
661+
return (zend_opt_double){d, true};
661662
}
662663
} else if (EXPECTED(Z_TYPE_P(arg) < IS_TRUE)) {
663664
if (UNEXPECTED(Z_TYPE_P(arg) == IS_NULL) && !zend_null_arg_deprecated("float", arg_num)) {
664-
return 0;
665+
goto fail;
665666
}
666-
*dest = 0.0;
667+
return (zend_opt_double){0.0, true};
667668
} else if (EXPECTED(Z_TYPE_P(arg) == IS_TRUE)) {
668-
*dest = 1.0;
669-
} else {
670-
return 0;
669+
return (zend_opt_double){1.0, true};
671670
}
672-
return 1;
671+
672+
fail:;
673+
zend_opt_double result;
674+
result.has_value = false;
675+
return result;
673676
}
674677
/* }}} */
675678

676-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_double_slow(const zval *arg, double *dest, uint32_t arg_num) /* {{{ */
679+
ZEND_API zend_opt_double ZEND_FASTCALL zend_parse_arg_double_slow(const zval *arg, uint32_t arg_num) /* {{{ */
677680
{
678681
if (EXPECTED(Z_TYPE_P(arg) == IS_LONG)) {
679682
/* SSTH Exception: IS_LONG may be accepted instead as IS_DOUBLE */
680-
*dest = (double)Z_LVAL_P(arg);
683+
return (zend_opt_double){(double)Z_LVAL_P(arg), true};
681684
} else if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
682-
return 0;
685+
zend_opt_double result;
686+
result.has_value = false;
687+
return result;
683688
}
684-
return zend_parse_arg_double_weak(arg, dest, arg_num);
689+
return zend_parse_arg_double_weak(arg, arg_num);
685690
}
686691
/* }}} */
687692

688-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_number_slow(zval *arg, zval **dest, uint32_t arg_num) /* {{{ */
693+
ZEND_API bool ZEND_FASTCALL zend_parse_arg_number_slow(zval *arg, uint32_t arg_num) /* {{{ */
689694
{
690695
if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
691696
return 0;
@@ -713,13 +718,12 @@ ZEND_API bool ZEND_FASTCALL zend_parse_arg_number_slow(zval *arg, zval **dest, u
713718
} else {
714719
return 0;
715720
}
716-
*dest = arg;
717721
return 1;
718722
}
719723
/* }}} */
720724

721725

722-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_number_or_str_slow(zval *arg, zval **dest, uint32_t arg_num) /* {{{ */
726+
ZEND_API bool ZEND_FASTCALL zend_parse_arg_number_or_str_slow(zval *arg, uint32_t arg_num) /* {{{ */
723727
{
724728
if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
725729
return false;
@@ -737,72 +741,74 @@ ZEND_API bool ZEND_FASTCALL zend_parse_arg_number_or_str_slow(zval *arg, zval **
737741
if (zobj->handlers->cast_object(zobj, &obj, IS_STRING) == SUCCESS) {
738742
OBJ_RELEASE(zobj);
739743
ZVAL_COPY_VALUE(arg, &obj);
740-
*dest = arg;
741744
return true;
742745
}
743746
return false;
744747
} else {
745748
return false;
746749
}
747-
*dest = arg;
748750
return true;
749751
}
750752

751-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_str_weak(zval *arg, zend_string **dest, uint32_t arg_num) /* {{{ */
753+
ZEND_API zend_string * ZEND_FASTCALL zend_parse_arg_str_weak(zval *arg, uint32_t arg_num) /* {{{ */
752754
{
753755
if (EXPECTED(Z_TYPE_P(arg) < IS_STRING)) {
754756
if (UNEXPECTED(Z_TYPE_P(arg) == IS_NULL) && !zend_null_arg_deprecated("string", arg_num)) {
755-
return 0;
757+
return NULL;
756758
}
757759
convert_to_string(arg);
758-
*dest = Z_STR_P(arg);
760+
return Z_STR_P(arg);
759761
} else if (UNEXPECTED(Z_TYPE_P(arg) == IS_OBJECT)) {
760762
zend_object *zobj = Z_OBJ_P(arg);
761763
zval obj;
762764
if (zobj->handlers->cast_object(zobj, &obj, IS_STRING) == SUCCESS) {
763765
OBJ_RELEASE(zobj);
764766
ZVAL_COPY_VALUE(arg, &obj);
765-
*dest = Z_STR_P(arg);
766-
return 1;
767+
return Z_STR_P(arg);
767768
}
768-
return 0;
769+
return NULL;
769770
} else {
770-
return 0;
771+
return NULL;
771772
}
772-
return 1;
773773
}
774774
/* }}} */
775775

776-
ZEND_API bool ZEND_FASTCALL zend_parse_arg_str_slow(zval *arg, zend_string **dest, uint32_t arg_num) /* {{{ */
776+
ZEND_API zend_string * ZEND_FASTCALL zend_parse_arg_str_slow(zval *arg, uint32_t arg_num) /* {{{ */
777777
{
778778
if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
779-
return 0;
779+
return NULL;
780780
}
781-
return zend_parse_arg_str_weak(arg, dest, arg_num);
781+
return zend_parse_arg_str_weak(arg, arg_num);
782782
}
783783
/* }}} */
784784

785-
ZEND_API bool ZEND_FASTCALL zend_flf_parse_arg_str_slow(zval *arg, zend_string **dest, uint32_t arg_num)
785+
ZEND_API zend_string * ZEND_FASTCALL zend_flf_parse_arg_str_slow(zval *arg, uint32_t arg_num)
786786
{
787787
if (UNEXPECTED(ZEND_FLF_ARG_USES_STRICT_TYPES())) {
788788
return 0;
789789
}
790-
return zend_parse_arg_str_weak(arg, dest, arg_num);
790+
return zend_parse_arg_str_weak(arg, arg_num);
791791
}
792792

793793
ZEND_API bool ZEND_FASTCALL zend_parse_arg_str_or_long_slow(zval *arg, zend_string **dest_str, zend_long *dest_long, uint32_t arg_num) /* {{{ */
794794
{
795795
if (UNEXPECTED(ZEND_ARG_USES_STRICT_TYPES())) {
796796
return 0;
797797
}
798-
if (zend_parse_arg_long_weak(arg, dest_long, arg_num)) {
798+
zend_opt_long result = zend_parse_arg_long_weak(arg, arg_num);
799+
if (result.has_value) {
800+
*dest_long = result.value;
799801
*dest_str = NULL;
800802
return 1;
801-
} else if (zend_parse_arg_str_weak(arg, dest_str, arg_num)) {
802-
*dest_long = 0;
803-
return 1;
804803
} else {
805-
return 0;
804+
zend_string *str = zend_parse_arg_str_weak(arg, arg_num);
805+
if (str) {
806+
*dest_long = 0;
807+
*dest_str = str;
808+
return 1;
809+
} else {
810+
return 0;
811+
}
806812
}
807813
}
808814
/* }}} */

0 commit comments

Comments
 (0)