diff --git a/evm_arithmetization/src/cpu/kernel/asm/cdk_pre_execution.asm b/evm_arithmetization/src/cpu/kernel/asm/cdk_pre_execution.asm index fa8828097..bc1145d63 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/cdk_pre_execution.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/cdk_pre_execution.asm @@ -58,19 +58,18 @@ global update_scalable_l1blockhash: PROVER_INPUT(ger) // stack: l1blockhash?, retdest DUP1 %eq_const(@U256_MAX) %jumpi(skip_and_exit) - // stack: l1blockhash, retdest + PUSH @SEGMENT_KERNEL_GENERAL + // stack: addr, l1blockhash, retdest PUSH @GLOBAL_EXIT_ROOT_STORAGE_POS PROVER_INPUT(ger) - // stack: root, GLOBAL_EXIT_ROOT_STORAGE_POS, l1blockhash, retdest - PUSH @SEGMENT_KERNEL_GENERAL - // stack: addr, root, GLOBAL_EXIT_ROOT_STORAGE_POS, l1blockhash, retdest + // stack: root, GLOBAL_EXIT_ROOT_STORAGE_POS, addr, l1blockhash, retdest + DUP3 + // stack: addr, root, GLOBAL_EXIT_ROOT_STORAGE_POS, addr, l1blockhash, retdest MSTORE_32BYTES_32 - // stack: addr, GLOBAL_EXIT_ROOT_STORAGE_POS, l1blockhash, retdest + // stack: addr', GLOBAL_EXIT_ROOT_STORAGE_POS, addr, l1blockhash, retdest MSTORE_32BYTES_32 - // stack: addr, l1blockhash, retdest - POP - // stack: l1blockhash, retdest - PUSH 64 PUSH @SEGMENT_KERNEL_GENERAL + // stack: addr'', addr, l1blockhash, retdest + %stack (addr_2, addr) -> (addr, 64) // stack: addr, len, l1blockhash, retdest KECCAK_GENERAL // stack: slot, l1blockhash, retdest diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/gas.asm b/evm_arithmetization/src/cpu/kernel/asm/core/gas.asm index 2e16c373e..7b16cbed3 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/gas.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/gas.asm @@ -16,17 +16,7 @@ global sys_gas: %endmacro -// TODO: `%refund_gas` and `refund_gas_hook` are hooks used for debugging. They should be removed at some point and `refund_gas_original` renamed to `refund_gas`. %macro refund_gas - PUSH %%after %jump(refund_gas_hook) -%%after: - %refund_gas_original -%endmacro - -global refund_gas_hook: - JUMP - -%macro refund_gas_original // stack: amount DUP1 %journal_refund %mload_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER) @@ -34,18 +24,8 @@ global refund_gas_hook: %mstore_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER) %endmacro -// TODO: `%charge_gas` and `charge_gas_hook` are hooks used for debugging. They should be removed at some point and `charge_gas_original` renamed to `charge_gas`. -%macro charge_gas - PUSH %%after %jump(charge_gas_hook) -%%after: - %charge_gas_original -%endmacro - -global charge_gas_hook: - JUMP - // Charge gas. Faults if we exceed the limit for the current context. -%macro charge_gas_original +%macro charge_gas // stack: gas, kexit_info %shl_const(192) ADD diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/blake2_f.asm b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/blake2_f.asm index 8ad0e5c44..f8b4c8314 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/blake2_f.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/blake2_f.asm @@ -75,47 +75,39 @@ global precompile_blake2_f: SWAP1 // stack: t0_addr = m0_addr + 8 * 16, t_0, t_1, flag, blake2_f_contd, kexit_info + %sub_const(8) + // stack: m0_addr + 8 * (16 - 1), t_0, t_1, flag, blake2_f_contd, kexit_info + + PUSH @SEGMENT_CALLDATA + GET_CONTEXT + %build_address_no_offset + %rep 16 - // stack: m0_addr + 8 * (16 - i), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %sub_const(8) - // stack: m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - DUP1 - // stack: m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %build_address + // stack: base_addr, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + DUP2 DUP2 + // stack: base_addr, m0_addr + 8 * (16 - i - 1), base_addr, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + ADD // base_addr + offset %mload_packing_u64_LE - // stack: m_i, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - SWAP1 - // stack: m0_addr + 8 * (16 - i - 1), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: m_i, base_addr, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + SWAP2 %sub_const(8) SWAP1 + // stack: base_addr, m0_addr + 8 * (16 - i - 2), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %endrep - // stack: m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: base_addr, m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %rep 8 - // stack: h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %sub_const(8) - // stack: h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - DUP1 - // stack: h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %build_address + // stack: base_addr, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + DUP2 DUP2 + // stack: base_addr, h0_addr + 8 * (8 - i - 1), base_addr, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + ADD // base_addr + offset %mload_packing_u64_LE - // stack: h_i, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - SWAP1 - // stack: h0_addr + 8 * (8 - i), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h_i, base_addr, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + SWAP2 %sub_const(8) SWAP1 + // stack: base_addr, h0_addr + 8 * (8 - i - 1), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %endrep - // stack: h0_addr + 8 * 8 = 68, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - POP - - %stack () -> (@SEGMENT_CALLDATA, 4) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 4, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %build_address_no_offset + // stack: base_addr, garbage, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + + PUSH 4 SWAP2 POP + // stack: base_addr, 4, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info MLOAD_32BYTES // stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_add.asm b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_add.asm index 43414e859..038e63a90 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_add.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_add.asm @@ -14,50 +14,47 @@ global precompile_bn_add: %charge_gas_const(@BN_ADD_GAS) + GET_CONTEXT + PUSH @SEGMENT_CALLDATA + %build_address_no_offset + // stack: base_addr, kexit_info + // Load x0, y0, x1, y1 from the call data using `MLOAD_32BYTES`. PUSH bn_add_return - // stack: bn_add_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 96, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 96, 32, bn_add_return, kexit_info - %build_address + // stack: bn_add_return, base_addr, kexit_info + %stack (bn_add_return, base_addr) -> (base_addr, 96, 32, bn_add_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: y1, bn_add_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 64, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 64, 32, y1, bn_add_return, kexit_info - %build_address + // stack: y1, bn_add_return, base_addr, kexit_info + %stack (y1, bn_add_return, base_addr) -> (base_addr, 64, 32, y1, bn_add_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: x1, y1, bn_add_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 32, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 32, 32, x1, y1, bn_add_return, kexit_info - %build_address + // stack: x1, y1, bn_add_return, base_addr, kexit_info + %stack (x1, y1, bn_add_return, base_addr) -> (base_addr, 32, 32, x1, y1, bn_add_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: y0, x1, y1, bn_add_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 32, y0, x1, y1, bn_add_return, kexit_info - %build_address_no_offset + // stack: y0, x1, y1, bn_add_return, base_addr, kexit_info + %stack (y0, x1, y1, bn_add_return, base_addr) -> (base_addr, 32, y0, x1, y1, bn_add_return, base_addr) MLOAD_32BYTES - // stack: x0, y0, x1, y1, bn_add_return, kexit_info + // stack: x0, y0, x1, y1, bn_add_return, base_addr, kexit_info %jump(bn_add) bn_add_return: - // stack: x, y, kexit_info + // stack: x, y, base_addr, kexit_info DUP2 %eq_const(@U256_MAX) // bn_add returns (U256_MAX, U256_MAX) on bad input. DUP2 %eq_const(@U256_MAX) // bn_add returns (U256_MAX, U256_MAX) on bad input. MUL // Cheaper than AND %jumpi(fault_exception) - // stack: x, y, kexit_info + // stack: x, y, base_addr, kexit_info // Store the result (x, y) to the parent's return data using `mstore_unpacking`. %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64) %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT) - %stack (parent_ctx, x, y) -> (parent_ctx, @SEGMENT_RETURNDATA, x, parent_ctx, y) + %stack (parent_ctx, x, y) -> (parent_ctx, @SEGMENT_RETURNDATA, x, y) %build_address_no_offset + // stack: addr_x, x, y, base_addr, kexit_info MSTORE_32BYTES_32 - POP - %stack (parent_ctx, y) -> (parent_ctx, @SEGMENT_RETURNDATA, 32, y) - %build_address + // stack: addr_y = addr_x + 32, y, base_addr, kexit_info MSTORE_32BYTES_32 + // stack: addr, base_addr, kexit_info + POP %jump(pop_and_return_success) diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_mul.asm b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_mul.asm index c29080166..62a25d153 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_mul.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_mul.asm @@ -14,30 +14,28 @@ global precompile_bn_mul: %charge_gas_const(@BN_MUL_GAS) + GET_CONTEXT + PUSH @SEGMENT_CALLDATA + %build_address_no_offset + // stack: base_addr, kexit_info + // Load x, y, n from the call data using `MLOAD_32BYTES`. PUSH bn_mul_return - // stack: bn_mul_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 64, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 64, 32, bn_mul_return, kexit_info - %build_address + // stack: bn_mul_return, base_addr, kexit_info + %stack (bn_mul_return, base_addr) -> (base_addr, 64, 32, bn_mul_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: n, bn_mul_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 32, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 32, 32, n, bn_mul_return, kexit_info - %build_address + // stack: n, bn_mul_return, base_addr, kexit_info + %stack (n, bn_mul_return, base_addr) -> (base_addr, 32, 32, n, bn_mul_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: y, n, bn_mul_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 32, y, n, bn_mul_return, kexit_info - %build_address_no_offset + // stack: y, n, bn_mul_return, base_addr, kexit_info + %stack (y, n, bn_mul_return, base_addr) -> (base_addr, 32, y, n, bn_mul_return, base_addr) MLOAD_32BYTES - // stack: x, y, n, bn_mul_return, kexit_info + // stack: x, y, n, bn_mul_return, base_addr, kexit_info %jump(bn_mul) bn_mul_return: - // stack: Px, Py, kexit_info + // stack: Px, Py, base_addr, kexit_info DUP2 %eq_const(@U256_MAX) // bn_mul returns (U256_MAX, U256_MAX) on bad input. DUP2 %eq_const(@U256_MAX) // bn_mul returns (U256_MAX, U256_MAX) on bad input. MUL // Cheaper than AND @@ -55,4 +53,6 @@ bn_mul_contd6: %stack (parent_ctx, Py) -> (parent_ctx, @SEGMENT_RETURNDATA, 32, Py) %build_address MSTORE_32BYTES_32 + // stack: addr, base_addr, kexit_info + POP %jump(pop_and_return_success) diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/ecrec.asm b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/ecrec.asm index 4a27ca75b..8f3c30f7d 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/ecrec.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/ecrec.asm @@ -14,36 +14,33 @@ global precompile_ecrec: %charge_gas_const(@ECREC_GAS) + GET_CONTEXT + PUSH @SEGMENT_CALLDATA + %build_address_no_offset + // stack: base_addr, kexit_info + // Load hash, v, r, s from the call data using `MLOAD_32BYTES`. PUSH ecrec_return - // stack: ecrec_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 96, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 96, 32, ecrec_return, kexit_info - %build_address + // stack: ecrec_return, base_addr, kexit_info + + %stack (ecrec_return, base_addr) -> (base_addr, 96, 32, ecrec_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: s, ecrec_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 64, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 64, 32, s, ecrec_return, kexit_info - %build_address + // stack: s, ecrec_return, base_addr, kexit_info + %stack (s, ecrec_return, base_addr) -> (base_addr, 64, 32, s, ecrec_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: r, s, ecrec_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 32, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 32, 32, r, s, ecrec_return, kexit_info - %build_address + // stack: r, s, ecrec_return, base_addr, kexit_info + %stack (r, s, ecrec_return, base_addr) -> (base_addr, 32, 32, r, s, ecrec_return, base_addr) + ADD // base_addr + offset MLOAD_32BYTES - // stack: v, r, s, ecrec_return, kexit_info - %stack () -> (@SEGMENT_CALLDATA, 32) - GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 32, v, r, s, ecrec_return, kexit_info - %build_address_no_offset + // stack: v, r, s, ecrec_return, base_addr, kexit_info + %stack (v, r, s, ecrec_return, base_addr) -> (base_addr, 32, v, r, s, ecrec_return, base_addr) MLOAD_32BYTES - // stack: hash, v, r, s, ecrec_return, kexit_info + // stack: hash, v, r, s, ecrec_return, base_addr, kexit_info %jump(ecrecover) ecrec_return: - // stack: address, kexit_info + // stack: address, base_addr, kexit_info DUP1 %eq_const(@U256_MAX) %jumpi(ecrec_bad_input) // ecrecover returns U256_MAX on bad input. // Store the result address to the parent's return data using `mstore_unpacking`. @@ -52,9 +49,13 @@ ecrec_return: %stack (parent_ctx, address) -> (parent_ctx, @SEGMENT_RETURNDATA, address) %build_address_no_offset MSTORE_32BYTES_32 + // stack: addr, base_addr, kexit_info + POP %jump(pop_and_return_success) // On bad input, return empty return data but still return success. ecrec_bad_input: %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0) + // stack: addr, base_addr, kexit_info + POP %jump(pop_and_return_success) diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/expmod.asm b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/expmod.asm index 1dc7841b5..684c80810 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/expmod.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/precompiles/expmod.asm @@ -85,7 +85,7 @@ store_limbs_return: %macro expmod_gas_f // stack: x // Overflow check - DUP1 %ge_const(0x800000000000000000000000000000007) %jumpi(fault_exception) + DUP1 %gt_const(0x800000000000000000000000000000006) %jumpi(fault_exception) // stack: x %ceil_div_const(8) // stack: ceil(x/8) @@ -100,7 +100,7 @@ calculate_l_E_prime: DUP1 %gt_const(0x100000000000000000000000000000000) %jumpi(fault_exception) DUP1 ISZERO %jumpi(case_le_zero) // stack: l_E, l_B, retdest - DUP1 %le_const(32) + DUP1 %lt_const(33) // stack: l_E <= 32, l_E, l_B, retdest %jumpi(case_le_32) // stack: l_E, l_B, retdest @@ -121,7 +121,7 @@ calculate_l_E_prime: // stack: l_E, log2(i[96 + l_B..128 + l_B]), l_B, retdest %sub_const(32) // Overflow check - DUP1 %ge_const(0x2000000000000000000000000000000000000000000000000000000000000000) %jumpi(fault_exception) + DUP1 %gt_const(0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff) %jumpi(fault_exception) %mul_const(8) // stack: 8 * (l_E - 32), log2(i[96 + l_B..128 + l_B]), l_B, retdest ADD diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/process_txn.asm b/evm_arithmetization/src/cpu/kernel/asm/core/process_txn.asm index 07926b85a..28252d012 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/process_txn.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/process_txn.asm @@ -188,11 +188,15 @@ global process_contract_creation_txn_after_constructor: ISZERO %jumpi(contract_creation_fault_3) - // EIP-3541: Reject new contract code starting with the 0xEF byte + // EIP-3541: Reject new contract code starting with the 0xEF byte, if code_size > 0 + %returndatasize // size of the code + DUP1 ISZERO + // stack: code_size == 0, code_size, leftover_gas, new_ctx, address, retdest, success + %jumpi(process_contract_creation_txn_after_ef_check) + // stack: code_size, leftover_gas, new_ctx, address, retdest, success PUSH 0 %mload_current(@SEGMENT_RETURNDATA) %eq_const(0xEF) %jumpi(contract_creation_fault_3_zero_leftover) - // stack: leftover_gas, new_ctx, address, retdest, success - %returndatasize // Size of the code. +process_contract_creation_txn_after_ef_check: // stack: code_size, leftover_gas, new_ctx, address, retdest, success DUP1 %gt_const(@MAX_CODE_SIZE) %jumpi(contract_creation_fault_4) // stack: code_size, leftover_gas, new_ctx, address, retdest, success @@ -487,8 +491,8 @@ contract_creation_fault_3: contract_creation_fault_3_zero_leftover: %revert_checkpoint - // stack: leftover_gas, new_ctx, address, retdest, success - %pop3 + // stack: code_size, leftover_gas, new_ctx, address, retdest, success + %pop4 PUSH 0 // leftover gas // stack: leftover_gas, retdest, success %pay_coinbase_and_refund_sender diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/terminate.asm b/evm_arithmetization/src/cpu/kernel/asm/core/terminate.asm index 1d406097c..2e3482ea3 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/terminate.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/terminate.asm @@ -193,7 +193,7 @@ revert_after_gas: %stack (addr, size, parent_ctx, kexit_info) -> ( parent_ctx, @SEGMENT_RETURNDATA, // DST - addr, // SRC + addr, // SRC size, sys_revert_finish, kexit_info // count, retdest, ... ) %build_address_no_offset diff --git a/evm_arithmetization/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm b/evm_arithmetization/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm index 32eb5b6c1..fea966f8b 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm @@ -76,7 +76,7 @@ global bn_glv_decompose: // along with a flag `underflow` set to 1 if there is an underflow, 0 otherwise. ADD %bn_sub_check_underflow // stack: k2, underflow, N, k, retdest - DUP1 %ge_const(0x80000000000000000000000000000000) %jumpi(negate) + DUP1 %gt_const(0x7fffffffffffffffffffffffffffffff) %jumpi(negate) %jump(contd) negate: // stack: k2, underflow, N, k, retdest diff --git a/evm_arithmetization/src/cpu/kernel/asm/curve/secp256k1/curve_add.asm b/evm_arithmetization/src/cpu/kernel/asm/curve/secp256k1/curve_add.asm index f1385de56..132beac0b 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/curve/secp256k1/curve_add.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/curve/secp256k1/curve_add.asm @@ -99,11 +99,11 @@ secp_add_valid_points_with_lambda: // stack: N, lambda, x0, y0, x1, y1, retdest DUP3 // stack: x0, N, lambda, x0, y0, x1, y1, retdest - %secp_base + DUP2 // stack: N, x0, N, lambda, x0, y0, x1, y1, retdest DUP7 // stack: x1, N, x0, N, lambda, x0, y0, x1, y1, retdest - %secp_base + DUP2 // stack: N, x1, N, x0, N, lambda, x0, y0, x1, y1, retdest DUP6 // stack: lambda, N, x1, N, x0, N, lambda, x0, y0, x1, y1, retdest @@ -117,7 +117,7 @@ secp_add_valid_points_with_lambda: // stack: x2, lambda, x0, y0, x1, y1, retdest // Compute y2 = lambda*(x1 - x2) - y1 - %secp_base %secp_base %secp_base // Pre-load moduli for incoming SUBMODs + %secp_base DUP1 DUP1 // Pre-load moduli for incoming SUBMODs // stack: N, N, N, x2, lambda, x0, y0, x1, y1, retdest DUP4 // stack: x2, N, N, N, x2, lambda, x0, y0, x1, y1, retdest @@ -244,11 +244,11 @@ global secp_double: // stack: x, y, (y < N) & (x < N) %secp_base // stack: N, x, y, b - %secp_base + DUP1 // stack: N, N, x, y, b DUP3 // stack: x, N, N, x, y, b - %secp_base + DUP2 // stack: N, x, N, N, x, y, b DUP2 // stack: x, N, x, N, N, x, y, b diff --git a/evm_arithmetization/src/cpu/kernel/asm/hash/blake2/blake2_f.asm b/evm_arithmetization/src/cpu/kernel/asm/hash/blake2/blake2_f.asm index d1a4a2ab6..aa9951997 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/hash/blake2/blake2_f.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/hash/blake2/blake2_f.asm @@ -35,13 +35,15 @@ global blake2_f: %add_const(7) %rep 8 // stack: addr, ... - DUP1 - // stack: addr, addr, ... + PUSH 1 + // stack: 1, addr, ... + DUP2 + // stack: addr, 1, addr, ... MLOAD_GENERAL - // stack: val, addr, ... - SWAP1 - // stack: addr, val, ... - %decrement + // stack: val, 1, addr, ... + SWAP2 + // stack: addr, 1, val, ... + SUB %endrep // stack: addr, h_0, ..., h_7, rounds, t0, t1, flag, retdest POP @@ -66,16 +68,18 @@ global blake2_f: %rep 4 // stack: i, addr, ... DUP2 + // stack: addr, i, addr, ... + %increment + // stack: addr + 1, i, addr, ... + SWAP2 + // stack: addr, i, addr + 1, ... DUP2 - // stack: i, addr, i, addr, ... + // stack: i, addr, i, addr + 1, ... %blake2_iv - // stack: IV_i, addr, i, addr, ... + // stack: IV_i, addr, i, addr + 1, ... MSTORE_GENERAL - // stack: i, addr, ... + // stack: i, addr + 1, ... %increment - SWAP1 - %increment - SWAP1 // stack: i + 1, addr + 1,... %endrep // stack: 4, start + 12, rounds, t0, t1, flag, retdest diff --git a/evm_arithmetization/src/cpu/kernel/asm/main.asm b/evm_arithmetization/src/cpu/kernel/asm/main.asm index e72c20ee6..cb170ef24 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/main.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/main.asm @@ -130,14 +130,14 @@ global start_txns: #[cfg(feature = eth_mainnet)] { // If txn_idx == 0, update the beacon_root for Ethereum mainnet. - %mload_global_metadata(@GLOBAL_METADATA_TXN_NUMBER_BEFORE) + DUP4 ISZERO %jumpi(set_beacon_root) } #[cfg(feature = cdk_erigon)] { // If txn_idx == 0, perform pre-state execution for CDK erigon. - %mload_global_metadata(@GLOBAL_METADATA_TXN_NUMBER_BEFORE) + DUP4 ISZERO %jumpi(pre_block_execution) } @@ -258,6 +258,7 @@ global check_final_state_trie: %macro reinitialize_memory_pre_txn // Reinitialize accessed addresses and storage keys lists %init_access_lists + PUSH 0 %mstore_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // Reinitialize transient storage %init_transient_storage_len @@ -272,6 +273,7 @@ global check_final_state_trie: PUSH 0 %mstore_global_metadata(@GLOBAL_METADATA_JOURNAL_DATA_LEN) PUSH 0 %mstore_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER) PUSH 0 %mstore_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) + PUSH 0 %mstore_global_metadata(@GLOBAL_METADATA_CREATED_CONTRACTS_LEN) // Reinitialize `chain_id` for legacy transactions and `to` transaction field PUSH 0 %mstore_txn_field(@TXN_FIELD_CHAIN_ID_PRESENT) diff --git a/evm_arithmetization/src/cpu/kernel/asm/memory/syscalls.asm b/evm_arithmetization/src/cpu/kernel/asm/memory/syscalls.asm index 3e3d43f1f..d2148de91 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/memory/syscalls.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/memory/syscalls.asm @@ -102,7 +102,8 @@ calldataload_large_offset: // stack: kexit_info, dest_offset, offset, size GET_CONTEXT PUSH $segment - // stack: segment, context, kexit_info, dest_offset, offset, size + %build_address_no_offset + // stack: base_addr, kexit_info, dest_offset, offset, size %jump(wcopy_within_bounds) %endmacro @@ -130,12 +131,11 @@ codecopy_within_bounds: %jump(memcpy_bytes) wcopy_within_bounds: - // TODO: rework address creation to have less stack manipulation overhead - // stack: segment, src_ctx, kexit_info, dest_offset, offset, size + // stack: base_addr, kexit_info, dest_offset, offset, size GET_CONTEXT - %stack (context, segment, src_ctx, kexit_info, dest_offset, offset, size) -> - (src_ctx, segment, offset, @SEGMENT_MAIN_MEMORY, dest_offset, context, size, wcopy_after, kexit_info) - %build_address + %stack (context, base_addr, kexit_info, dest_offset, offset, size) -> + (base_addr, offset, @SEGMENT_MAIN_MEMORY, dest_offset, context, size, wcopy_after, kexit_info) + ADD // SRC SWAP3 %build_address // stack: DST, SRC, size, wcopy_after, kexit_info %jump(memcpy_bytes) @@ -287,24 +287,26 @@ global sys_mcopy: // stack: kexit_info, dest_offset, offset, size GET_CONTEXT PUSH @SEGMENT_MAIN_MEMORY + %build_address_no_offset - DUP5 DUP5 LT - // stack: dest_offset < offset, kexit_info, dest_offset, offset, size + DUP4 DUP4 LT + // stack: dest_offset < offset, base_addr, kexit_info, dest_offset, offset, size %jumpi(wcopy_within_bounds) - // stack: segment, context, kexit_info, dest_offset, offset, size - DUP6 PUSH 32 %min - // stack: shift=min(size, 32), segment, context, kexit_info, dest_offset, offset, size - DUP6 DUP8 ADD - // stack: offset + size, shift, segment, context, kexit_info, dest_offset, offset, size - DUP6 LT - // stack: dest_offset < offset + size, shift, segment, context, kexit_info, dest_offset, offset, size + // stack: base_addr, kexit_info, dest_offset, offset, size + + DUP5 PUSH 32 %min + // stack: shift=min(size, 32), base_addr, kexit_info, dest_offset, offset, size + DUP5 DUP7 ADD + // stack: offset + size, shift, base_addr, kexit_info, dest_offset, offset, size + DUP5 LT + // stack: dest_offset < offset + size, shift, base_addr, kexit_info, dest_offset, offset, size DUP2 - // stack: shift, dest_offset < offset + size, shift, segment, context, kexit_info, dest_offset, offset, size - DUP9 GT - // stack: size > shift, dest_offset < offset + size, shift, segment, context, kexit_info, dest_offset, offset, size + // stack: shift, dest_offset < offset + size, shift, base_addr, kexit_info, dest_offset, offset, size + DUP8 GT + // stack: size > shift, dest_offset < offset + size, shift, base_addr, kexit_info, dest_offset, offset, size MUL // AND - // stack: (size > shift) && (dest_offset < offset + size), shift, segment, context, kexit_info, dest_offset, offset, size + // stack: (size > shift) && (dest_offset < offset + size), shift, base_addr, kexit_info, dest_offset, offset, size // If the conditions `size > shift` and `dest_offset < offset + size` are satisfied, that means // we will get an overlap that will overwrite some SRC data. In that case, we will proceed to the @@ -313,7 +315,7 @@ global sys_mcopy: // Otherwise, we either have `SRC` < `DST`, or a small enough `size` that a single loop of // `memcpy_bytes` suffices and does not risk to overwrite `SRC` data before being read. - // stack: shift, segment, context, kexit_info, dest_offset, offset, size + // stack: shift, base_addr, kexit_info, dest_offset, offset, size POP %jump(wcopy_within_bounds) @@ -323,24 +325,22 @@ mcopy_with_overlap: // For this, we need to update `offset` and `dest_offset` to their final position, corresponding // to `x + size - min(32, size)`. - // stack: shift=min(size, 32), segment, context, kexit_info, dest_offset, offset, size + // stack: shift=min(size, 32), base_addr, kexit_info, dest_offset, offset, size DUP1 - // stack: shift, shift, segment, context, kexit_info, dest_offset, offset, size - DUP8 DUP8 ADD - // stack: offset+size, shift, shift, segment, context, kexit_info, dest_offset, offset, size + // stack: shift, shift, base_addr, kexit_info, dest_offset, offset, size + DUP7 DUP7 ADD + // stack: offset+size, shift, shift, base_addr, kexit_info, dest_offset, offset, size SUB - // stack: offset'=offset+size-shift, shift, segment, context, kexit_info, dest_offset, offset, size - SWAP5 DUP8 ADD - // stack: dest_offset+size, shift, segment, context, kexit_info, offset', offset, size + // stack: offset'=offset+size-shift, shift, base_addr, kexit_info, dest_offset, offset, size + SWAP4 DUP7 ADD + // stack: dest_offset+size, shift, base_addr, kexit_info, offset', offset, size SUB - // stack: dest_offset'=dest_offset+size-shift, segment, context, kexit_info, offset', offset, size + // stack: dest_offset'=dest_offset+size-shift, base_addr, kexit_info, offset', offset, size - %stack (next_dst_offset, segment, context, kexit_info, new_offset, offset, size) -> - (context, segment, new_offset, segment, next_dst_offset, context, size, wcopy_after, kexit_info) - %build_address // SRC - SWAP3 - %build_address // DST - // stack: DST, SRC, size, wcopy_after, kexit_info + DUP2 ADD // DST + // stack: DST, base_addr, kexit_info, new_offset, offset, size + SWAP3 ADD // SRC + %stack (SRC, kexit_info, DST, offset, size) -> (DST, SRC, size, wcopy_after, kexit_info) %jump(memcpy_bytes_backwards) mcopy_empty: diff --git a/evm_arithmetization/src/cpu/kernel/asm/mpt/hex_prefix.asm b/evm_arithmetization/src/cpu/kernel/asm/mpt/hex_prefix.asm index 0ca2458f0..3e8a783ab 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/mpt/hex_prefix.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/mpt/hex_prefix.asm @@ -6,7 +6,7 @@ // Pre stack: rlp_start_addr, num_nibbles, packed_nibbles, terminated, retdest // Post stack: rlp_end_addr global hex_prefix_rlp: - DUP2 %assert_lt_const(65) + DUP2 %assert_le_const(64) PUSH 2 DUP3 DIV // Compute the length of the hex-prefix string, in bytes: diff --git a/evm_arithmetization/src/cpu/kernel/asm/signed.asm b/evm_arithmetization/src/cpu/kernel/asm/signed.asm index 566d7d5ae..a9e9e3648 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/signed.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/signed.asm @@ -131,11 +131,12 @@ global _sys_sar: // Now assume shift < 256. // Stack: shift, value, return_info PUSH 0x8000000000000000000000000000000000000000000000000000000000000000 - DUP2 + DUP1 + DUP3 SHR - // Stack: 2^255 >> shift, shift, value, return_info - SWAP2 - %add_const(0x8000000000000000000000000000000000000000000000000000000000000000) + // Stack: 2^255 >> shift, 0x8000000000000000000000000000000000000000000000000000000000000000, shift, value, return_info + SWAP3 + ADD // Stack: 2^255 + value, shift, 2^255 >> shift, return_info SWAP1 SHR diff --git a/evm_arithmetization/src/cpu/kernel/asm/util/basic_macros.asm b/evm_arithmetization/src/cpu/kernel/asm/util/basic_macros.asm index 395852810..657ee9760 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/util/basic_macros.asm @@ -174,7 +174,7 @@ PUSH $c // stack: c, input, ... LT // Check it backwards: (input > c) == (c < input) - // stack: input >= c, ... + // stack: input > c, ... %endmacro %macro ge_const(c) diff --git a/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs b/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs index 49fa18d78..1839ec33d 100644 --- a/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs +++ b/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs @@ -65,8 +65,6 @@ pub(crate) enum GlobalMetadata { AccessedStorageKeysLen, /// Length of the self-destruct list. SelfDestructListLen, - /// Length of the bloom entry buffer. - BloomEntryLen, /// Length of the journal. JournalLen, @@ -118,7 +116,7 @@ pub(crate) enum GlobalMetadata { } impl GlobalMetadata { - pub(crate) const COUNT: usize = 55; + pub(crate) const COUNT: usize = 54; /// Unscales this virtual offset by their respective `Segment` value. pub(crate) const fn unscale(&self) -> usize { @@ -157,7 +155,6 @@ impl GlobalMetadata { Self::AccessedAddressesLen, Self::AccessedStorageKeysLen, Self::SelfDestructListLen, - Self::BloomEntryLen, Self::JournalLen, Self::JournalDataLen, Self::CurrentCheckpoint, @@ -219,7 +216,6 @@ impl GlobalMetadata { Self::AccessedAddressesLen => "GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN", Self::AccessedStorageKeysLen => "GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN", Self::SelfDestructListLen => "GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN", - Self::BloomEntryLen => "GLOBAL_METADATA_BLOOM_ENTRY_LEN", Self::JournalLen => "GLOBAL_METADATA_JOURNAL_LEN", Self::JournalDataLen => "GLOBAL_METADATA_JOURNAL_DATA_LEN", Self::CurrentCheckpoint => "GLOBAL_METADATA_CURRENT_CHECKPOINT", diff --git a/evm_arithmetization/src/cpu/kernel/optimizer.rs b/evm_arithmetization/src/cpu/kernel/optimizer.rs index 4890b6f37..2fac05e52 100644 --- a/evm_arithmetization/src/cpu/kernel/optimizer.rs +++ b/evm_arithmetization/src/cpu/kernel/optimizer.rs @@ -1,3 +1,5 @@ +use std::cmp::max; + use ethereum_types::U256; use Item::{Push, StandardOp}; use PushTarget::Literal; @@ -10,6 +12,7 @@ use crate::cpu::kernel::utils::{replace_windows, u256_from_bool}; pub(crate) fn optimize_asm(code: &mut Vec) { // Run the optimizer until nothing changes. + let before = code.len(); loop { let old_code = code.clone(); optimize_asm_once(code); @@ -17,6 +20,13 @@ pub(crate) fn optimize_asm(code: &mut Vec) { break; } } + let after = code.len(); + log::trace!( + "Assembly optimizer: {}->{} ({}%).", + before, + after, + 100 * after / max(1, before) + ); } /// A single optimization pass. @@ -27,6 +37,7 @@ fn optimize_asm_once(code: &mut Vec) { remove_swapped_pushes(code); remove_swaps_commutative(code); remove_ignored_values(code); + de_morgan(code); } /// Constant propagation. @@ -142,15 +153,55 @@ fn remove_swaps_commutative(code: &mut Vec) { // Could be extended to other non-side-effecting operations, e.g. [DUP1, ADD, // POP] -> [POP]. fn remove_ignored_values(code: &mut Vec) { - replace_windows(code, |[a, b]| { - if let StandardOp(pop) = b - && &pop == "POP" + replace_windows(code, |window| { + if let [a, StandardOp(pop)] = window + && is_push_or_dup(&a) + && pop == "POP" { - match a { - Push(_) => Some(vec![]), - StandardOp(dup) if dup.starts_with("DUP") => Some(vec![]), - _ => None, - } + Some(vec![]) + } else { + None + } + }); +} + +/// Helper predicate for the De Morgan rules. +fn is_push_or_dup(op: &Item) -> bool { + if matches!(&op, &Push(_)) { + return true; + }; + if let StandardOp(inner) = op + && inner.starts_with("DUP") + { + return true; + } + false +} + +/// De Morgan's First Law: `(not A) and (not B) = not (A or B)`. +/// e.g. `[PUSH a, NOT, PUSH b, NOT, AND] -> [PUSH a, PUSH b, OR, NOT]`. +/// De Morgan's Second Law: `(not A) or (not B) = not (A and B)`. +/// e.g. `[PUSH a, NOT, PUSH b, NOT, OR] -> [PUSH a, PUSH b, AND, NOT]`. +/// This also handles `DUP` operations. +fn de_morgan(code: &mut Vec) { + replace_windows(code, |window| { + if let [op0, StandardOp(op1), op2, StandardOp(op3), StandardOp(op4)] = window + && is_push_or_dup(&op0) + && op1 == "NOT" + && is_push_or_dup(&op2) + && op3 == "NOT" + && (op4 == "AND" || op4 == "OR") + { + Some(vec![ + op0, + op2, + if op4 == "AND" { + StandardOp("OR".into()) + } else { + StandardOp("AND".into()) + }, + StandardOp("NOT".into()), + ]) } else { None } @@ -285,4 +336,44 @@ mod tests { remove_ignored_values(&mut code); assert_eq!(code, vec![]); } + + #[test] + fn test_demorgan1() { + let mut before = vec![ + Push(Literal(3.into())), + StandardOp("NOT".into()), + StandardOp("DUP1".into()), + StandardOp("NOT".into()), + StandardOp("AND".into()), + ]; + let after = vec![ + Push(Literal(3.into())), + StandardOp("DUP1".into()), + StandardOp("OR".into()), + StandardOp("NOT".into()), + ]; + assert!(is_code_improved(&before, &after)); + de_morgan(&mut before); + assert_eq!(before, after); + } + + #[test] + fn test_demorgan2() { + let mut before = vec![ + Push(Literal(3.into())), + StandardOp("NOT".into()), + Push(Literal(8.into())), + StandardOp("NOT".into()), + StandardOp("OR".into()), + ]; + let after = vec![ + Push(Literal(3.into())), + Push(Literal(8.into())), + StandardOp("AND".into()), + StandardOp("NOT".into()), + ]; + assert!(is_code_improved(&before, &after)); + de_morgan(&mut before); + assert_eq!(before, after); + } } diff --git a/evm_arithmetization/src/fixed_recursive_verifier.rs b/evm_arithmetization/src/fixed_recursive_verifier.rs index 746f63f44..0d7bb82dd 100644 --- a/evm_arithmetization/src/fixed_recursive_verifier.rs +++ b/evm_arithmetization/src/fixed_recursive_verifier.rs @@ -3174,7 +3174,7 @@ mod tests { "Create all recursive circuits", AllRecursiveCircuits::::new( &all_stark, - &[16..17, 8..9, 7..8, 4..9, 8..9, 4..7, 17..18, 17..18, 17..18], + &[16..17, 8..9, 7..8, 4..9, 8..9, 4..7, 16..17, 16..17, 16..17], &config, ) ); diff --git a/evm_arithmetization/src/memory/segments.rs b/evm_arithmetization/src/memory/segments.rs index e1b6678f6..a9534830a 100644 --- a/evm_arithmetization/src/memory/segments.rs +++ b/evm_arithmetization/src/memory/segments.rs @@ -100,10 +100,12 @@ impl Segment { pub(crate) const COUNT: usize = 39; /// Unscales this segment by `SEGMENT_SCALING_FACTOR`. + #[inline(always)] pub(crate) const fn unscale(&self) -> usize { *self as usize >> SEGMENT_SCALING_FACTOR } + #[inline(always)] pub(crate) const fn all() -> [Self; Self::COUNT] { [ Self::Code, diff --git a/evm_arithmetization/src/witness/memory.rs b/evm_arithmetization/src/witness/memory.rs index ff616a348..7abced8a1 100644 --- a/evm_arithmetization/src/witness/memory.rs +++ b/evm_arithmetization/src/witness/memory.rs @@ -23,7 +23,8 @@ use crate::witness::errors::ProgramError; use crate::witness::errors::ProgramError::MemoryError; impl MemoryChannel { - pub(crate) fn index(&self) -> usize { + #[inline(always)] + pub(crate) const fn index(&self) -> usize { match *self { Code => 0, GeneralPurpose(n) => { @@ -43,6 +44,7 @@ pub struct MemoryAddress { } impl MemoryAddress { + #[inline(always)] pub(crate) const fn new(context: usize, segment: Segment, virt: usize) -> Self { Self { context, @@ -69,7 +71,8 @@ impl MemoryAddress { Ok(Self::new(context, Segment::all()[segment], virt)) } - pub(crate) fn increment(&mut self) { + #[inline(always)] + pub(crate) const fn increment(&mut self) { self.virt = self.virt.saturating_add(1); } } @@ -104,7 +107,8 @@ pub(crate) static DUMMY_MEMOP: MemoryOp = MemoryOp { }; impl MemoryOp { - pub(crate) fn new( + #[inline(always)] + pub(crate) const fn new( channel: MemoryChannel, clock: usize, address: MemoryAddress, @@ -123,6 +127,7 @@ impl MemoryOp { } } + #[inline(always)] pub(crate) const fn new_dummy_read( address: MemoryAddress, timestamp: usize, @@ -137,6 +142,7 @@ impl MemoryOp { } } + #[inline(always)] pub(crate) const fn sorting_key(&self) -> (usize, usize, usize, usize) { ( self.address.context, @@ -175,6 +181,7 @@ impl MemoryState { } } + #[inline] pub(crate) fn get(&self, address: MemoryAddress) -> Option { if address.context >= self.contexts.len() { return None; @@ -188,7 +195,7 @@ impl MemoryState { return None; } let val = self.contexts[address.context].segments[address.segment].get(address.virt); - assert!( + debug_assert!( val.bits() <= segment.bit_range(), "Value {} exceeds {:?} range of {} bits", val, @@ -245,6 +252,7 @@ impl MemoryState { } } + #[inline] pub(crate) fn set(&mut self, address: MemoryAddress, val: U256) { while address.context >= self.contexts.len() { self.contexts.push(MemoryContextState::default()); @@ -252,7 +260,7 @@ impl MemoryState { let segment = Segment::all()[address.segment]; - assert!( + debug_assert!( val.bits() <= segment.bit_range(), "Value {} exceeds {:?} range of {} bits", val, @@ -320,6 +328,7 @@ pub(crate) struct MemorySegmentState { } impl MemorySegmentState { + #[inline] pub(crate) fn get(&self, virtual_addr: usize) -> U256 { self.content .get(virtual_addr) @@ -328,6 +337,7 @@ impl MemorySegmentState { .unwrap_or_default() } + #[inline] pub(crate) fn set(&mut self, virtual_addr: usize, value: U256) { if virtual_addr >= self.content.len() { self.content.resize(virtual_addr + 1, None); diff --git a/evm_arithmetization/src/witness/traces.rs b/evm_arithmetization/src/witness/traces.rs index 3ff68d8b6..cb9e605e8 100644 --- a/evm_arithmetization/src/witness/traces.rs +++ b/evm_arithmetization/src/witness/traces.rs @@ -126,10 +126,12 @@ impl Traces { self.poseidon_ops.truncate(checkpoint.poseidon_len); } + #[inline(always)] pub(crate) fn mem_ops_since(&self, checkpoint: TraceCheckpoint) -> &[MemoryOp] { &self.memory_ops[checkpoint.memory_len..] } + #[inline(always)] pub(crate) fn clock(&self) -> usize { self.cpu.len() } diff --git a/evm_arithmetization/src/witness/util.rs b/evm_arithmetization/src/witness/util.rs index 5bd103096..b08009e06 100644 --- a/evm_arithmetization/src/witness/util.rs +++ b/evm_arithmetization/src/witness/util.rs @@ -16,12 +16,14 @@ use crate::memory::segments::Segment; use crate::witness::errors::ProgramError; use crate::witness::memory::{MemoryAddress, MemoryChannel, MemoryOp, MemoryOpKind}; +#[inline(always)] fn to_byte_checked(n: U256) -> u8 { let res = n.byte(0); assert_eq!(n, res.into()); res } +#[inline(always)] fn to_bits_le(n: u8) -> [F; 8] { let mut res = [F::ZERO; 8]; for (i, bit) in res.iter_mut().enumerate() { @@ -76,7 +78,8 @@ pub(crate) fn fill_channel_with_value( /// Pushes without writing in memory. This happens in opcodes where a push /// immediately follows a pop. -pub(crate) fn push_no_write(state: &mut GenerationState, val: U256) { +#[inline(always)] +pub(crate) const fn push_no_write(state: &mut GenerationState, val: U256) { state.registers.stack_top = val; state.registers.stack_len += 1; } @@ -135,6 +138,7 @@ pub(crate) fn mem_read_with_log( (val, op) } +#[inline(always)] pub(crate) fn mem_write_log( channel: MemoryChannel, address: MemoryAddress, diff --git a/scripts/prove_stdio.sh b/scripts/prove_stdio.sh index f0db105a8..48e5ecf6a 100755 --- a/scripts/prove_stdio.sh +++ b/scripts/prove_stdio.sh @@ -62,10 +62,10 @@ if ! [[ $TEST_ONLY == "test_only" ]]; then echo "Using specific circuit sizes for witness_b19807080.json" export ARITHMETIC_CIRCUIT_SIZE="16..18" export BYTE_PACKING_CIRCUIT_SIZE="8..15" - export CPU_CIRCUIT_SIZE="14..20" - export KECCAK_CIRCUIT_SIZE="10..18" + export CPU_CIRCUIT_SIZE="9..20" + export KECCAK_CIRCUIT_SIZE="7..18" export KECCAK_SPONGE_CIRCUIT_SIZE="8..14" - export LOGIC_CIRCUIT_SIZE="8..17" + export LOGIC_CIRCUIT_SIZE="5..17" export MEMORY_CIRCUIT_SIZE="17..22" export MEMORY_BEFORE_CIRCUIT_SIZE="16..20" export MEMORY_AFTER_CIRCUIT_SIZE="7..20" @@ -81,7 +81,7 @@ if ! [[ $TEST_ONLY == "test_only" ]]; then export KECCAK_SPONGE_CIRCUIT_SIZE="8..9" export LOGIC_CIRCUIT_SIZE="4..14" export MEMORY_CIRCUIT_SIZE="17..22" - export MEMORY_BEFORE_CIRCUIT_SIZE="17..18" + export MEMORY_BEFORE_CIRCUIT_SIZE="16..18" export MEMORY_AFTER_CIRCUIT_SIZE="7..8" export POSEIDON_CIRCUIT_SIZE="4..8" else @@ -128,7 +128,7 @@ cargo build --release --jobs "$num_procs" start_time=$(date +%s%N) -nice -19 "${REPO_ROOT}/target/release/leader" --runtime in-memory --load-strategy on-demand --block-batch-size $BLOCK_BATCH_SIZE \ +nice -19 "${REPO_ROOT}/target/release/leader" --runtime in-memory --load-strategy on-demand -n 1 --block-batch-size $BLOCK_BATCH_SIZE \ --proof-output-dir $PROOF_OUTPUT_DIR stdio < $INPUT_FILE |& tee $OUTPUT_LOG end_time=$(date +%s%N)