From 959a53557345816f56b37ad747d57d2791976750 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 14 Apr 2025 13:35:35 +0000 Subject: [PATCH] Simplify multiple-of-element size access to arrays Array operations may fall back to byte_extract or byte_update expressions in parts of the code base. Simplify this to index or WITH expressions, respectively, when the offset is known to be a multiple of the array-element size. --- regression/cbmc/Array_operations4/main.c | 14 ++ .../cbmc/Array_operations4/program-only.desc | 13 ++ regression/cbmc/Array_operations4/test.desc | 9 ++ regression/validate-trace-xml-schema/check.py | 1 + src/util/pointer_offset_size.cpp | 53 ++++++- src/util/simplify_expr.cpp | 132 +++++++++++++++--- 6 files changed, 203 insertions(+), 19 deletions(-) create mode 100644 regression/cbmc/Array_operations4/main.c create mode 100644 regression/cbmc/Array_operations4/program-only.desc create mode 100644 regression/cbmc/Array_operations4/test.desc diff --git a/regression/cbmc/Array_operations4/main.c b/regression/cbmc/Array_operations4/main.c new file mode 100644 index 00000000000..18cb51e1ebf --- /dev/null +++ b/regression/cbmc/Array_operations4/main.c @@ -0,0 +1,14 @@ +int main() +{ + char source[8]; + int int_source[2]; + int target[4]; + int n; + if(n >= 0 && n < 3) + { + __CPROVER_array_replace(&target[n], source); + __CPROVER_array_replace(&target[n], int_source); + __CPROVER_assert(target[n] == int_source[0], ""); + __CPROVER_assert(target[n + 1] == int_source[1], ""); + } +} diff --git a/regression/cbmc/Array_operations4/program-only.desc b/regression/cbmc/Array_operations4/program-only.desc new file mode 100644 index 00000000000..89aef7588d2 --- /dev/null +++ b/regression/cbmc/Array_operations4/program-only.desc @@ -0,0 +1,13 @@ +CORE paths-lifo-expected-failure +main.c +--program-only +target!0@1#2 == +target!0@1#3 == +^EXIT=0$ +^SIGNAL=0$ +-- +byte_update_ +-- +This test demonstrates that we can simplify byte_update expressions to, e.g., +WITH expressions. +Disabled for paths-lifo mode, which does not support --program-only. diff --git a/regression/cbmc/Array_operations4/test.desc b/regression/cbmc/Array_operations4/test.desc new file mode 100644 index 00000000000..766e78589d2 --- /dev/null +++ b/regression/cbmc/Array_operations4/test.desc @@ -0,0 +1,9 @@ +CORE +main.c + +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- diff --git a/regression/validate-trace-xml-schema/check.py b/regression/validate-trace-xml-schema/check.py index 9a5ad7426f6..6c869fd3595 100644 --- a/regression/validate-trace-xml-schema/check.py +++ b/regression/validate-trace-xml-schema/check.py @@ -35,6 +35,7 @@ ['show_properties1', 'test.desc'], # program-only instead of trace ['vla1', 'program-only.desc'], + ['Array_operations4', 'program-only.desc'], ['Pointer_Arithmetic19', 'test.desc'], ['Quantifiers-simplify', 'simplify_not_forall.desc'], ['array-cell-sensitivity15', 'test.desc'], diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index e39b355fd86..ead052a4a80 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -687,7 +687,58 @@ std::optional get_subexpression_at_offset( const auto offset_bytes = numeric_cast(offset); if(!offset_bytes.has_value()) - return {}; + { + if(auto array_type = type_try_dynamic_cast(expr.type())) + { + const auto target_size_bits = pointer_offset_bits(target_type, ns); + const auto elem_size_bits = + pointer_offset_bits(array_type->element_type(), ns); + + // no arrays of non-byte-aligned, zero-, or unknown-sized objects + if( + !target_size_bits.has_value() || !elem_size_bits.has_value() || + *elem_size_bits <= 0 || + *elem_size_bits % config.ansi_c.char_width != 0 || + *target_size_bits != *elem_size_bits) + { + return {}; + } + + if( + offset.id() != ID_mult || offset.operands().size() != 2 || + (!to_multi_ary_expr(offset).op0().is_constant() && + !to_multi_ary_expr(offset).op1().is_constant())) + { + return {}; + } + + const mult_exprt &offset_mult = to_mult_expr(offset); + const auto &const_factor = numeric_cast_v(to_constant_expr( + offset_mult.op0().is_constant() ? offset_mult.op0() + : offset_mult.op1())); + const exprt &other_factor = + offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0(); + + if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0) + return {}; + + exprt index = mult_exprt{ + other_factor, + from_integer( + const_factor / (*elem_size_bits / config.ansi_c.char_width), + other_factor.type())}; + + return get_subexpression_at_offset( + index_exprt{ + expr, + typecast_exprt::conditional_cast(index, array_type->index_type())}, + 0, + target_type, + ns); + } + else + return {}; + } else return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns); } diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index 8b14c8cb55e..fbbfabd3690 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -1717,9 +1717,8 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) } } - // the following require a constant offset auto offset = numeric_cast(expr.offset()); - if(!offset.has_value() || *offset < 0) + if(offset.has_value() && *offset < 0) return unchanged(expr); // try to simplify byte_extract(byte_update(...)) @@ -1727,7 +1726,9 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) std::optional update_offset; if(bu) update_offset = numeric_cast(bu->offset()); - if(bu && el_size.has_value() && update_offset.has_value()) + if( + offset.has_value() && bu && el_size.has_value() && + update_offset.has_value()) { // byte_extract(byte_update(root, offset_u, value), offset_e) so that the // update does not affect what is being extracted simplifies to @@ -1775,12 +1776,13 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) // don't do any of the following if endianness doesn't match, as // bytes need to be swapped if( - *offset == 0 && ((expr.id() == ID_byte_extract_little_endian && - config.ansi_c.endianness == - configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) || - (expr.id() == ID_byte_extract_big_endian && - config.ansi_c.endianness == - configt::ansi_ct::endiannesst::IS_BIG_ENDIAN))) + offset.has_value() && *offset == 0 && + ((expr.id() == ID_byte_extract_little_endian && + config.ansi_c.endianness == + configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) || + (expr.id() == ID_byte_extract_big_endian && + config.ansi_c.endianness == + configt::ansi_ct::endiannesst::IS_BIG_ENDIAN))) { // byte extract of full object is object if(expr.type() == expr.op().type()) @@ -1817,7 +1819,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) return unchanged(expr); if( - expr.op().id() == ID_array_of && + offset.has_value() && expr.op().id() == ID_array_of && to_array_of_expr(expr.op()).op().is_constant()) { const auto const_bits_opt = expr2bits( @@ -1854,7 +1856,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) // in some cases we even handle non-const array_of if( - expr.op().id() == ID_array_of && + offset.has_value() && expr.op().id() == ID_array_of && (*offset * expr.get_bits_per_byte()) % (*el_size) == 0 && *el_size <= pointer_offset_bits(to_array_of_expr(expr.op()).what().type(), ns)) @@ -1870,7 +1872,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) expr2bits(expr.op(), expr.id() == ID_byte_extract_little_endian, ns); if( - bits.has_value() && + offset.has_value() && bits.has_value() && mp_integer(bits->size()) >= *el_size + *offset * expr.get_bits_per_byte()) { // make sure we don't lose bits with structs containing flexible array @@ -1986,7 +1988,9 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) const array_typet &array_type = to_array_type(expr.op().type()); const auto &element_bit_width = pointer_offset_bits(array_type.element_type(), ns); - if(element_bit_width.has_value() && *element_bit_width > 0) + if( + offset.has_value() && element_bit_width.has_value() && + *element_bit_width > 0) { if( *offset > 0 && @@ -2026,7 +2030,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) // try to refine it down to extracting from a member or an index in an array auto subexpr = - get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns); + get_subexpression_at_offset(expr.op(), expr.offset(), expr.type(), ns); if(subexpr.has_value() && subexpr.value() != expr) return changed(simplify_rec(subexpr.value())); // recursive call @@ -2227,14 +2231,106 @@ simplify_exprt::simplify_byte_update(const byte_update_exprt &expr) } } - // the following require a constant offset - if(!offset_int.has_value() || *offset_int < 0) - return unchanged(expr); - // size must be known if(!val_size.has_value() || *val_size == 0) return unchanged(expr); + // byte_update(root, offset, value) is with(root, index, value) when root is + // array-typed, the size of value matches the array-element width, and offset + // is guaranteed to be a multiple of the array-element width + if(auto array_type = type_try_dynamic_cast(root.type())) + { + auto el_size = pointer_offset_bits(array_type->element_type(), ns); + + if(el_size.has_value() && *el_size > 0 && *val_size % *el_size == 0) + { + if( + offset_int.has_value() && + (*offset_int * expr.get_bits_per_byte()) % *el_size == 0) + { + mp_integer base_offset = + (*offset_int * expr.get_bits_per_byte()) / *el_size; + with_exprt result_expr{ + root, + from_integer(base_offset, array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer(0, offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}}; + mp_integer n_elements = *val_size / *el_size; + + for(mp_integer i = 1; i < n_elements; ++i) + { + result_expr.add_to_operands( + from_integer(base_offset + i, array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer( + i * (*el_size / expr.get_bits_per_byte()), offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}); + } + + return changed(simplify_rec(result_expr)); + } + else if( + offset.id() == ID_mult && offset.operands().size() == 2 && + (to_multi_ary_expr(offset).op0().is_constant() || + to_multi_ary_expr(offset).op1().is_constant())) + { + const mult_exprt &offset_mult = to_mult_expr(offset); + const auto &const_factor = numeric_cast_v(to_constant_expr( + offset_mult.op0().is_constant() ? offset_mult.op0() + : offset_mult.op1())); + const exprt &other_factor = offset_mult.op0().is_constant() + ? offset_mult.op1() + : offset_mult.op0(); + + if((const_factor * expr.get_bits_per_byte()) % *el_size == 0) + { + exprt base_offset = mult_exprt{ + other_factor, + from_integer( + (const_factor * expr.get_bits_per_byte()) / *el_size, + other_factor.type())}; + with_exprt result_expr{ + root, + typecast_exprt::conditional_cast( + base_offset, array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer(0, offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}}; + mp_integer n_elements = *val_size / *el_size; + for(mp_integer i = 1; i < n_elements; ++i) + { + result_expr.add_to_operands( + typecast_exprt::conditional_cast( + plus_exprt{base_offset, from_integer(i, base_offset.type())}, + array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer( + i * (*el_size / expr.get_bits_per_byte()), offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}); + } + return changed(simplify_rec(result_expr)); + } + } + } + } + + // the following require a constant offset + if(!offset_int.has_value() || *offset_int < 0) + return unchanged(expr); + // Are we updating (parts of) a struct? Do individual member updates // instead, unless there are non-byte-sized bit fields if(root.type().id() == ID_struct || root.type().id() == ID_struct_tag)