Skip to content

Simplify multiple-of-element size access to arrays #8627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions regression/cbmc/Array_operations4/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
int main()
{
char source[8];
int int_source[2];
int target[4];
int n;
if(n >= 0 && n < 3)
{
__CPROVER_array_replace(&target[n], source);
__CPROVER_array_replace(&target[n], int_source);
__CPROVER_assert(target[n] == int_source[0], "");
__CPROVER_assert(target[n + 1] == int_source[1], "");
}
}
13 changes: 13 additions & 0 deletions regression/cbmc/Array_operations4/program-only.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
CORE paths-lifo-expected-failure
main.c
--program-only
target!0@1#2 ==
target!0@1#3 ==
^EXIT=0$
^SIGNAL=0$
--
byte_update_
--
This test demonstrates that we can simplify byte_update expressions to, e.g.,
WITH expressions.
Disabled for paths-lifo mode, which does not support --program-only.
9 changes: 9 additions & 0 deletions regression/cbmc/Array_operations4/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
CORE
main.c

^VERIFICATION SUCCESSFUL$
^EXIT=0$
^SIGNAL=0$
--
^warning: ignoring
--
1 change: 1 addition & 0 deletions regression/validate-trace-xml-schema/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
['show_properties1', 'test.desc'],
# program-only instead of trace
['vla1', 'program-only.desc'],
['Array_operations4', 'program-only.desc'],
['Pointer_Arithmetic19', 'test.desc'],
['Quantifiers-simplify', 'simplify_not_forall.desc'],
['array-cell-sensitivity15', 'test.desc'],
Expand Down
53 changes: 52 additions & 1 deletion src/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,58 @@
const auto offset_bytes = numeric_cast<mp_integer>(offset);

if(!offset_bytes.has_value())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does has_value mean "literal value" ?

return {};
{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the array read encoding ?

if(auto array_type = type_try_dynamic_cast<array_typet>(expr.type()))
{
const auto target_size_bits = pointer_offset_bits(target_type, ns);
const auto elem_size_bits =
pointer_offset_bits(array_type->element_type(), ns);

// no arrays of non-byte-aligned, zero-, or unknown-sized objects
if(
!target_size_bits.has_value() || !elem_size_bits.has_value() ||
*elem_size_bits <= 0 ||
*elem_size_bits % config.ansi_c.char_width != 0 ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this still allow the optimization to apply on arrays of type T where alignof(T) > 8 ?

*target_size_bits != *elem_size_bits)
{
return {};
}

if(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bail if expression is not of the form K * i

offset.id() != ID_mult || offset.operands().size() != 2 ||
(!to_multi_ary_expr(offset).op0().is_constant() &&
!to_multi_ary_expr(offset).op1().is_constant()))
{
return {};
}

const mult_exprt &offset_mult = to_mult_expr(offset);
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
offset_mult.op0().is_constant() ? offset_mult.op0()
: offset_mult.op1()));
const exprt &other_factor =
offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0();

if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0)
return {};

Check warning on line 723 in src/util/pointer_offset_size.cpp

View check run for this annotation

Codecov / codecov/patch

src/util/pointer_offset_size.cpp#L723

Added line #L723 was not covered by tests

exprt index = mult_exprt{
other_factor,
from_integer(
const_factor / (*elem_size_bits / config.ansi_c.char_width),
other_factor.type())};

return get_subexpression_at_offset(
index_exprt{
expr,
typecast_exprt::conditional_cast(index, array_type->index_type())},
0,
target_type,
ns);
}
else
return {};
}
else
return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns);
}
132 changes: 114 additions & 18 deletions src/util/simplify_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1717,17 +1717,18 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
}
}

// the following require a constant offset
auto offset = numeric_cast<mp_integer>(expr.offset());
if(!offset.has_value() || *offset < 0)
if(offset.has_value() && *offset < 0)
return unchanged(expr);

// try to simplify byte_extract(byte_update(...))
auto const bu = expr_try_dynamic_cast<byte_update_exprt>(expr.op());
std::optional<mp_integer> update_offset;
if(bu)
update_offset = numeric_cast<mp_integer>(bu->offset());
if(bu && el_size.has_value() && update_offset.has_value())
if(
offset.has_value() && bu && el_size.has_value() &&
update_offset.has_value())
{
// byte_extract(byte_update(root, offset_u, value), offset_e) so that the
// update does not affect what is being extracted simplifies to
Expand Down Expand Up @@ -1775,12 +1776,13 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
// don't do any of the following if endianness doesn't match, as
// bytes need to be swapped
if(
*offset == 0 && ((expr.id() == ID_byte_extract_little_endian &&
config.ansi_c.endianness ==
configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) ||
(expr.id() == ID_byte_extract_big_endian &&
config.ansi_c.endianness ==
configt::ansi_ct::endiannesst::IS_BIG_ENDIAN)))
offset.has_value() && *offset == 0 &&
((expr.id() == ID_byte_extract_little_endian &&
config.ansi_c.endianness ==
configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) ||
(expr.id() == ID_byte_extract_big_endian &&
config.ansi_c.endianness ==
configt::ansi_ct::endiannesst::IS_BIG_ENDIAN)))
{
// byte extract of full object is object
if(expr.type() == expr.op().type())
Expand Down Expand Up @@ -1817,7 +1819,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
return unchanged(expr);

if(
expr.op().id() == ID_array_of &&
offset.has_value() && expr.op().id() == ID_array_of &&
to_array_of_expr(expr.op()).op().is_constant())
{
const auto const_bits_opt = expr2bits(
Expand Down Expand Up @@ -1854,7 +1856,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)

// in some cases we even handle non-const array_of
if(
expr.op().id() == ID_array_of &&
offset.has_value() && expr.op().id() == ID_array_of &&
(*offset * expr.get_bits_per_byte()) % (*el_size) == 0 &&
*el_size <=
pointer_offset_bits(to_array_of_expr(expr.op()).what().type(), ns))
Expand All @@ -1870,7 +1872,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
expr2bits(expr.op(), expr.id() == ID_byte_extract_little_endian, ns);

if(
bits.has_value() &&
offset.has_value() && bits.has_value() &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this doing exactly ? seems like it restricts the optimization to a subset of cases ?

mp_integer(bits->size()) >= *el_size + *offset * expr.get_bits_per_byte())
{
// make sure we don't lose bits with structs containing flexible array
Expand Down Expand Up @@ -1986,7 +1988,9 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
const array_typet &array_type = to_array_type(expr.op().type());
const auto &element_bit_width =
pointer_offset_bits(array_type.element_type(), ns);
if(element_bit_width.has_value() && *element_bit_width > 0)
if(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again it seems like the optimization would apply less often ?

offset.has_value() && element_bit_width.has_value() &&
*element_bit_width > 0)
{
if(
*offset > 0 &&
Expand Down Expand Up @@ -2026,7 +2030,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)

// try to refine it down to extracting from a member or an index in an array
auto subexpr =
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);
get_subexpression_at_offset(expr.op(), expr.offset(), expr.type(), ns);
if(subexpr.has_value() && subexpr.value() != expr)
return changed(simplify_rec(subexpr.value())); // recursive call

Expand Down Expand Up @@ -2227,14 +2231,106 @@ simplify_exprt::simplify_byte_update(const byte_update_exprt &expr)
}
}

// the following require a constant offset
if(!offset_int.has_value() || *offset_int < 0)
return unchanged(expr);

// size must be known
if(!val_size.has_value() || *val_size == 0)
return unchanged(expr);

// byte_update(root, offset, value) is with(root, index, value) when root is
// array-typed, the size of value matches the array-element width, and offset
// is guaranteed to be a multiple of the array-element width
if(auto array_type = type_try_dynamic_cast<array_typet>(root.type()))
{
auto el_size = pointer_offset_bits(array_type->element_type(), ns);

if(el_size.has_value() && *el_size > 0 && *val_size % *el_size == 0)
{
if(
offset_int.has_value() &&
(*offset_int * expr.get_bits_per_byte()) % *el_size == 0)
{
mp_integer base_offset =
(*offset_int * expr.get_bits_per_byte()) / *el_size;
with_exprt result_expr{
root,
from_integer(base_offset, array_type->index_type()),
byte_extract_exprt{
matching_byte_extract_id,
value,
from_integer(0, offset.type()),
expr.get_bits_per_byte(),
array_type->element_type()}};
mp_integer n_elements = *val_size / *el_size;

for(mp_integer i = 1; i < n_elements; ++i)
{
result_expr.add_to_operands(
from_integer(base_offset + i, array_type->index_type()),
byte_extract_exprt{
matching_byte_extract_id,
value,
from_integer(
i * (*el_size / expr.get_bits_per_byte()), offset.type()),
expr.get_bits_per_byte(),
array_type->element_type()});
}

return changed(simplify_rec(result_expr));
}
else if(
offset.id() == ID_mult && offset.operands().size() == 2 &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this optimizes the encoding of an array update, and the very similar code above optimized a read ?

(to_multi_ary_expr(offset).op0().is_constant() ||
to_multi_ary_expr(offset).op1().is_constant()))
{
const mult_exprt &offset_mult = to_mult_expr(offset);
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
offset_mult.op0().is_constant() ? offset_mult.op0()
: offset_mult.op1()));
const exprt &other_factor = offset_mult.op0().is_constant()
? offset_mult.op1()
: offset_mult.op0();

if((const_factor * expr.get_bits_per_byte()) % *el_size == 0)
{
exprt base_offset = mult_exprt{
other_factor,
from_integer(
(const_factor * expr.get_bits_per_byte()) / *el_size,
other_factor.type())};
with_exprt result_expr{
root,
typecast_exprt::conditional_cast(
base_offset, array_type->index_type()),
byte_extract_exprt{
matching_byte_extract_id,
value,
from_integer(0, offset.type()),
expr.get_bits_per_byte(),
array_type->element_type()}};
mp_integer n_elements = *val_size / *el_size;
for(mp_integer i = 1; i < n_elements; ++i)
{
result_expr.add_to_operands(
typecast_exprt::conditional_cast(
plus_exprt{base_offset, from_integer(i, base_offset.type())},
array_type->index_type()),
byte_extract_exprt{
matching_byte_extract_id,
value,
from_integer(
i * (*el_size / expr.get_bits_per_byte()), offset.type()),
expr.get_bits_per_byte(),
array_type->element_type()});
}
return changed(simplify_rec(result_expr));
}
}
}
}

// the following require a constant offset
if(!offset_int.has_value() || *offset_int < 0)
return unchanged(expr);

// Are we updating (parts of) a struct? Do individual member updates
// instead, unless there are non-byte-sized bit fields
if(root.type().id() == ID_struct || root.type().id() == ID_struct_tag)
Expand Down
Loading