Skip to content

Commit ba35ebf

Browse files
committed
Simplify multiple-of-element size access to arrays
Array operations may fall back to byte_extract or byte_update expressions in parts of the code base. Simplify this to index or WITH expressions, respectively, when the offset is known to be a multiple of the array-element size, or a constant offset plus a multiple of the array-element size.
1 parent 3c915eb commit ba35ebf

File tree

6 files changed

+266
-19
lines changed

6 files changed

+266
-19
lines changed
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
int main()
2+
{
3+
char source[8];
4+
int int_source[2];
5+
int target[4];
6+
int n;
7+
if(n >= 0 && n < 3)
8+
{
9+
__CPROVER_array_replace(&target[n], source);
10+
__CPROVER_array_replace(&target[n], int_source);
11+
__CPROVER_assert(target[n] == int_source[0], "");
12+
__CPROVER_assert(target[n + 1] == int_source[1], "");
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
CORE paths-lifo-expected-failure
2+
main.c
3+
--program-only
4+
target!0@1#2 ==
5+
target!0@1#3 ==
6+
^EXIT=0$
7+
^SIGNAL=0$
8+
--
9+
byte_update_
10+
--
11+
This test demonstrates that we can simplify byte_update expressions to, e.g.,
12+
WITH expressions.
13+
Disabled for paths-lifo mode, which does not support --program-only.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
CORE
2+
main.c
3+
4+
^VERIFICATION SUCCESSFUL$
5+
^EXIT=0$
6+
^SIGNAL=0$
7+
--
8+
^warning: ignoring
9+
--

regression/validate-trace-xml-schema/check.py

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
['show_properties1', 'test.desc'],
3636
# program-only instead of trace
3737
['vla1', 'program-only.desc'],
38+
['Array_operations4', 'program-only.desc'],
3839
['Pointer_Arithmetic19', 'test.desc'],
3940
['Quantifiers-simplify', 'simplify_not_forall.desc'],
4041
['array-cell-sensitivity15', 'test.desc'],

src/util/pointer_offset_size.cpp

+87-1
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,93 @@ std::optional<exprt> get_subexpression_at_offset(
687687
const auto offset_bytes = numeric_cast<mp_integer>(offset);
688688

689689
if(!offset_bytes.has_value())
690-
return {};
690+
{
691+
// offset is not a constant; try to see whether it is a multiple of a
692+
// constant
693+
if(auto array_type = type_try_dynamic_cast<array_typet>(expr.type()))
694+
{
695+
const auto target_size_bits = pointer_offset_bits(target_type, ns);
696+
const auto elem_size_bits =
697+
pointer_offset_bits(array_type->element_type(), ns);
698+
699+
// no arrays of zero-, or unknown-sized elements, or ones where elements
700+
// have a bit-width that isn't a multiple of bytes
701+
if(
702+
!target_size_bits.has_value() || !elem_size_bits.has_value() ||
703+
*elem_size_bits <= 0 ||
704+
*elem_size_bits % config.ansi_c.char_width != 0 ||
705+
*target_size_bits != *elem_size_bits)
706+
{
707+
return {};
708+
}
709+
710+
// if we have an offset C + x (where C is a constant) we can try to
711+
// recurse by first looking at the member at offset C
712+
if(
713+
offset.id() == ID_plus && offset.operands().size() == 2 &&
714+
(to_multi_ary_expr(offset).op0().is_constant() ||
715+
to_multi_ary_expr(offset).op1().is_constant()))
716+
{
717+
const plus_exprt &offset_plus = to_plus_expr(offset);
718+
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
719+
offset_plus.op0().is_constant() ? offset_plus.op0()
720+
: offset_plus.op1()));
721+
const exprt &other_factor = offset_plus.op0().is_constant()
722+
? offset_plus.op1()
723+
: offset_plus.op0();
724+
725+
auto expr_at_offset_C =
726+
get_subexpression_at_offset(expr, const_factor, target_type, ns);
727+
728+
if(
729+
expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index &&
730+
to_index_expr(*expr_at_offset_C).index().is_zero())
731+
{
732+
return get_subexpression_at_offset(
733+
to_index_expr(*expr_at_offset_C).array(),
734+
other_factor,
735+
target_type,
736+
ns);
737+
}
738+
}
739+
740+
// give up if the offset expression isn't of the form K * i or i * K
741+
// (where K is a constant)
742+
if(
743+
offset.id() != ID_mult || offset.operands().size() != 2 ||
744+
(!to_multi_ary_expr(offset).op0().is_constant() &&
745+
!to_multi_ary_expr(offset).op1().is_constant()))
746+
{
747+
return {};
748+
}
749+
750+
const mult_exprt &offset_mult = to_mult_expr(offset);
751+
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
752+
offset_mult.op0().is_constant() ? offset_mult.op0()
753+
: offset_mult.op1()));
754+
const exprt &other_factor =
755+
offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0();
756+
757+
if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0)
758+
return {};
759+
760+
exprt index = mult_exprt{
761+
other_factor,
762+
from_integer(
763+
const_factor / (*elem_size_bits / config.ansi_c.char_width),
764+
other_factor.type())};
765+
766+
return get_subexpression_at_offset(
767+
index_exprt{
768+
expr,
769+
typecast_exprt::conditional_cast(index, array_type->index_type())},
770+
0,
771+
target_type,
772+
ns);
773+
}
774+
else
775+
return {};
776+
}
691777
else
692778
return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns);
693779
}

src/util/simplify_expr.cpp

+142-18
Original file line numberDiff line numberDiff line change
@@ -1717,17 +1717,18 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
17171717
}
17181718
}
17191719

1720-
// the following require a constant offset
17211720
auto offset = numeric_cast<mp_integer>(expr.offset());
1722-
if(!offset.has_value() || *offset < 0)
1721+
if(offset.has_value() && *offset < 0)
17231722
return unchanged(expr);
17241723

17251724
// try to simplify byte_extract(byte_update(...))
17261725
auto const bu = expr_try_dynamic_cast<byte_update_exprt>(expr.op());
17271726
std::optional<mp_integer> update_offset;
17281727
if(bu)
17291728
update_offset = numeric_cast<mp_integer>(bu->offset());
1730-
if(bu && el_size.has_value() && update_offset.has_value())
1729+
if(
1730+
offset.has_value() && bu && el_size.has_value() &&
1731+
update_offset.has_value())
17311732
{
17321733
// byte_extract(byte_update(root, offset_u, value), offset_e) so that the
17331734
// update does not affect what is being extracted simplifies to
@@ -1775,12 +1776,13 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
17751776
// don't do any of the following if endianness doesn't match, as
17761777
// bytes need to be swapped
17771778
if(
1778-
*offset == 0 && ((expr.id() == ID_byte_extract_little_endian &&
1779-
config.ansi_c.endianness ==
1780-
configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) ||
1781-
(expr.id() == ID_byte_extract_big_endian &&
1782-
config.ansi_c.endianness ==
1783-
configt::ansi_ct::endiannesst::IS_BIG_ENDIAN)))
1779+
offset.has_value() && *offset == 0 &&
1780+
((expr.id() == ID_byte_extract_little_endian &&
1781+
config.ansi_c.endianness ==
1782+
configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) ||
1783+
(expr.id() == ID_byte_extract_big_endian &&
1784+
config.ansi_c.endianness ==
1785+
configt::ansi_ct::endiannesst::IS_BIG_ENDIAN)))
17841786
{
17851787
// byte extract of full object is object
17861788
if(expr.type() == expr.op().type())
@@ -1817,7 +1819,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
18171819
return unchanged(expr);
18181820

18191821
if(
1820-
expr.op().id() == ID_array_of &&
1822+
offset.has_value() && expr.op().id() == ID_array_of &&
18211823
to_array_of_expr(expr.op()).op().is_constant())
18221824
{
18231825
const auto const_bits_opt = expr2bits(
@@ -1854,7 +1856,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
18541856

18551857
// in some cases we even handle non-const array_of
18561858
if(
1857-
expr.op().id() == ID_array_of &&
1859+
offset.has_value() && expr.op().id() == ID_array_of &&
18581860
(*offset * expr.get_bits_per_byte()) % (*el_size) == 0 &&
18591861
*el_size <=
18601862
pointer_offset_bits(to_array_of_expr(expr.op()).what().type(), ns))
@@ -1870,7 +1872,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
18701872
expr2bits(expr.op(), expr.id() == ID_byte_extract_little_endian, ns);
18711873

18721874
if(
1873-
bits.has_value() &&
1875+
offset.has_value() && bits.has_value() &&
18741876
mp_integer(bits->size()) >= *el_size + *offset * expr.get_bits_per_byte())
18751877
{
18761878
// make sure we don't lose bits with structs containing flexible array
@@ -1986,7 +1988,9 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
19861988
const array_typet &array_type = to_array_type(expr.op().type());
19871989
const auto &element_bit_width =
19881990
pointer_offset_bits(array_type.element_type(), ns);
1989-
if(element_bit_width.has_value() && *element_bit_width > 0)
1991+
if(
1992+
offset.has_value() && element_bit_width.has_value() &&
1993+
*element_bit_width > 0)
19901994
{
19911995
if(
19921996
*offset > 0 &&
@@ -2026,7 +2030,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
20262030

20272031
// try to refine it down to extracting from a member or an index in an array
20282032
auto subexpr =
2029-
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);
2033+
get_subexpression_at_offset(expr.op(), expr.offset(), expr.type(), ns);
20302034
if(subexpr.has_value() && subexpr.value() != expr)
20312035
return changed(simplify_rec(subexpr.value())); // recursive call
20322036

@@ -2227,14 +2231,134 @@ simplify_exprt::simplify_byte_update(const byte_update_exprt &expr)
22272231
}
22282232
}
22292233

2230-
// the following require a constant offset
2231-
if(!offset_int.has_value() || *offset_int < 0)
2232-
return unchanged(expr);
2233-
22342234
// size must be known
22352235
if(!val_size.has_value() || *val_size == 0)
22362236
return unchanged(expr);
22372237

2238+
// byte_update(root, offset, value) is with(root, index, value) when root is
2239+
// array-typed, the size of value matches the array-element width, and offset
2240+
// is guaranteed to be a multiple of the array-element width
2241+
if(auto array_type = type_try_dynamic_cast<array_typet>(root.type()))
2242+
{
2243+
auto el_size = pointer_offset_bits(array_type->element_type(), ns);
2244+
2245+
if(el_size.has_value() && *el_size > 0 && *val_size % *el_size == 0)
2246+
{
2247+
if(
2248+
offset_int.has_value() &&
2249+
(*offset_int * expr.get_bits_per_byte()) % *el_size == 0)
2250+
{
2251+
mp_integer base_offset =
2252+
(*offset_int * expr.get_bits_per_byte()) / *el_size;
2253+
with_exprt result_expr{
2254+
root,
2255+
from_integer(base_offset, array_type->index_type()),
2256+
byte_extract_exprt{
2257+
matching_byte_extract_id,
2258+
value,
2259+
from_integer(0, offset.type()),
2260+
expr.get_bits_per_byte(),
2261+
array_type->element_type()}};
2262+
mp_integer n_elements = *val_size / *el_size;
2263+
2264+
for(mp_integer i = 1; i < n_elements; ++i)
2265+
{
2266+
result_expr.add_to_operands(
2267+
from_integer(base_offset + i, array_type->index_type()),
2268+
byte_extract_exprt{
2269+
matching_byte_extract_id,
2270+
value,
2271+
from_integer(
2272+
i * (*el_size / expr.get_bits_per_byte()), offset.type()),
2273+
expr.get_bits_per_byte(),
2274+
array_type->element_type()});
2275+
}
2276+
2277+
return changed(simplify_rec(result_expr));
2278+
}
2279+
// if we have an offset C + x (where C is a constant) we can try to
2280+
// recurse by first looking at the member at offset C
2281+
else if(
2282+
offset.id() == ID_plus && offset.operands().size() == 2 &&
2283+
(to_multi_ary_expr(offset).op0().is_constant() ||
2284+
to_multi_ary_expr(offset).op1().is_constant()))
2285+
{
2286+
const plus_exprt &offset_plus = to_plus_expr(offset);
2287+
const auto &const_factor = offset_plus.op0().is_constant()
2288+
? offset_plus.op0()
2289+
: offset_plus.op1();
2290+
const exprt &other_factor = offset_plus.op0().is_constant()
2291+
? offset_plus.op1()
2292+
: offset_plus.op0();
2293+
2294+
auto tmp = expr;
2295+
tmp.set_offset(const_factor);
2296+
exprt expr_at_offset_C = simplify_byte_update(tmp);
2297+
2298+
if(
2299+
expr_at_offset_C.id() == ID_with &&
2300+
to_with_expr(expr_at_offset_C).where().is_zero())
2301+
{
2302+
tmp.set_op(to_with_expr(expr_at_offset_C).old());
2303+
tmp.set_offset(other_factor);
2304+
return changed(simplify_byte_update(tmp));
2305+
}
2306+
}
2307+
else if(
2308+
offset.id() == ID_mult && offset.operands().size() == 2 &&
2309+
(to_multi_ary_expr(offset).op0().is_constant() ||
2310+
to_multi_ary_expr(offset).op1().is_constant()))
2311+
{
2312+
const mult_exprt &offset_mult = to_mult_expr(offset);
2313+
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
2314+
offset_mult.op0().is_constant() ? offset_mult.op0()
2315+
: offset_mult.op1()));
2316+
const exprt &other_factor = offset_mult.op0().is_constant()
2317+
? offset_mult.op1()
2318+
: offset_mult.op0();
2319+
2320+
if((const_factor * expr.get_bits_per_byte()) % *el_size == 0)
2321+
{
2322+
exprt base_offset = mult_exprt{
2323+
other_factor,
2324+
from_integer(
2325+
(const_factor * expr.get_bits_per_byte()) / *el_size,
2326+
other_factor.type())};
2327+
with_exprt result_expr{
2328+
root,
2329+
typecast_exprt::conditional_cast(
2330+
base_offset, array_type->index_type()),
2331+
byte_extract_exprt{
2332+
matching_byte_extract_id,
2333+
value,
2334+
from_integer(0, offset.type()),
2335+
expr.get_bits_per_byte(),
2336+
array_type->element_type()}};
2337+
mp_integer n_elements = *val_size / *el_size;
2338+
for(mp_integer i = 1; i < n_elements; ++i)
2339+
{
2340+
result_expr.add_to_operands(
2341+
typecast_exprt::conditional_cast(
2342+
plus_exprt{base_offset, from_integer(i, base_offset.type())},
2343+
array_type->index_type()),
2344+
byte_extract_exprt{
2345+
matching_byte_extract_id,
2346+
value,
2347+
from_integer(
2348+
i * (*el_size / expr.get_bits_per_byte()), offset.type()),
2349+
expr.get_bits_per_byte(),
2350+
array_type->element_type()});
2351+
}
2352+
return changed(simplify_rec(result_expr));
2353+
}
2354+
}
2355+
}
2356+
}
2357+
2358+
// the following require a constant offset
2359+
if(!offset_int.has_value() || *offset_int < 0)
2360+
return unchanged(expr);
2361+
22382362
// Are we updating (parts of) a struct? Do individual member updates
22392363
// instead, unless there are non-byte-sized bit fields
22402364
if(root.type().id() == ID_struct || root.type().id() == ID_struct_tag)

0 commit comments

Comments
 (0)