diff --git a/.gitmodules b/.gitmodules index f12802369..25d6d4b43 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "ebpf-samples"] path = ebpf-samples url = https://github.com/vbpf/ebpf-samples.git -[submodule "external/radix_tree"] - path = external/radix_tree - url = https://github.com/ytakano/radix_tree [submodule "external/bpf_conformance"] path = external/bpf_conformance url = https://github.com/Alan-Jowett/bpf_conformance.git diff --git a/external/radix_tree b/external/radix_tree deleted file mode 160000 index f8bc846ef..000000000 --- a/external/radix_tree +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f8bc846ef1f00ee43af2ddc04af67d2e44d50685 diff --git a/src/crab/array_domain.cpp b/src/crab/array_domain.cpp index 629669da8..7990e5da7 100644 --- a/src/crab/array_domain.cpp +++ b/src/crab/array_domain.cpp @@ -4,14 +4,12 @@ #include #include #include -#include #include #include #include "boost/endian/conversion.hpp" #include "crab/array_domain.hpp" -#include "radix_tree/radix_tree.hpp" #include "asm_ostream.hpp" #include "config.hpp" @@ -31,142 +29,6 @@ static bool maybe_between(const NumAbsDomain& dom, const bound_t& x, const linea return !tmp.is_bottom(); } -class offset_t final { - index_t _index{}; - int _prefix_length; - - public: - static constexpr int bitsize = 8 * sizeof(index_t); - offset_t() : _prefix_length(bitsize) {} - offset_t(index_t index) : _index(index), _prefix_length(bitsize) {} - offset_t(index_t index, int prefix_length) : _index(index), _prefix_length(prefix_length) {} - explicit operator int() const { return static_cast(_index); } - operator index_t() const { return _index; } - [[nodiscard]] - int prefix_length() const { - return _prefix_length; - } - - index_t operator[](int n) const { return (_index >> (bitsize - 1 - n)) & 1; } -}; - -// NOTE: required by radix_tree -// Get the length of a key, which is the size usable with the [] operator. -[[maybe_unused]] -int radix_length(const offset_t& offset) { - return offset.prefix_length(); -} - -// NOTE: required by radix_tree -// Get a range of bits out of the middle of a key, starting at [begin] for a given length. -[[maybe_unused]] -offset_t radix_substr(const offset_t& key, int begin, int length) { - uint64_t mask; - - if (length == offset_t::bitsize) { - mask = 0; - } else { - mask = ((index_t)1) << length; - } - - mask -= 1; - mask <<= offset_t::bitsize - length - begin; - - index_t value = (((index_t)key) & mask) << begin; - return offset_t{value, length}; -} - -// NOTE: required by radix_tree -// Concatenate two bit patterns. -[[maybe_unused]] -offset_t radix_join(const offset_t& entry1, const offset_t& entry2) { - index_t value1 = (index_t)entry1; - index_t value2 = (index_t)entry2; - index_t value = value1 | (value2 >> entry1.prefix_length()); - int prefix_length = entry1.prefix_length() + entry2.prefix_length(); - - return offset_t{value, prefix_length}; -} - -/*** - Conceptually, a cell is tuple of an array, offset, size, and - scalar variable such that: - -_scalar = array[_offset, _offset + 1, ..., _offset + _size - 1] - - For simplicity, we don't carry the array inside the cell class. - Only, offset_map objects can create cells. They will consider the - array when generating the scalar variable. -*/ -class cell_t final { - private: - friend class offset_map_t; - - offset_t _offset{}; - unsigned _size{}; - - // Only offset_map_t can create cells - cell_t() = default; - - cell_t(offset_t offset, unsigned size) : _offset(offset), _size(size) {} - - static interval_t to_interval(const offset_t o, unsigned size) { - return {number_t{static_cast(o)}, number_t{static_cast(o)} + number_t{static_cast(size - 1)}}; - } - - public: - [[nodiscard]] - interval_t to_interval() const { - return to_interval(_offset, _size); - } - - [[nodiscard]] - bool is_null() const { - return _offset == 0 && _size == 0; - } - - [[nodiscard]] - offset_t get_offset() const { - return _offset; - } - - [[nodiscard]] - variable_t get_scalar(data_kind_t kind) const { - return variable_t::cell_var(kind, number_t{_offset}, _size); - } - - // ignore the scalar variable - bool operator==(const cell_t& o) const { return to_interval() == o.to_interval(); } - - // ignore the scalar variable - bool operator<(const cell_t& o) const { - if (_offset == o._offset) { - return _size < o._size; - } - return _offset < o._offset; - } - - // Return true if [o, o + size) definitely overlaps with the cell, - // where o is a constant expression. - [[nodiscard]] - bool overlap(const offset_t& o, unsigned size) const { - interval_t x = to_interval(); - interval_t y = to_interval(o, size); - bool res = (!(x & y).is_bottom()); - CRAB_LOG("array-expansion-overlap", - std::cout << "**Checking if " << x << " overlaps with " << y << "=" << res << "\n";); - return res; - } - - // Return true if [symb_lb, symb_ub] may overlap with the cell, - // where symb_lb and symb_ub are not constant expressions. - [[nodiscard]] - bool symbolic_overlap(const linear_expression_t& symb_lb, const linear_expression_t& symb_ub, - const NumAbsDomain& dom) const; - - friend std::ostream& operator<<(std::ostream& o, const cell_t& c) { return o << "cell(" << c.to_interval() << ")"; } -}; - // Return true if [symb_lb, symb_ub] may overlap with the cell, // where symb_lb and symb_ub are not constant expressions. bool cell_t::symbolic_overlap(const linear_expression_t& symb_lb, const linear_expression_t& symb_ub, @@ -175,149 +37,29 @@ bool cell_t::symbolic_overlap(const linear_expression_t& symb_lb, const linear_e return maybe_between(dom, x.lb(), symb_lb, symb_ub) || maybe_between(dom, x.ub(), symb_lb, symb_ub); } -// Map offsets to cells -class offset_map_t final { - private: - friend class array_domain_t; - - using cell_set_t = std::set; - - /* - The keys in the patricia tree are processing in big-endian - order. This means that the keys are sorted. Sortedness is - very important to efficiently perform operations such as - checking for overlap cells. Since keys are treated as bit - patterns, negative offsets can be used but they are treated - as large unsigned numbers. - */ - using patricia_tree_t = radix_tree; - - patricia_tree_t _map; - - // for algorithm::lower_bound and algorithm::upper_bound - struct compare_binding_t { - bool operator()(const typename patricia_tree_t::value_type& kv, const offset_t& o) const { - return kv.first < o; - } - bool operator()(const offset_t& o, const typename patricia_tree_t::value_type& kv) const { - return o < kv.first; - } - bool operator()(const typename patricia_tree_t::value_type& kv1, - const typename patricia_tree_t::value_type& kv2) const { - return kv1.first < kv2.first; - } - }; - - void remove_cell(const cell_t& c); - - void insert_cell(const cell_t& c); - - [[nodiscard]] - std::optional get_cell(offset_t o, unsigned size); - - cell_t mk_cell(offset_t o, unsigned size); - - public: - offset_map_t() = default; - - [[nodiscard]] - bool empty() const { - return _map.empty(); - } - - [[nodiscard]] - std::size_t size() const { - return _map.size(); - } - - void operator-=(const cell_t& c) { remove_cell(c); } - - void operator-=(const std::vector& cells) { - for (auto const& c : cells) { - this->operator-=(c); - } - } - - // Return in out all cells that might overlap with (o, size). - std::vector get_overlap_cells(offset_t o, unsigned size); - - [[nodiscard]] - std::vector get_overlap_cells_symbolic_offset(const NumAbsDomain& dom, const linear_expression_t& symb_lb, - const linear_expression_t& symb_ub); - - friend std::ostream& operator<<(std::ostream& o, offset_map_t& m); - - /* Operations needed if used as value in a separate_domain */ - [[nodiscard]] - bool is_top() const { - return empty(); - } - [[nodiscard]] - bool is_bottom() const { - return false; - } - /* - We don't distinguish between bottom and top. - This is fine because separate_domain only calls bottom if - operator[] is called over a bottom state. Thus, we will make - sure that we don't call operator[] in that case. - */ - static offset_map_t bottom() { return offset_map_t(); } - static offset_map_t top() { return offset_map_t(); } -}; - -void offset_map_t::remove_cell(const cell_t& c) { - offset_t key = c.get_offset(); - _map[key].erase(c); -} +void offset_map_t::remove_cell(const cell_t& c) { set.erase(c); } [[nodiscard]] std::vector offset_map_t::get_overlap_cells_symbolic_offset(const NumAbsDomain& dom, const linear_expression_t& symb_lb, const linear_expression_t& symb_ub) { std::vector out; - for (const auto& [_offset, o_cells] : _map) { - // All cells in o_cells have the same offset. They only differ in the size. - // If the largest cell overlaps with [offset, offset + size) - // then the rest of cells are considered to overlap. - // This is an over-approximation because [offset, offset+size) can overlap - // with the largest cell but it doesn't necessarily overlap with smaller cells. - // For efficiency, we assume it overlaps with all. - cell_t largest_cell; - for (const cell_t& c : o_cells) { - if (largest_cell.is_null()) { - largest_cell = c; - } else { - assert(c.get_offset() == largest_cell.get_offset()); - if (largest_cell < c) { - largest_cell = c; - } - } - } - if (!largest_cell.is_null()) { - if (largest_cell.symbolic_overlap(symb_lb, symb_ub, dom)) { - for (auto& c : o_cells) { - out.push_back(c); - } - } + for (const auto& cell : set) { + if (cell.symbolic_overlap(symb_lb, symb_ub, dom)) { + out.push_back(cell); } } return out; } -void offset_map_t::insert_cell(const cell_t& c) { - offset_t key = c.get_offset(); - _map[key].insert(c); -} +void offset_map_t::insert_cell(const cell_t& c) { set.insert(c); } std::optional offset_map_t::get_cell(offset_t o, unsigned size) { - cell_set_t& cells = _map[o]; - auto it = cells.find(cell_t(o, size)); - if (it != cells.end()) { - return *it; + cell_t res{o, size}; + if (!set.contains(res)) { + return {}; } - // not found - return {}; + return res; } cell_t offset_map_t::mk_cell(offset_t o, unsigned size) { @@ -337,140 +79,33 @@ cell_t offset_map_t::mk_cell(offset_t o, unsigned size) { // Return all cells that might overlap with (o, size). std::vector offset_map_t::get_overlap_cells(offset_t o, unsigned size) { std::vector out; - compare_binding_t comp; - - bool added = false; - auto maybe_c = get_cell(o, size); - if (!maybe_c) { - maybe_c = cell_t(o, size); - insert_cell(*maybe_c); - added = true; - } - - auto lb_it = std::lower_bound(_map.begin(), _map.end(), o, comp); - if (lb_it != _map.end()) { - // Store _map[begin,...,lb_it] into a vector so that we can - // go backwards from lb_it. - // - // TODO: give support for reverse iterator in patricia_tree. - std::vector upto_lb; - upto_lb.reserve(std::distance(_map.begin(), lb_it)); - for (auto it = _map.begin(), et = lb_it; it != et; ++it) { - upto_lb.push_back(it->second); - } - upto_lb.push_back(lb_it->second); - - for (int i = static_cast(upto_lb.size() - 1); i >= 0; --i) { - /////// - // All the cells in upto_lb[i] have the same offset. They - // just differ in the size. - // - // If none of the cells in upto_lb[i] overlap with (o, size) - // we can stop. - //////// - bool continue_outer_loop = false; - for (const cell_t& x : upto_lb[i]) { - if (x.overlap(o, size)) { - if (!(x == *maybe_c)) { - // FIXME: we might have some duplicates. this is a very drastic solution. - if (std::find(out.begin(), out.end(), x) == out.end()) { - out.push_back(x); - } - } - continue_outer_loop = true; - } - } - if (!continue_outer_loop) { - break; - } + for (const cell_t& c : set) { + if (c.overlap(o, size)) { + out.push_back(c); } } - - // search for overlapping cells > o - auto ub_it = std::upper_bound(_map.begin(), _map.end(), o, comp); - for (; ub_it != _map.end(); ++ub_it) { - bool continue_outer_loop = false; - for (const cell_t& x : ub_it->second) { - if (x.overlap(o, size)) { - // FIXME: we might have some duplicates. this is a very drastic solution. - if (std::find(out.begin(), out.end(), x) == out.end()) { - out.push_back(x); - } - continue_outer_loop = true; - } - } - if (!continue_outer_loop) { - break; - } - } - - // do not forget the rest of overlapping cells == o - for (auto it = ++lb_it, et = ub_it; it != et; ++it) { - bool continue_outer_loop = false; - for (const cell_t& x : it->second) { - if (x == *maybe_c) { // we dont put it in out - continue; - } - if (x.overlap(o, size)) { - if (std::find(out.begin(), out.end(), x) == out.end()) { - out.push_back(x); - } - continue_outer_loop = true; - } - } - if (!continue_outer_loop) { - break; - } - } - - if (added) { - remove_cell(*maybe_c); - } return out; } -// We use a global array map -using array_map_t = std::unordered_map; - -static thread_local crab::lazy_allocator global_array_map; - -void clear_thread_local_state() { global_array_map.clear(); } - -static offset_map_t& lookup_array_map(data_kind_t kind) { return (*global_array_map)[kind]; } - -/** - Ugly this needs to be fixed: needed if multiple analyses are - run so we can clear the array map from one run to another. -**/ -void clear_global_state() { - if (!global_array_map->empty()) { - if constexpr (crab::CrabSanityCheckFlag) { - CRAB_WARN("array_expansion static variable map is being cleared"); - } - global_array_map->clear(); - } -} - void array_domain_t::initialize_numbers(int lb, int width) { num_bytes.reset(lb, width); - lookup_array_map(data_kind_t::svalues).mk_cell(lb, width); + array_map[data_kind_t::svalues].mk_cell(lb, width); } std::ostream& operator<<(std::ostream& o, offset_map_t& m) { - if (m._map.empty()) { + if (m.set.empty()) { o << "empty"; } else { - for (const auto& [_offset, cells] : m._map) { - o << "{"; - for (auto cit = cells.begin(), cet = cells.end(); cit != cet;) { - o << *cit; - ++cit; - if (cit != cet) { - o << ","; - } + bool first = true; + o << "{"; + for (const cell_t& cell : m.set) { + o << cell; + if (!first) { + o << ","; } - o << "}\n"; + first = false; } + o << "}\n"; } return o; } @@ -484,8 +119,7 @@ void array_domain_t::split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_st std::optional uvalue = load(inv, data_kind_t::uvalues, number_t(cell_start_index), len); // Create a new cell for that range. - offset_map_t& offset_map = lookup_array_map(kind); - cell_t new_cell = offset_map.mk_cell(cell_start_index, len); + cell_t new_cell = array_map[kind].mk_cell(cell_start_index, len); inv.assign(new_cell.get_scalar(data_kind_t::svalues), svalue); inv.assign(new_cell.get_scalar(data_kind_t::uvalues), uvalue); } @@ -495,7 +129,7 @@ void array_domain_t::split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_st void array_domain_t::split_number_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i, const linear_expression_t& elem_size) { assert(kind == data_kind_t::svalues || kind == data_kind_t::uvalues); - offset_map_t& offset_map = lookup_array_map(kind); + offset_map_t& offset_map = array_map[kind]; interval_t ii = inv.eval_interval(i); std::optional n = ii.singleton(); if (!n) { @@ -539,12 +173,12 @@ void array_domain_t::split_number_var(NumAbsDomain& inv, data_kind_t kind, const } // we can only treat this as non-member because we use global state -static std::optional> kill_and_find_var(NumAbsDomain& inv, data_kind_t kind, - const linear_expression_t& i, - const linear_expression_t& elem_size) { +std::optional> array_domain_t::kill_and_find_var(NumAbsDomain& inv, data_kind_t kind, + const linear_expression_t& i, + const linear_expression_t& elem_size) { std::optional> res; - offset_map_t& offset_map = lookup_array_map(kind); + offset_map_t& offset_map = array_map[kind]; interval_t ii = inv.eval_interval(i); std::vector cells; if (std::optional n = ii.singleton()) { @@ -553,7 +187,7 @@ static std::optional> kill_and_find_var(NumAbsDoma if (n_bytes) { auto size = static_cast(*n_bytes); // -- Constant index: kill overlapping cells - offset_t o((uint64_t)*n); + auto o((uint64_t)*n); cells = offset_map.get_overlap_cells(o, size); res = std::make_pair(o, size); } @@ -580,7 +214,7 @@ static std::optional> kill_and_find_var(NumAbsDoma return res; } -bool array_domain_t::all_num(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) { +bool array_domain_t::all_num(const NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) { auto min_lb = inv.eval_interval(lb).lb().number(); auto max_ub = inv.eval_interval(ub).ub().number(); if (!min_lb || !max_ub || !min_lb->fits_sint32() || !max_ub->fits_sint32()) { @@ -610,7 +244,7 @@ int array_domain_t::min_all_num_size(const NumAbsDomain& inv, variable_t offset) } // Get one byte of a value. -std::optional get_value_byte(NumAbsDomain& inv, offset_t o, int width) { +std::optional get_value_byte(const NumAbsDomain& inv, offset_t o, int width) { variable_t v = variable_t::cell_var(data_kind_t::svalues, (o / width) * width, width); std::optional t = inv.eval_interval(v).singleton(); if (!t) { @@ -642,7 +276,7 @@ std::optional get_value_byte(NumAbsDomain& inv, offset_t o, int width) } break; } - uint8_t* bytes = (uint8_t*)&n; + auto* bytes = (uint8_t*)&n; return bytes[o % width]; } @@ -650,8 +284,8 @@ std::optional array_domain_t::load(NumAbsDomain& inv, data_ const linear_expression_t& i, int width) { interval_t ii = inv.eval_interval(i); if (std::optional n = ii.singleton()) { - offset_map_t& offset_map = lookup_array_map(kind); - int64_t k = (int64_t)*n; + offset_map_t& offset_map = array_map[kind]; + auto k = (int64_t)*n; if (kind == data_kind_t::types) { auto [only_num, only_non_num] = num_bytes.uniformity(k, width); if (only_num) { @@ -663,7 +297,7 @@ std::optional array_domain_t::load(NumAbsDomain& inv, data_ } offset_t o(k); unsigned size = (long)width; - if (auto cell = lookup_array_map(kind).get_cell(o, size)) { + if (auto cell = array_map[kind].get_cell(o, size)) { return cell->get_scalar(kind); } if ((kind == data_kind_t::svalues) || (kind == data_kind_t::uvalues)) { @@ -768,18 +402,18 @@ std::optional array_domain_t::load(NumAbsDomain& inv, data_ // We are about to write to a given range of bytes on the stack. // Any cells covering that range need to be removed, and any cells that only // partially cover that range can be split such that any non-covered portions become new cells. -static std::optional> split_and_find_var(array_domain_t& array_domain, NumAbsDomain& inv, - data_kind_t kind, const linear_expression_t& idx, - const linear_expression_t& elem_size) { +std::optional> array_domain_t::split_and_find_var(NumAbsDomain& inv, data_kind_t kind, + const linear_expression_t& idx, + const linear_expression_t& elem_size) { if (kind == data_kind_t::svalues || kind == data_kind_t::uvalues) { - array_domain.split_number_var(inv, kind, idx, elem_size); + this->split_number_var(inv, kind, idx, elem_size); } return kill_and_find_var(inv, kind, idx, elem_size); } std::optional array_domain_t::store(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size, const linear_expression_t& val) { - auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size); + auto maybe_cell = split_and_find_var(inv, kind, idx, elem_size); if (maybe_cell) { // perform strong update auto [offset, size] = *maybe_cell; @@ -791,7 +425,7 @@ std::optional array_domain_t::store(NumAbsDomain& inv, data_kind_t k num_bytes.havoc(offset, size); } } - variable_t v = lookup_array_map(kind).mk_cell(offset, size).get_scalar(kind); + variable_t v = array_map[kind].mk_cell(offset, size).get_scalar(kind); return v; } return {}; @@ -801,7 +435,7 @@ std::optional array_domain_t::store_type(NumAbsDomain& inv, const li const linear_expression_t& elem_size, const linear_expression_t& val) { auto kind = data_kind_t::types; - auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size); + auto maybe_cell = split_and_find_var(inv, kind, idx, elem_size); if (maybe_cell) { // perform strong update auto [offset, size] = *maybe_cell; @@ -811,7 +445,7 @@ std::optional array_domain_t::store_type(NumAbsDomain& inv, const li } else { num_bytes.havoc(offset, size); } - variable_t v = lookup_array_map(kind).mk_cell(offset, size).get_scalar(kind); + variable_t v = array_map[kind].mk_cell(offset, size).get_scalar(kind); return v; } return {}; @@ -824,14 +458,14 @@ std::optional array_domain_t::store_type(NumAbsDomain& inv, const li void array_domain_t::havoc(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size) { - auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size); + auto maybe_cell = split_and_find_var(inv, kind, idx, elem_size); if (maybe_cell && kind == data_kind_t::types) { auto [offset, size] = *maybe_cell; num_bytes.havoc(offset, size); } } -void array_domain_t::store_numbers(NumAbsDomain& inv, variable_t _idx, variable_t _width) { +void array_domain_t::store_numbers(const NumAbsDomain& inv, variable_t _idx, variable_t _width) { // TODO: this should be an user parameter. const number_t max_num_elems = EBPF_STACK_SIZE; @@ -862,7 +496,10 @@ void array_domain_t::store_numbers(NumAbsDomain& inv, variable_t _idx, variable_ void array_domain_t::set_to_top() { num_bytes.set_to_top(); } -void array_domain_t::set_to_bottom() { num_bytes.set_to_bottom(); } +void array_domain_t::set_to_bottom() { + num_bytes.set_to_bottom(); + array_map.set_to_bottom(); +} bool array_domain_t::is_bottom() const { return num_bytes.is_bottom(); } @@ -870,9 +507,13 @@ bool array_domain_t::is_top() const { return num_bytes.is_top(); } string_invariant array_domain_t::to_set() const { return num_bytes.to_set(); } -bool array_domain_t::operator<=(const array_domain_t& other) const { return num_bytes <= other.num_bytes; } +bool array_domain_t::operator<=(const array_domain_t& other) const { + return num_bytes <= other.num_bytes && array_map <= other.array_map; +} -bool array_domain_t::operator==(const array_domain_t& other) const { return num_bytes == other.num_bytes; } +bool array_domain_t::operator==(const array_domain_t& other) const { + return num_bytes == other.num_bytes && array_map == other.array_map; +} void array_domain_t::operator|=(const array_domain_t& other) { if (is_bottom()) { @@ -880,27 +521,28 @@ void array_domain_t::operator|=(const array_domain_t& other) { return; } num_bytes |= other.num_bytes; + array_map |= other.array_map; } array_domain_t array_domain_t::operator|(const array_domain_t& other) const { - return array_domain_t(num_bytes | other.num_bytes); + return array_domain_t(num_bytes | other.num_bytes, array_map | other.array_map); } array_domain_t array_domain_t::operator&(const array_domain_t& other) const { - return array_domain_t(num_bytes & other.num_bytes); + return array_domain_t(num_bytes & other.num_bytes, array_map & other.array_map); } array_domain_t array_domain_t::widen(const array_domain_t& other) const { - return array_domain_t(num_bytes | other.num_bytes); + return array_domain_t(num_bytes | other.num_bytes, array_map | other.array_map); } array_domain_t array_domain_t::widening_thresholds(const array_domain_t& other, const iterators::thresholds_t& ts) const { - return array_domain_t(num_bytes | other.num_bytes); + return array_domain_t(num_bytes | other.num_bytes, array_map | other.array_map); } array_domain_t array_domain_t::narrow(const array_domain_t& other) const { - return array_domain_t(num_bytes & other.num_bytes); + return array_domain_t(num_bytes & other.num_bytes, array_map & other.array_map); } std::ostream& operator<<(std::ostream& o, const array_domain_t& dom) { return o << dom.num_bytes; } diff --git a/src/crab/array_domain.hpp b/src/crab/array_domain.hpp index 6e0f75ff9..8bf457ee8 100644 --- a/src/crab/array_domain.hpp +++ b/src/crab/array_domain.hpp @@ -26,6 +26,8 @@ #include #include +#include + #include "crab/add_bottom.hpp" #include "crab/variable.hpp" @@ -36,17 +38,269 @@ namespace crab::domains { // Numerical abstract domain. using NumAbsDomain = AddBottom; -void clear_global_state(); +using offset_t = index_t; +class offset_map_t; + +/*** + Conceptually, a cell is tuple of an array, offset, size, and + scalar variable such that: + +_scalar = array[_offset, _offset + 1, ..., _offset + _size - 1] + + For simplicity, we don't carry the array inside the cell class. + Only, offset_map objects can create cells. They will consider the + array when generating the scalar variable. +*/ +class cell_t final { + private: + friend class offset_map_t; + friend std::ostream& operator<<(std::ostream& o, offset_map_t& m); + offset_t _offset{}; + unsigned _size{}; + + static interval_t to_interval(const offset_t o, unsigned size) { + return {number_t{static_cast(o)}, number_t{static_cast(o)} + number_t{static_cast(size - 1)}}; + } + cell_t(offset_t offset, unsigned size) : _offset(offset), _size(size) {} + + public: + // Only offset_map_t can create cells + cell_t() = delete; + + [[nodiscard]] + interval_t to_interval() const { + return to_interval(_offset, _size); + } + + [[nodiscard]] + offset_t get_offset() const { + return _offset; + } + + [[nodiscard]] + variable_t get_scalar(data_kind_t kind) const { + return variable_t::cell_var(kind, number_t{_offset}, _size); + } + + // ignore the scalar variable + bool operator==(const cell_t& o) const { return to_interval() == o.to_interval(); } + + // ignore the scalar variable + bool operator<(const cell_t& o) const { + if (_offset == o._offset) { + return _size < o._size; + } + return _offset < o._offset; + } + + // Return true if [o, o + size) definitely overlaps with the cell, + // where o is a constant expression. + [[nodiscard]] + bool overlap(const offset_t& o, unsigned size) const { + interval_t x = to_interval(); + interval_t y = to_interval(o, size); + bool res = (!(x & y).is_bottom()); + CRAB_LOG("array-expansion-overlap", + std::cout << "**Checking if " << x << " overlaps with " << y << "=" << res << "\n";); + return res; + } + + // Return true if [symb_lb, symb_ub] may overlap with the cell, + // where symb_lb and symb_ub are not constant expressions. + [[nodiscard]] + bool symbolic_overlap(const linear_expression_t& symb_lb, const linear_expression_t& symb_ub, + const NumAbsDomain& dom) const; + + friend std::ostream& operator<<(std::ostream& o, const cell_t& c) { return o << "cell(" << c.to_interval() << ")"; } +}; + +// Map offsets to cells +class offset_map_t final { + private: + friend class array_domain_t; + + std::set set; + + void remove_cell(const cell_t& c); + + void insert_cell(const cell_t& c); + + [[nodiscard]] + std::optional get_cell(offset_t o, unsigned size); + + cell_t mk_cell(offset_t o, unsigned size); + + public: + offset_map_t() = default; + + [[nodiscard]] + bool empty() const { + return set.empty(); + } + + [[nodiscard]] + std::size_t size() const { + return set.size(); + } + + void operator-=(const cell_t& c) { remove_cell(c); } + + void operator-=(const std::vector& cells) { + for (auto const& c : cells) { + this->operator-=(c); + } + } + + // Return in out all cells that might overlap with (o, size). + std::vector get_overlap_cells(offset_t o, unsigned size); + + [[nodiscard]] + std::vector get_overlap_cells_symbolic_offset(const NumAbsDomain& dom, const linear_expression_t& symb_lb, + const linear_expression_t& symb_ub); + + friend std::ostream& operator<<(std::ostream& o, offset_map_t& m); + + /* Operations needed if used as value in a separate_domain */ + [[nodiscard]] + bool is_top() const { + return empty(); + } + [[nodiscard]] + bool is_bottom() const { + return false; + } + /* + We don't distinguish between bottom and top. + This is fine because separate_domain only calls bottom if + operator[] is called over a bottom state. Thus, we will make + sure that we don't call operator[] in that case. + */ + static offset_map_t bottom() { return {}; } + static offset_map_t top() { return {}; } + bool operator<=(const offset_map_t& other) const { + return std::includes(set.begin(), set.end(), other.set.begin(), other.set.end()); + } + bool operator==(const offset_map_t& other) const { return set == other.set; } + offset_map_t operator|(const offset_map_t& other) const { + offset_map_t res; + std::set_union(set.begin(), set.end(), other.set.begin(), other.set.end(), + std::inserter(res.set, res.set.begin())); + return res; + } + offset_map_t operator&(const offset_map_t& other) const { + offset_map_t res; + std::set_intersection(set.begin(), set.end(), other.set.begin(), other.set.end(), + std::inserter(res.set, res.set.begin())); + return res; + } +}; + +// map abstract domain. Lattice operations are memberwise. +class array_map_t { + boost::container::flat_map map; + + public: + array_map_t() = default; + + [[nodiscard]] + bool empty() const { + return map.empty(); + } + + [[nodiscard]] + std::size_t size() const { + return map.size(); + } + bool operator==(const array_map_t& other) const = default; + + offset_map_t& operator[](data_kind_t kind) { return map[kind]; } + + const offset_map_t& operator[](data_kind_t kind) const { return map.at(kind); } + + void operator-=(const cell_t& c) { + for (auto& [_, v] : map) { + v -= c; + } + } + + void operator-=(const std::vector& cells) { + for (auto const& c : cells) { + this->operator-=(c); + } + } + + [[nodiscard]] + std::vector get_overlap_cells(data_kind_t kind, offset_t o, unsigned size) { + auto it = map.find(kind); + if (it != map.end()) { + return it->second.get_overlap_cells(o, size); + } + return {}; + } + + [[nodiscard]] + std::vector get_overlap_cells_symbolic_offset(data_kind_t kind, const NumAbsDomain& dom, + const linear_expression_t& symb_lb, + const linear_expression_t& symb_ub) { + auto it = map.find(kind); + if (it != map.end()) { + return it->second.get_overlap_cells_symbolic_offset(dom, symb_lb, symb_ub); + } + return {}; + } + + array_map_t operator|(const array_map_t& other) const { + array_map_t res; + for (auto& [kind, offset_map] : map) { + auto it = other.map.find(kind); + if (it != other.map.end()) { + res.map[kind] = offset_map | it->second; + } else { + res.map[kind] = offset_map; + } + } + for (auto& [kind, offset_map] : other.map) { + if (res.map.find(kind) == res.map.end()) { + res.map[kind] = offset_map; + } + } + return res; + } -void clear_thread_local_state(); + array_map_t operator&(const array_map_t& other) const { + array_map_t res; + for (auto& [kind, offset_map] : map) { + auto it = other.map.find(kind); + if (it != other.map.end()) { + res.map[kind] = offset_map & it->second; + } + } + return res; + } + + bool operator<=(const array_map_t& other) const { + return std::ranges::all_of(map, [&other](const auto& p) { + auto it = other.map.find(p.first); + return it != other.map.end() && p.second <= it->second; + }); + } + + array_map_t& operator|=(const array_map_t& other) { + *this = *this | other; + return *this; + } + void set_to_bottom() { map.clear(); } +}; class array_domain_t final { bitset_domain_t num_bytes; + array_map_t array_map; public: array_domain_t() = default; - array_domain_t(const bitset_domain_t& num_bytes) : num_bytes(num_bytes) {} + array_domain_t(const bitset_domain_t& num_bytes, array_map_t array_map) + : num_bytes(num_bytes), array_map(std::move(array_map)) {} void set_to_top(); void set_to_bottom(); @@ -70,7 +324,8 @@ class array_domain_t final { [[nodiscard]] string_invariant to_set() const; - bool all_num(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub); + bool all_num(const NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub); + [[nodiscard]] int min_all_num_size(const NumAbsDomain& inv, variable_t offset) const; @@ -86,13 +341,20 @@ class array_domain_t final { const linear_expression_t& elem_size); // Perform array stores over an array segment - void store_numbers(NumAbsDomain& inv, variable_t _idx, variable_t _width); + void store_numbers(const NumAbsDomain& inv, variable_t _idx, variable_t _width); void split_number_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i, const linear_expression_t& elem_size); void split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_start_index, unsigned int len); void initialize_numbers(int lb, int width); + + std::optional> split_and_find_var(NumAbsDomain& inv, data_kind_t kind, + const linear_expression_t& idx, + const linear_expression_t& elem_size); + std::optional> kill_and_find_var(NumAbsDomain& inv, data_kind_t kind, + const linear_expression_t& i, + const linear_expression_t& elem_size); }; } // namespace crab::domains diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 4302e1cfe..5c1dce1b5 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -2173,8 +2173,10 @@ void ebpf_domain_t::do_store_stack(NumAbsDomain& inv, const number_t& width, con std::optional var = stack.store_type(inv, addr, width, val_type); type_inv.assign_type(inv, var, val_type); if (width == 8) { - inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); - inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); + auto slot_svalue = stack.store(inv, data_kind_t::svalues, addr, width, val_svalue); + auto slot_uvalue = stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue); + inv.assign(slot_svalue, val_svalue); + inv.assign(slot_uvalue, val_uvalue); if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_CTX)) { inv.assign(stack.store(inv, data_kind_t::ctx_offsets, addr, width, opt_val_reg->ctx_offset), diff --git a/src/crab/ebpf_domain.hpp b/src/crab/ebpf_domain.hpp index d6f3aecc6..fd1bc5b6a 100644 --- a/src/crab/ebpf_domain.hpp +++ b/src/crab/ebpf_domain.hpp @@ -164,8 +164,10 @@ class ebpf_domain_t final { std::optional get_type_offset_variable(const Reg& reg) const; void scratch_caller_saved_registers(); + void save_callee_saved_registers(const std::string& prefix); void restore_callee_saved_registers(const std::string& prefix); + [[nodiscard]] std::optional get_map_type(const Reg& map_fd_reg) const; [[nodiscard]] @@ -228,6 +230,7 @@ class ebpf_domain_t final { crab::domains::array_domain_t stack; std::function check_require{}; + bool get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const; struct TypeDomain { diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index 7a8c776e5..e8895883b 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -150,7 +150,6 @@ static checks_db get_analysis_report(std::ostream& s, cfg_t& cfg, crab::invarian static checks_db get_ebpf_report(std::ostream& s, cfg_t& cfg, program_info info, const ebpf_verifier_options_t* options) { global_program_info = std::move(info); - crab::domains::clear_global_state(); crab::variable_t::clear_thread_local_state(); thread_local_options = *options; @@ -194,7 +193,6 @@ std::tuple ebpf_analyze_program_for_test(std::ostream& o const string_invariant& entry_invariant, const program_info& info, const ebpf_verifier_options_t& options) { - crab::domains::clear_global_state(); crab::variable_t::clear_thread_local_state(); thread_local_options = options; @@ -246,6 +244,5 @@ void ebpf_verifier_clear_thread_local_state() { crab::variable_t::clear_thread_local_state(); crab::CrabStats::clear_thread_local_state(); global_program_info.clear(); - crab::domains::clear_thread_local_state(); crab::domains::SplitDBM::clear_thread_local_state(); }