Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for 64bit immediate with type 2 #820

Merged
merged 5 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/asm_cfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,8 @@ static std::string instype(Instruction ins) {
return "arith";
} else if (std::holds_alternative<LoadMapFd>(ins)) {
return "assign";
} else if (std::holds_alternative<LoadMapAddress>(ins)) {
return "assign";
} else if (std::holds_alternative<Assume>(ins)) {
return "assume";
} else {
Expand Down
92 changes: 76 additions & 16 deletions src/asm_files.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,20 @@ get_program_name_and_size(const ELFIO::section& sec, const ELFIO::Elf_Xword star
return {program_name, size};
}

void relocate_map(ebpf_inst& inst, const std::string& symbol_name,
// Reject a relocation whose target instruction is not in the load (LD) instruction class.
//
// instruction: the instruction the relocation refers to; only its opcode class bits are inspected.
// symbol_name: name of the relocated symbol, used only to build the error message.
// offset:      relocation offset; divided by sizeof(ebpf_inst) so the message reports an
//              instruction index rather than the raw offset.
// Throws UnmarshalError when (opcode & INST_CLS_MASK) is not INST_CLS_LD.
void verify_load_instruction(const ebpf_inst& instruction, const std::string& symbol_name, ELFIO::Elf64_Addr offset) {
    const bool is_load_class = (instruction.opcode & INST_CLS_MASK) == INST_CLS_LD;
    if (is_load_class) {
        return;
    }
    const auto instruction_index = offset / sizeof(ebpf_inst);
    throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " +
                         std::to_string(instruction_index));
}
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved

void relocate_map(ebpf_inst& reloc_inst, const std::string& symbol_name,
const std::variant<size_t, std::map<std::string, size_t>>& map_record_size_or_map_offsets,
const program_info& info, const ELFIO::Elf64_Addr offset, const ELFIO::Elf_Word index,
const ELFIO::const_symbol_section_accessor& symbols) {
// Only permit loading the address of the map.
if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LD) {
throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " +
std::to_string(offset / sizeof(ebpf_inst)));
}
inst.src = 1; // magic number for LoadFd
verify_load_instruction(reloc_inst, symbol_name, offset);
reloc_inst.src = INST_LD_MODE_MAP_FD;

// Relocation value is an offset into the "maps" or ".maps" section.
size_t reloc_value = std::numeric_limits<size_t>::max();
Expand All @@ -187,13 +191,42 @@ void relocate_map(ebpf_inst& inst, const std::string& symbol_name,
const auto it = map_descriptors_offsets.find(symbol_name);
if (it != map_descriptors_offsets.end()) {
reloc_value = it->second;
} else {
throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
}
}
if (reloc_value >= info.map_descriptors.size()) {
throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " +
"Make sure to compile with -O2.");
}
inst.imm = info.map_descriptors.at(reloc_value).original_fd;
reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd;
}

// Rewrite a two-slot load instruction so that it refers to a map value instead of a raw symbol.
//
// After a successful call:
//   - next_reloc_inst.imm holds the immediate that reloc_inst carried on entry,
//   - reloc_inst.src is INST_LD_MODE_MAP_VALUE,
//   - reloc_inst.imm is the original_fd of the map descriptor selected by symbol_name.
//
// reloc_inst:      first slot of the load; must be in the LD instruction class.
// next_reloc_inst: second slot of the load; only its imm field is written.
// symbol_name:     key looked up in the name -> descriptor-index map. The caller visible in this
//                  file passes the name of the section that contains the symbol.
// info:            supplies map_descriptors, which the looked-up index selects from.
// map_record_size_or_map_offsets: must hold the std::map alternative; std::get<1> throws
//                  std::bad_variant_access if it holds size_t instead.
// offset:          relocation offset, forwarded to verify_load_instruction for its error message.
//
// Throws UnmarshalError if reloc_inst is not a load, if symbol_name has no entry in the map, or if
// the entry is not a valid index into info.map_descriptors.
void relocate_global_variable(ebpf_inst& reloc_inst, ebpf_inst& next_reloc_inst, const std::string& symbol_name,
const program_info& info,
const std::variant<size_t, std::map<std::string, size_t>>& map_record_size_or_map_offsets,
const ELFIO::Elf64_Addr offset) {
// Only permit loading the address of the global variable.
verify_load_instruction(reloc_inst, symbol_name, offset);

// Copy the immediate value to the next instruction.
// This must happen before reloc_inst.imm is overwritten with the map fd at the end of the function.
next_reloc_inst.imm = reloc_inst.imm;
reloc_inst.src = INST_LD_MODE_MAP_VALUE;
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved

// Resolve symbol_name to an index into info.map_descriptors; the max() initial value is always
// replaced because the lookup either succeeds or throws.
size_t reloc_value = std::numeric_limits<size_t>::max();
auto& map_descriptors_offsets = std::get<1>(map_record_size_or_map_offsets);
const auto it = map_descriptors_offsets.find(symbol_name);
if (it != map_descriptors_offsets.end()) {
reloc_value = it->second;
} else {
throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
}

// Reject an index that does not refer to an existing descriptor.
if (reloc_value >= info.map_descriptors.size()) {
throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " +
"Make sure to compile with -O2.");
}
// The first slot's immediate now identifies the map by its original fd.
reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd;
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved
}

// Structure used to keep track of subprogram relocation data until any subprograms
Expand Down Expand Up @@ -319,6 +352,7 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path

program_info info{platform};
std::set<ELFIO::Elf_Half> map_section_indices;
std::set<ELFIO::Elf_Half> global_variable_section_indices;

auto btf = reader.sections[".BTF"];
std::optional<libbtf::btf_type_data> btf_data;
Expand All @@ -338,13 +372,11 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path

std::variant<size_t, std::map<std::string, size_t>> map_record_size_or_map_offsets = size_t{0};
ELFIO::const_symbol_section_accessor symbols{reader, symbol_section};
if (!reader.sections[".maps"]) {

if (std::ranges::any_of(reader.sections, [](const auto& section) { return is_map_section(section->get_name()); })) {
map_record_size_or_map_offsets =
parse_map_sections(options, platform, reader, info.map_descriptors, map_section_indices, symbols);
} else {
if (!btf_data.has_value()) {
throw UnmarshalError("No BTF section found in ELF file " + path);
}
} else if (btf_data.has_value()) {
map_record_size_or_map_offsets = parse_map_section(*btf_data, info.map_descriptors);
// Prevail requires:
// Map fds are sequential starting from 1.
Expand All @@ -366,7 +398,17 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
map_descriptor.inner_map_fd = type_id_to_fd_map[map_descriptor.inner_map_fd];
}
}
map_section_indices.insert(reader.sections[".maps"]->get_index());
if (reader.sections[".maps"]) {
map_section_indices.insert(reader.sections[".maps"]->get_index());
}

for (auto section_name : {".rodata", ".data", ".bss"}) {
if (const auto section = reader.sections[section_name]) {
if (section->get_size() != 0) {
global_variable_section_indices.insert(section->get_index());
}
}
}
}

vector<raw_program> res;
Expand Down Expand Up @@ -426,12 +468,13 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
if (offset / sizeof(ebpf_inst) >= prog.prog.size()) {
throw UnmarshalError("Invalid relocation data");
}
ebpf_inst& inst = prog.prog[offset / sizeof(ebpf_inst)];

ebpf_inst& reloc_inst = prog.prog[offset / sizeof(ebpf_inst)];

auto [symbol_name, symbol_section_index] = get_symbol_name_and_section_index(symbols, index);

// Queue up relocation for function symbols.
if (inst.opcode == INST_OP_CALL && inst.src == INST_CALL_LOCAL) {
if (reloc_inst.opcode == INST_OP_CALL && reloc_inst.src == INST_CALL_LOCAL) {
function_relocation fr{.prog_index = res.size(),
.source_offset = offset / sizeof(ebpf_inst),
.relocation_entry_index = index,
Expand All @@ -440,9 +483,26 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
continue;
}

// Verify that this is a map or global variable relocation.
verify_load_instruction(reloc_inst, symbol_name, offset);

// Load instructions are two instructions long, so we need to check the next instruction.
if (prog.prog.size() <= offset / sizeof(ebpf_inst) + 1) {
throw UnmarshalError("Invalid relocation data");
}
ebpf_inst& next_reloc_inst = prog.prog[offset / sizeof(ebpf_inst) + 1];

Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved
// Perform relocation for symbols located in the maps section.
if (map_section_indices.contains(symbol_section_index)) {
relocate_map(inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, symbols);
relocate_map(reloc_inst, symbol_name, map_record_size_or_map_offsets, info, offset, index,
symbols);
continue;
}
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved

if (global_variable_section_indices.contains(symbol_section_index)) {
relocate_global_variable(reloc_inst, next_reloc_inst,
reader.sections[symbol_section_index]->get_name(), info,
map_record_size_or_map_offsets, offset);
continue;
}

Expand Down
17 changes: 12 additions & 5 deletions src/asm_marshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ static uint8_t imm_endian(const Un::Op op) {

struct MarshalVisitor {
private:
static vector<ebpf_inst> makeLddw(const Reg dst, const bool isFd, const int32_t imm, const int32_t next_imm) {
static vector<ebpf_inst> makeLddw(const Reg dst, const uint8_t type, const int32_t imm, const int32_t next_imm) {
return {ebpf_inst{.opcode = gsl::narrow<uint8_t>(INST_CLS_LD | width_to_opcode(8)),
.dst = dst.v,
.src = gsl::narrow<uint8_t>(isFd ? 1 : 0),
.src = type,
.offset = 0,
.imm = imm},
ebpf_inst{.opcode = 0, .dst = 0, .src = 0, .offset = 0, .imm = next_imm}};
Expand All @@ -105,14 +105,18 @@ struct MarshalVisitor {
return {};
}

vector<ebpf_inst> operator()(LoadMapFd const& b) const { return makeLddw(b.dst, true, b.mapfd, 0); }
vector<ebpf_inst> operator()(LoadMapFd const& b) const { return makeLddw(b.dst, INST_LD_MODE_MAP_FD, b.mapfd, 0); }

// Encode a LoadMapAddress as a two-slot lddw whose src field is INST_LD_MODE_MAP_VALUE:
// the first slot's imm carries the map fd and the second slot's imm carries the offset.
vector<ebpf_inst> operator()(LoadMapAddress const& b) const {
    const int32_t first_slot_imm = b.mapfd;
    const int32_t second_slot_imm = b.offset;
    return makeLddw(b.dst, INST_LD_MODE_MAP_VALUE, first_slot_imm, second_slot_imm);
}

vector<ebpf_inst> operator()(Bin const& b) const {
if (b.lddw) {
const auto pimm = std::get_if<Imm>(&b.v);
assert(pimm != nullptr);
auto [imm, next_imm] = split(pimm->v);
return makeLddw(b.dst, false, imm, next_imm);
return makeLddw(b.dst, INST_LD_MODE_IMM, imm, next_imm);
}

ebpf_inst res{.opcode = gsl::narrow<uint8_t>((b.is64 ? INST_CLS_ALU64 : INST_CLS_ALU) | (op(b.op) << 4)),
Expand Down Expand Up @@ -295,7 +299,7 @@ vector<ebpf_inst> marshal(const Instruction& ins, const pc_t pc) {
return std::visit(MarshalVisitor{crab::label_to_offset16(pc), crab::label_to_offset32(pc)}, ins);
}

static int size(const Instruction& inst) {
int asm_syntax::size(const Instruction& inst) {
if (const auto pins = std::get_if<Bin>(&inst)) {
if (pins->lddw) {
return 2;
Expand All @@ -304,6 +308,9 @@ static int size(const Instruction& inst) {
if (std::holds_alternative<LoadMapFd>(inst)) {
return 2;
}
if (std::holds_alternative<LoadMapAddress>(inst)) {
return 2;
}
return 1;
}

Expand Down
14 changes: 2 additions & 12 deletions src/asm_ostream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,8 @@ struct CommandPrinterVisitor {

// Render as "<dst> = map_fd <mapfd>".
void operator()(LoadMapFd const& b) {
    os_ << b.dst;
    os_ << " = map_fd " << b.mapfd;
}

// Render as "<dst> = map_val(<mapfd>) + <offset>".
void operator()(LoadMapAddress const& b) {
    os_ << b.dst;
    os_ << " = map_val(" << b.mapfd << ") + ";
    os_ << b.offset;
}

// Follow llvm-objdump's register naming for consistency: "r<number>" for a 64-bit operation and
// "w<number>" for a 32-bit one.
static std::string reg_name(Reg const& a, const bool is64) {
    const char* const prefix = is64 ? "r" : "w";
    return prefix + std::to_string(a.v);
}
Expand Down Expand Up @@ -542,18 +544,6 @@ string to_string(Assertion const& constraint) {
return str.str();
}

int size(const Instruction& inst) {
if (const auto bin = std::get_if<Bin>(&inst)) {
if (bin->lddw) {
return 2;
}
}
if (std::holds_alternative<LoadMapFd>(inst)) {
return 2;
}
return 1;
}

auto get_labels(const InstructionSeq& insts) {
pc_t pc = 0;
std::map<label_t, pc_t> pc_of_label;
Expand Down
13 changes: 13 additions & 0 deletions src/asm_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ using crab::number_t;
#define DOT "[.]"
#define TYPE R"_(\s*(shared|number|packet|stack|ctx|map_fd|map_fd_programs)\s*)_"

// Match map_val(fd) + offset
// Regex fragment with two capture groups: the first is the fd and the second is the offset, each a
// non-empty run of decimal digits (no sign is accepted). Whitespace around the tokens is optional.
// When this fragment is appended to other fragments, its groups are numbered after theirs.
#define MAP_VAL R"_(\s*map_val\((\d+)\)\s*\+\s*(\d+)\s*)_"

// Match map_fd fd
// Regex fragment with one capture group: the fd as a non-empty run of decimal digits. At least one
// whitespace character is required between "map_fd" and the number.
#define MAP_FD R"_(\s*map_fd\s+(\d+)\s*)_"

Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved
static const std::map<std::string, Bin::Op> str_to_binop = {
{"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL},
{"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND},
Expand Down Expand Up @@ -164,6 +170,13 @@ Instruction parse_instruction(const std::string& line, const std::map<std::strin
}
return Un{.op = str_to_unop.at(m[2]), .dst = reg(m[1]), .is64 = is64_reg(m[1])};
}
if (regex_match(text, m, regex(WREG ASSIGN MAP_VAL))) {
return LoadMapAddress{
.dst = reg(m[1]), .mapfd = boost::lexical_cast<int>(m[2]), .offset = boost::lexical_cast<int>(m[3])};
}
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved
if (regex_match(text, m, regex(WREG ASSIGN MAP_FD))) {
return LoadMapFd{.dst = reg(m[1]), .mapfd = boost::lexical_cast<int>(m[2])};
}
Alan-Jowett marked this conversation as resolved.
Show resolved Hide resolved
if (regex_match(text, m, regex(WREG OPASSIGN IMM LONGLONG))) {
const std::string r = m[1];
const bool lddw = !m[4].str().empty();
Expand Down
13 changes: 12 additions & 1 deletion src/asm_syntax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ struct LoadMapFd {
constexpr bool operator==(const LoadMapFd&) const = default;
};

// Load the address of a map value into a register.
//
// This is the instruction form of a two-slot lddw whose src field is INST_LD_MODE_MAP_VALUE:
// mapfd corresponds to the first slot's imm and offset to the second slot's imm.
// Textual form: "<dst> = map_val(<mapfd>) + <offset>".
struct LoadMapAddress {
Reg dst; // Destination register to store the address of the map value.
int32_t mapfd{}; // File descriptor of the map to load the address from.
int32_t offset{}; // Offset within the map, must be within bounds.
// NOTE(review): the bounds requirement on offset is not enforced by this struct — confirm where it is checked.

// Member-wise equality over dst, mapfd and offset.
constexpr bool operator==(const LoadMapAddress&) const = default;
};

struct Condition {
enum class Op {
EQ,
Expand Down Expand Up @@ -249,7 +258,7 @@ struct IncrementLoopCounter {
};

using Instruction = std::variant<Undefined, Bin, Un, LoadMapFd, Call, CallLocal, Callx, Exit, Jmp, Mem, Packet, Atomic,
Assume, IncrementLoopCounter>;
Assume, IncrementLoopCounter, LoadMapAddress>;

using LabeledInstruction = std::tuple<label_t, Instruction, std::optional<btf_line_info_t>>;
using InstructionSeq = std::vector<LabeledInstruction>;
Expand Down Expand Up @@ -374,6 +383,8 @@ std::string to_string(const Assertion& constraint);
void print(const InstructionSeq& insts, std::ostream& out, const std::optional<const label_t>& label_to_print,
bool print_line_info = false);

int size(const Instruction& inst);

} // namespace asm_syntax

using namespace asm_syntax;
Expand Down
24 changes: 14 additions & 10 deletions src/asm_unmarshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ struct Unmarshaller {
if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) {
throw InvalidInstruction(pc, "invalid lddw");
}
if (inst.src > 1) {
if (inst.src > INST_LD_MODE_MAP_VALUE) {
throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
}
if (inst.offset != 0) {
Expand All @@ -439,22 +439,26 @@ struct Unmarshaller {
throw InvalidInstruction(pc, "bad register");
}

if (inst.src == 1) {
switch (inst.src) {
case INST_LD_MODE_IMM:
return Bin{
.op = Bin::Op::MOV,
.dst = Reg{inst.dst},
.v = Imm{merge(inst.imm, next_imm)},
.is64 = true,
.lddw = true,
};
case INST_LD_MODE_MAP_FD: {
// magic number, meaning we're a per-process file descriptor defining the map.
// (for details, look for BPF_PSEUDO_MAP_FD in the kernel)
if (next.imm != 0) {
throw InvalidInstruction(pc, "lddw uses reserved fields");
}
return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm};
}

return Bin{
.op = Bin::Op::MOV,
.dst = Reg{inst.dst},
.v = Imm{merge(inst.imm, next_imm)},
.is64 = true,
.lddw = true,
};
case INST_LD_MODE_MAP_VALUE: return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm};
default: throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
}
}

static ArgSingle::Kind toArgSingleKind(const ebpf_argument_type_t t) {
Expand Down
1 change: 1 addition & 0 deletions src/assertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class AssertExtractor {
vector<Assertion> operator()(const IncrementLoopCounter& ipc) const { return {{BoundedLoopCount{ipc.name}}}; }

vector<Assertion> operator()(const LoadMapFd&) const { return {}; }
// LoadMapAddress contributes no assertions: the returned list is always empty.
vector<Assertion> operator()(const LoadMapAddress&) const { return {}; }

/// Packet access implicitly uses R6, so verify that R6 still has a pointer to the context.
vector<Assertion> operator()(const Packet&) const { return zero_offset_ctx({6}); }
Expand Down
Loading
Loading