From 395071a37a48a0a29da3eac46592008faefb5218 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 16 Aug 2024 15:30:04 -0700 Subject: [PATCH 1/6] libdrgn: util: add qsort_arg() helper function In commit c8ff8728 ("Support systems without qsort_r"), usage of qsort_r was eliminated because it is a glibc extension. There was discussion of creating a utility function that implements qsort_r(), but the approach described uses thread local variables, so it is not actually reentrant, and it was dropped to avoid confusion. However, upcoming commits will also prefer a comparator function which takes an argument, and they also won't require a reentrant implementation. Add this helper in with a name that shouldn't spark confusion: qsort_arg(). Signed-off-by: Stephen Brennan --- libdrgn/Makefile.am | 1 + libdrgn/orc_info.c | 12 +++++------- libdrgn/util.c | 19 +++++++++++++++++++ libdrgn/util.h | 11 +++++++++++ 4 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 libdrgn/util.c diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 5dcb1f964..0ce6639e3 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -116,6 +116,7 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ symbol.h \ type.c \ type.h \ + util.c \ util.h \ vector.h diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 31793137c..8a82a72c9 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -54,10 +54,9 @@ drgn_raw_orc_entry_is_terminator(struct drgn_module *module, unsigned int i) } } -static _Thread_local struct drgn_module *compare_orc_entries_module; -static int compare_orc_entries(const void *a, const void *b) +static int compare_orc_entries(const void *a, const void *b, void *arg) { - struct drgn_module *module = compare_orc_entries_module; + struct drgn_module *module = arg; unsigned int index_a = *(unsigned int *)a; unsigned int index_b = *(unsigned int *)b; @@ -340,7 +339,6 @@ static struct drgn_error *drgn_debug_info_parse_orc(struct drgn_module 
*module) for (unsigned int i = 0; i < num_entries; i++) indices[i] = i; - compare_orc_entries_module = module; /* * Sort the ORC entries for binary search. Since Linux kernel commit * f14bf6a350df ("x86/unwind/orc: Remove boot-time ORC unwind tables @@ -348,9 +346,9 @@ static struct drgn_error *drgn_debug_info_parse_orc(struct drgn_module *module) * it if necessary. */ for (unsigned int i = 1; i < num_entries; i++) { - if (compare_orc_entries(&indices[i - 1], &indices[i]) > 0) { - qsort(indices, num_entries, sizeof(indices[0]), - compare_orc_entries); + if (compare_orc_entries(&indices[i - 1], &indices[i], module) > 0) { + qsort_arg(indices, num_entries, sizeof(indices[0]), + compare_orc_entries, module); break; } } diff --git a/libdrgn/util.c b/libdrgn/util.c new file mode 100644 index 000000000..4650ee505 --- /dev/null +++ b/libdrgn/util.c @@ -0,0 +1,19 @@ +// Copyright (c) 2024 Oracle and/or its affiliates +// SPDX-License-Identifier: LGPL-2.1-or-later +#include "util.h" + +static _Thread_local int (*qsort_arg_compar)(const void *, const void *, void*); +static _Thread_local void *qsort_arg_arg; + +static int qsort_arg_compar_wrapper(const void *a, const void *b) +{ + return qsort_arg_compar(a, b, qsort_arg_arg); +} + +void qsort_arg(void *base, size_t nmemb, size_t size, + int (*compar)(const void *, const void *, void*), void *arg) +{ + qsort_arg_compar = compar; + qsort_arg_arg = arg; + qsort(base, nmemb, size, qsort_arg_compar_wrapper); +} diff --git a/libdrgn/util.h b/libdrgn/util.h index 78c0c4a32..ea5281210 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -211,4 +211,15 @@ static inline uint64_t uint_max(int n) #define add_to_possibly_null_pointer(ptr, i) \ ((typeof(ptr))((uintptr_t)(ptr) + (i) * sizeof(*(ptr)))) +/** + * Similar to qsort_r (passes @a arg to @a compar) but **not** reentrant + * + * The qsort_r() function's main feature is that it is reentrant, but also adds + * the convenience of including an argument to the callback function. 
+ * Unfortunately it is a glibc extension. This provides a similar API but it is + * only thread-safe, not reentrant. See qsort_r(3) for details. + */ +void qsort_arg(void *base, size_t nmemb, size_t size, + int (*compar)(const void *, const void *, void*), void *arg); + #endif /* DRGN_UTIL_H */ From f64287ab27ea35c0466f20011699b446b69332b8 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 1 Mar 2024 16:46:53 -0800 Subject: [PATCH 2/6] python: Add helper for returning list of Symbols This will be reused in an upcoming commit. Signed-off-by: Stephen Brennan --- libdrgn/python/drgnpy.h | 2 ++ libdrgn/python/program.c | 18 +----------------- libdrgn/python/symbol.c | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 6c892512a..af2d7c4fe 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -304,6 +304,8 @@ Program *program_from_kernel(PyObject *self); Program *program_from_pid(PyObject *self, PyObject *args, PyObject *kwds); PyObject *Symbol_wrap(struct drgn_symbol *sym, PyObject *name_obj); +PyObject *Symbol_list_wrap(struct drgn_symbol **symbols, size_t count, + PyObject *name_obj); PyObject *Thread_wrap(struct drgn_thread *drgn_thread); diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 407d934ce..56804f7c4 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1231,23 +1231,7 @@ static PyObject *Program_symbols(Program *self, PyObject *args) if (err) return set_drgn_error(err); - _cleanup_pydecref_ PyObject *list = PyList_New(count); - if (!list) { - drgn_symbols_destroy(symbols, count); - return NULL; - } - for (size_t i = 0; i < count; i++) { - PyObject *pysym = Symbol_wrap(symbols[i], (PyObject *)self); - if (!pysym) { - /* Free symbols which aren't yet added to list. 
*/ - drgn_symbols_destroy(symbols, count); - return NULL; - } - symbols[i] = NULL; - PyList_SET_ITEM(list, i, pysym); - } - free(symbols); - return_ptr(list); + return Symbol_list_wrap(symbols, count, (PyObject *)self); } static PyObject *Program_symbol(Program *self, PyObject *arg) diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index 83ea7525f..d0e84e1bf 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -16,6 +16,29 @@ PyObject *Symbol_wrap(struct drgn_symbol *sym, PyObject *name_obj) return (PyObject *)ret; } +PyObject *Symbol_list_wrap(struct drgn_symbol **symbols, size_t count, + PyObject *name_obj) +{ + _cleanup_pydecref_ PyObject *list = PyList_New(count); + if (!list) { + drgn_symbols_destroy(symbols, count); + return NULL; + } + for (size_t i = 0; i < count; i++) { + PyObject *pysym = Symbol_wrap(symbols[i], name_obj); + if (!pysym) { + /* Free symbols which aren't yet added to list. */ + drgn_symbols_destroy(symbols, count); + /* Free list and all symbols already added. */ + return NULL; + } + symbols[i] = NULL; + PyList_SET_ITEM(list, i, pysym); + } + free(symbols); + return_ptr(list); +} + static PyObject *Symbol_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { struct drgn_symbol *sym; From fda9b2566d422e698d8f9f6ab45b6d10672e7634 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Sun, 17 Mar 2024 09:55:05 -0700 Subject: [PATCH 3/6] libdrgn, python: add SymbolIndex The Symbol Finder API gives us the ability to register a dynamic callback for symbol lookup. However, many common use cases are satisfied by a simple static list of symbols. Correct and efficient lookup in this simple case is rather tricky. Implement a new type, SymbolIndex, which can take a list of symbols and index them for efficient lookup by name or address. 
Signed-off-by: Stephen Brennan --- _drgn.pyi | 63 ++++++++ docs/api_reference.rst | 1 + drgn/__init__.py | 2 + libdrgn/Makefile.am | 1 + libdrgn/python/drgnpy.h | 7 + libdrgn/python/main.c | 1 + libdrgn/python/program.c | 10 ++ libdrgn/python/symbol_index.c | 122 +++++++++++++++ libdrgn/symbol.c | 269 ++++++++++++++++++++++++++++++++++ libdrgn/symbol.h | 102 +++++++++++++ tests/test_symbol.py | 120 ++++++++++++++- 11 files changed, 697 insertions(+), 1 deletion(-) create mode 100644 libdrgn/python/symbol_index.c diff --git a/_drgn.pyi b/_drgn.pyi index a8452bb7e..4856c5b2b 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1898,6 +1898,69 @@ class Symbol: kind: Final[SymbolKind] """Kind of entity represented by this symbol.""" +class SymbolIndex: + """ + A ``SymbolIndex`` contains a static set of symbols and allows efficient + lookup by name and address. + + With :meth:`Program.register_symbol_finder()`, you can add a callback to + provide custom symbol finding logic. However, in many cases, all that is + necessary is to provide drgn with a list of symbols that you know to be part + of the program. This object allows you to do that. It efficiently implements + the Symbol Finder API given a static set of symbols. 
For example:: + + >>> prog = drgn.Program() + >>> symbol = drgn.Symbol("foo", 0x123, 1, drgn.SymbolBinding.GLOBAL, drgn.SymbolKind.OBJECT) + >>> finder = drgn.SymbolIndex([symbol]) + >>> prog.register_symbol_finder("SymbolIndex", finder, enable_index=0) + >>> prog.symbols() + [Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)] + >>> prog.symbol("bar") + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + LookupError: not found + >>> prog.symbol("foo") + Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>) + >>> prog.symbol(0x100) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + LookupError: not found + >>> prog.symbol(0x123) + Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>) + """ + + def __init__(self, symbols: Iterable[Symbol]) -> None: + """ + Create a ``SymbolIndex`` from a sequence of symbols + + The returned symbol index satisfies the Symbol Finder API. It supports + overlapping symbol address ranges and duplicate symbol names. However, + in the case of these sorts of conflicts, it doesn't provide any + guarantee on the order of the results, or which result is returned when + a single symbol is requested. + + :param symbols: An iterable of symbols + :returns: A callable object suitable to provide to + :meth:`Program.register_symbol_finder()`. + """ + + def __call__( + self, + prog: Program, + name: Optional[str], + address: Optional[int], + one: bool, + ) -> List[Symbol]: + """ + Lookup symbol by name, address, or both. 
+ + :param prog: (unused) the program looking up this symbol + :param name: if given, only return symbols with this name + :param address: if given, only return symbols spanning this address + :param one: if given, limit the result to a single symbol + :returns: a list of matching symbols (empty if none are found) + """ + class SymbolBinding(enum.Enum): """ A ``SymbolBinding`` describes the linkage behavior and visibility of a diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 2cf3789c7..b3c4d7b22 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -109,6 +109,7 @@ Symbols .. drgndoc:: Symbol .. drgndoc:: SymbolBinding .. drgndoc:: SymbolKind +.. drgndoc:: SymbolIndex Stack Traces ------------ diff --git a/drgn/__init__.py b/drgn/__init__.py index d83c40a8e..5a03f5a30 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -70,6 +70,7 @@ StackTrace, Symbol, SymbolBinding, + SymbolIndex, SymbolKind, Thread, Type, @@ -127,6 +128,7 @@ "StackTrace", "Symbol", "SymbolBinding", + "SymbolIndex", "SymbolKind", "Thread", "Type", diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 0ce6639e3..8cc46bbbe 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -171,6 +171,7 @@ _drgn_la_SOURCES = python/constants.c \ python/program.c \ python/stack_trace.c \ python/symbol.c \ + python/symbol_index.c \ python/test.c \ python/thread.c \ python/type.c \ diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index af2d7c4fe..8c2dcd06f 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -18,6 +18,7 @@ #include "../hash_table.h" #include "../pp.h" #include "../program.h" +#include "../symbol.h" /* These were added in Python 3.7. 
*/ #ifndef Py_UNREACHABLE @@ -108,6 +109,11 @@ typedef struct { PyObject *attr_cache; } DrgnType; +typedef struct { + PyObject_HEAD + struct drgn_symbol_index index; +} SymbolIndex; + typedef struct { PyObject_HEAD /* @@ -242,6 +248,7 @@ extern PyTypeObject Register_type; extern PyTypeObject StackFrame_type; extern PyTypeObject StackTrace_type; extern PyTypeObject Symbol_type; +extern PyTypeObject SymbolIndex_type; extern PyTypeObject Thread_type; extern PyTypeObject ThreadIterator_type; extern PyTypeObject TypeEnumerator_type; diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index f5b164cd5..cd9e93874 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -297,6 +297,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) add_type(m, &StackFrame_type) || add_type(m, &StackTrace_type) || add_type(m, &Symbol_type) || + add_type(m, &SymbolIndex_type) || add_type(m, &DrgnType_type) || add_type(m, &Thread_type) || add_type(m, &ThreadIterator_type) || diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 56804f7c4..600991ac8 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -504,6 +504,16 @@ py_symbol_find_fn(const char *name, uint64_t addr, enum drgn_find_symbol_flags flags, void *arg, struct drgn_symbol_result_builder *builder) { + // Fast path for SymbolIndex: don't bother converting to and from Python + // types, as this is a C finder. Use Py_TYPE and pointer comparison + // directly here to avoid needing to take the GIL for + // PyObject_TypeCheck(). SymbolIndex cannot be subclassed, so the logic + // for subclass checking is unnecessary anyway. 
+ if (Py_TYPE(PyTuple_GET_ITEM(arg, 1)) == &SymbolIndex_type) { + SymbolIndex *ix = (SymbolIndex *)PyTuple_GET_ITEM(arg, 1); + return drgn_symbol_index_find(name, addr, flags, &ix->index, builder); + } + PyGILState_guard(); _cleanup_pydecref_ PyObject *name_obj = NULL; diff --git a/libdrgn/python/symbol_index.c b/libdrgn/python/symbol_index.c new file mode 100644 index 000000000..d19467352 --- /dev/null +++ b/libdrgn/python/symbol_index.c @@ -0,0 +1,122 @@ +// Copyright (c) 2024 Oracle and/or its affiliates +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" +#include "../symbol.h" + +static void SymbolIndex_dealloc(SymbolIndex *self) +{ + drgn_symbol_index_deinit(&self->index); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject *SymbolIndex_call(SymbolIndex *self, PyObject *args, PyObject *kwargs) +{ + PyObject *prog_obj; + struct index_arg address = { .allow_none = true }; + const char *name; + static char *kwnames[] = {"prog", "name", "address", "one", NULL}; + int single; // 'p' format specifier expects an int, not bool + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OzO&p:__call__", kwnames, + &prog_obj, &name, index_converter, &address, + &single)) + return NULL; + + unsigned int flags = 0; + if (single) + flags |= DRGN_FIND_SYMBOL_ONE; + if (!address.is_none) + flags |= DRGN_FIND_SYMBOL_ADDR; + if (name) + flags |= DRGN_FIND_SYMBOL_NAME; + + struct drgn_symbol_result_builder builder; + drgn_symbol_result_builder_init(&builder, flags & DRGN_FIND_SYMBOL_ONE); + + struct drgn_error *err = + drgn_symbol_index_find(name, address.uvalue, flags, &self->index, &builder); + if (err) + goto error; + + /* We return a list regardless */ + if (single) { + struct drgn_symbol *symbol = drgn_symbol_result_builder_single(&builder); + _cleanup_pydecref_ PyObject *list = PyList_New(symbol ? 
1 : 0); + if (!list) + goto error; + if (symbol) { + PyObject *pysym = Symbol_wrap(symbol, (PyObject *)self); + if (!pysym) + goto error; + PyList_SET_ITEM(list, 0, pysym); + } + return_ptr(list); + } else { + struct drgn_symbol **syms; + size_t count; + drgn_symbol_result_builder_array(&builder, &syms, &count); + return Symbol_list_wrap(syms, count, (PyObject *)self); + } + + return NULL; +error: + drgn_symbol_result_builder_abort(&builder); + return err ? set_drgn_error(err) : NULL; +} + +static PyObject *SymbolIndex_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) +{ + static char *kwnames[] = {"symbols", NULL}; + PyObject *list_obj; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwnames, &list_obj)) + return NULL; + + _cleanup_pydecref_ PyObject *iter = + PyObject_GetIter(list_obj); + if (!iter) + return NULL; + + _cleanup_(drgn_symbol_index_builder_deinit) + struct drgn_symbol_index_builder builder; + drgn_symbol_index_builder_init(&builder); + + for (;;) { + _cleanup_pydecref_ PyObject *item = PyIter_Next(iter); + if (!item) + break; + if (!PyObject_TypeCheck(item, &Symbol_type)) + return PyErr_Format(PyExc_TypeError, "expected sequence of Symbols"); + Symbol *sym = (Symbol *)item; + if (!drgn_symbol_index_builder_add(&builder, sym->sym)) + return PyErr_NoMemory(); + } + + if (PyErr_Occurred()) + return NULL; + + _cleanup_pydecref_ SymbolIndex *index_obj = call_tp_alloc(SymbolIndex); + if (!index_obj) + return NULL; + + struct drgn_error *err = + drgn_symbol_index_init_from_builder(&index_obj->index, + &builder); + // On error, the builder and index are already deinitialized + if (err) + return set_drgn_error(err); + + return (PyObject *)no_cleanup_ptr(index_obj); +} + +PyTypeObject SymbolIndex_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.SymbolIndex", + .tp_basicsize = sizeof(SymbolIndex), + .tp_dealloc = (destructor)SymbolIndex_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_SymbolIndex_DOC, + .tp_call = 
(ternaryfunc)SymbolIndex_call, + .tp_new = SymbolIndex_new, +}; diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 02ae0e7fd..786d8e0b5 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -2,13 +2,18 @@ // SPDX-License-Identifier: LGPL-2.1-or-later #include +#include #include #include +#include "binary_search.h" #include "drgn_internal.h" +#include "string_builder.h" #include "symbol.h" #include "util.h" +DEFINE_VECTOR_FUNCTIONS(symbol_vector); + LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) { if (sym && sym->name_lifetime == DRGN_LIFETIME_OWNED) @@ -174,3 +179,267 @@ void drgn_symbol_result_builder_array(struct drgn_symbol_result_builder *builder symbolp_vector_shrink_to_fit(&builder->vector); symbolp_vector_steal(&builder->vector, syms_ret, count_ret); } + +static int name_compar(const void *lhs, const void *rhs, void *arg) +{ + struct drgn_symbol_index *ix = arg; + uint32_t left_ix = *(const uint32_t *)lhs; + uint32_t right_ix = *(const uint32_t *)rhs; + return strcmp(ix->symbols[left_ix].name, ix->symbols[right_ix].name); +} + +static int addr_compar(const void *lhs, const void *rhs) +{ + const struct drgn_symbol *left = lhs; + const struct drgn_symbol *right = rhs; + // returning a simple subtraction would not work well since these are + // unsigned + if (left->address < right->address) + return -1; + else if (left->address > right->address) + return 1; + else + return 0; +} + +struct drgn_error * +drgn_symbol_index_init(struct drgn_symbol *symbols, uint32_t count, + char *buffer, struct drgn_symbol_index *ret) +{ + ret->symbols = symbols; + ret->num_syms = count; + ret->strings = buffer; + ret->name_sort = NULL; + ret->max_addrs = NULL; + drgn_symbol_name_table_init(&ret->htab); + ret->name_sort = malloc_array(count, sizeof(ret->name_sort[0])); + if (!ret->name_sort) + goto enomem; + ret->max_addrs = malloc_array(count, sizeof(ret->max_addrs[0])); + if (!ret->max_addrs) + goto enomem; + + // In many cases (e.g kallsyms), symbols are 
already sorted by address, + // but not always. Check whether sorted, and if not, sort. + for (uint32_t i = 1; i < ret->num_syms; i++) { + if (ret->symbols[i - 1].address > ret->symbols[i].address) { + qsort(ret->symbols, count, sizeof(ret->symbols[0]), addr_compar); + break; + } + } + + // Kallsyms doesn't include symbol lengths, so symbols are + // non-overlapping. But this is not true in general! Symbols may + // overlap, which makes address lookup complicated. Rather than using a + // complex range data structure, we can use two binary searches, one to + // find the first symbol which could overlap with an address, and one to + // find the last symbol, and then linearly search that array. This + // performs poorly if there are symbols which span many others, but + // that's a rare case. In order to do this strategy, we need an array + // that contains the maximum address spanned by any symbol at or before + // that index. + if (ret->num_syms > 0) // in case num_syms == 0 + ret->max_addrs[0] = ret->symbols[0].address + ret->symbols[0].size; + for (uint32_t i = 1; i < ret->num_syms; i++) { + uint64_t max_addr = ret->symbols[i].address + ret->symbols[i].size; + ret->max_addrs[i] = max(ret->max_addrs[i - 1], max_addr); + } + + // Sort the "name_sort" array by name so we get runs of symbols with the + // same name + for (uint32_t i = 0; i < ret->num_syms; i++) + ret->name_sort[i] = i; + qsort_arg(ret->name_sort, ret->num_syms, sizeof(ret->name_sort[0]), + name_compar, ret); + + // For each unique symbol name, insert the range of symbol indexes + // into the hash table for fast name lookup + struct drgn_symbol_name_table_entry entry; + uint32_t current = 0; + while (current < ret->num_syms) { + const char *current_str = ret->symbols[ret->name_sort[current]].name; + uint32_t next = current + 1; + while (next < ret->num_syms) { + const char *next_str = ret->symbols[ret->name_sort[next]].name; + if (strcmp(current_str, next_str) != 0) + break; + next++; + } + + 
entry.key = current_str; + entry.value.start = current; + entry.value.end = next; + if (drgn_symbol_name_table_insert(&ret->htab, &entry, NULL) < 0) + goto enomem; + + current = next; + } + return NULL; + +enomem: + drgn_symbol_index_deinit(ret); + return &drgn_enomem; +} + +void +drgn_symbol_index_deinit(struct drgn_symbol_index *index) +{ + // The symbol array is contiguous and all names come from strings + free(index->symbols); + free(index->max_addrs); + drgn_symbol_name_table_deinit(&index->htab); + free(index->strings); + free(index->name_sort); + // Simplify error handling by ensuring deinit is safe to call twice + memset(index, 0, sizeof(*index)); +} + +static void address_search_range(struct drgn_symbol_index *index, uint64_t address, + uint32_t *start_ret, uint32_t *end_ret) +{ + // First, identify the maximum symbol index which could possibly contain + // this address. Think of this as: + // end_ret = bisect_right([s.address for s in symbols], address) + #define less_than_start(a, b) (*(a) < (b)->address) + *end_ret = binary_search_gt(index->symbols, index->num_syms, &address, + less_than_start); + #undef less_than_start + + // Second, identify first symbol index which could possibly contain this + // address. 
We need to use "max_addrs" for this task: + // bisect_right(max_addrs, address) + #define less_than_end(a, b) (*(a) < *(b)) + *start_ret = binary_search_gt(index->max_addrs, index->num_syms, &address, + less_than_end); + #undef less_than_end +} + +/** Allocate a copy of the symbol and add it to the builder */ +static bool add_symbol_result(struct drgn_symbol_result_builder *builder, + struct drgn_symbol *symbol) +{ + struct drgn_symbol *copy = malloc(sizeof(*copy)); + if (!copy) + return false; + *copy = *symbol; + if (!drgn_symbol_result_builder_add(builder, copy)) { + free(copy); + return false; + } + return true; +} + +struct drgn_error * +drgn_symbol_index_find(const char *name, uint64_t address, + enum drgn_find_symbol_flags flags, void *arg, + struct drgn_symbol_result_builder *builder) +{ + struct drgn_symbol_index *index = arg; + + // Unlike the ELF symbol finder, we don't have any particular rules + // about which symbols get priority when looking up a single symbol. + // If we decide this logic is critical, it would probably make sense to + // move it into the symbol finder's API via the result builder, rather + // than reimplementing it here. 
+ + if (flags & DRGN_FIND_SYMBOL_ADDR) { + uint32_t start, end; + address_search_range(index, address, &start, &end); + for (uint32_t i = start; i < end; i++) { + struct drgn_symbol *s = &index->symbols[i]; + if (s->address > address || address >= s->address + s->size) + continue; + if ((flags & DRGN_FIND_SYMBOL_NAME) && + strcmp(s->name, name) != 0) + continue; + if (!add_symbol_result(builder, s)) + return &drgn_enomem; + if (flags & DRGN_FIND_SYMBOL_ONE) + break; + } + } else if (flags & DRGN_FIND_SYMBOL_NAME) { + struct drgn_symbol_name_table_iterator it = + drgn_symbol_name_table_search(&index->htab, &name); + if (!it.entry) + return NULL; + for (uint32_t i = it.entry->value.start; i < it.entry->value.end; i++) { + struct drgn_symbol *s = &index->symbols[index->name_sort[i]]; + if (!add_symbol_result(builder, s)) + return &drgn_enomem; + if (flags & DRGN_FIND_SYMBOL_ONE) + break; + } + } else { + for (int i = 0; i < index->num_syms; i++) { + struct drgn_symbol *s = &index->symbols[i]; + if (!add_symbol_result(builder, s)) + return &drgn_enomem; + if (flags & DRGN_FIND_SYMBOL_ONE) + break; + } + } + return NULL; +} + +void +drgn_symbol_index_builder_init(struct drgn_symbol_index_builder *builder) +{ + builder->names = (struct string_builder)STRING_BUILDER_INIT; + symbol_vector_init(&builder->symbols); +} + +void +drgn_symbol_index_builder_deinit(struct drgn_symbol_index_builder *builder) +{ + string_builder_deinit(&builder->names); + symbol_vector_deinit(&builder->symbols); +} + +bool +drgn_symbol_index_builder_add(struct drgn_symbol_index_builder *builder, + const struct drgn_symbol *ptr) +{ + struct drgn_symbol copy = *ptr; + + // Temporarily store the index into the name + copy.name = (char *)builder->names.len; + return string_builder_append(&builder->names, ptr->name) + && string_builder_appendc(&builder->names, '\0') + && symbol_vector_append(&builder->symbols, &copy); +} + +struct drgn_error * +drgn_symbol_index_init_from_builder(struct drgn_symbol_index 
*index, + struct drgn_symbol_index_builder *builder) +{ + size_t names_len = builder->names.len; + char *names = string_builder_steal(&builder->names); + char *tmp_names = realloc(names, names_len); + if (tmp_names) + names = tmp_names; + + symbol_vector_shrink_to_fit(&builder->symbols); + struct drgn_symbol *symbols; + size_t num_syms; + symbol_vector_steal(&builder->symbols, &symbols, &num_syms); + + // Now that the name array is finalized, resolve the names to real + // pointers. Update the name lifetime to static, reflecting that the + // symbol name is owned by the finder whose lifetime is bound to the + // program's once it is attached. + for (size_t i = 0; i < num_syms; i++) { + size_t string_index = (size_t)symbols[i].name; + symbols[i].name = &names[string_index]; + symbols[i].name_lifetime = DRGN_LIFETIME_STATIC; + } + + if (num_syms > UINT32_MAX) { + free(names); + free(symbols); + return drgn_error_format(DRGN_ERROR_OUT_OF_BOUNDS, + "too many symbols provided: %zu > %" PRIu32, + num_syms, UINT32_MAX); + } + + return drgn_symbol_index_init(symbols, num_syms, names, index); +} diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index 4a2caf1c5..410ebe2a3 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -1,4 +1,5 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. +// Copyright (c) 2024, Oracle and/or its affiliates. 
// SPDX-License-Identifier: LGPL-2.1-or-later #ifndef DRGN_SYMBOL_H @@ -9,6 +10,8 @@ #include "cleanup.h" #include "drgn_internal.h" #include "handler.h" +#include "hash_table.h" +#include "string_builder.h" #include "vector.h" struct drgn_symbol { @@ -64,4 +67,103 @@ void drgn_symbol_result_builder_array(struct drgn_symbol_result_builder *builder struct drgn_error * drgn_symbol_copy(struct drgn_symbol *dst, struct drgn_symbol *src); +DEFINE_HASH_MAP(drgn_symbol_name_table, const char *, + struct { uint32_t start; uint32_t end; }, + c_string_key_hash_pair, c_string_key_eq); + +/** + * An index of symbols, supporting efficient lookup by name or address + * + * While the dynamic symbol finding callback is a very flexible API, many use + * cases can be served best by simply providing drgn with a known symbol table + * to index. Drgn can efficiently implement the name and address lookup + * functions once, and provide a symbol finder implementation, so that clients + * need not redo this boilerplate. + * + * In the interest of simplicity, the index is immutable once created. This + * allows us to use simple data structures. If the symbol table needs frequent + * updates, then registering a custom symbol finder should be preferred. + */ +struct drgn_symbol_index { + /** Array of symbols, in sorted order by address */ + struct drgn_symbol *symbols; + + /** Array of max_addr, to aid address lookup */ + uint64_t *max_addrs; + + /** Number of symbols */ + uint32_t num_syms; + + /** The buffer containing all symbol names */ + char *strings; + + /** Array of symbol indices, sorted by name. Used by the htab. */ + uint32_t *name_sort; + + /** Map of symbol names to index */ + struct drgn_symbol_name_table htab; +}; + +/** + * Create a symbol index from an array of symbols + * + * This takes ownership of the symbol array and the individual symbols. 
The @a + * buffer argument allows us to provide a single backing buffer for all strings + * (in which case the lifetimes of each symbol name should be static). On error + * @a symbols and @a buffer are already freed, since the builder took ownership + * of them. + */ +struct drgn_error * +drgn_symbol_index_init(struct drgn_symbol *symbols, uint32_t count, + char *buffer, struct drgn_symbol_index *ret); + +/** Deinitialize the symbol index. Safe to call multiple times. */ +void drgn_symbol_index_deinit(struct drgn_symbol_index *index); + +DEFINE_VECTOR_TYPE(symbol_vector, struct drgn_symbol); + +struct drgn_symbol_index_builder { + struct string_builder names; + struct symbol_vector symbols; +}; + +/** + * Create a symbol builder which will efficiently pack string names next + * to each other in memory, rather than allocating many small strings. + */ +void +drgn_symbol_index_builder_init(struct drgn_symbol_index_builder *builder); + +/** + * For destroying a builder on error conditions. It is safe to call this + * multiple times, including after drgn_symbol_index_init_from_builder(). + */ +void +drgn_symbol_index_builder_deinit(struct drgn_symbol_index_builder *builder); + +/** + * Add symbol to the builder: the builder does not take ownership of @a ptr, + * instead making a copy. + */ +bool +drgn_symbol_index_builder_add(struct drgn_symbol_index_builder *builder, + const struct drgn_symbol *ptr); + +/** + * Convert the builder to a symbol index, destroying the builder. + * On error, the builder and symbol index are both deinitialized, requiring no + * further cleanup. + */ +struct drgn_error * +drgn_symbol_index_init_from_builder(struct drgn_symbol_index *index, + struct drgn_symbol_index_builder *builder); + +/** + * The actual implementation of the Symbol Finder API. 
+ */ +struct drgn_error * +drgn_symbol_index_find(const char *name, uint64_t address, + enum drgn_find_symbol_flags flags, void *arg, + struct drgn_symbol_result_builder *builder); + #endif /* DRGN_SYMBOL_H */ diff --git a/tests/test_symbol.py b/tests/test_symbol.py index ee84c7e29..d9cc3dd94 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -3,7 +3,7 @@ import tempfile from _drgn_util.elf import ET, PT, SHT, STB, STT -from drgn import Program, Symbol, SymbolBinding, SymbolKind +from drgn import Program, Symbol, SymbolBinding, SymbolIndex, SymbolKind from tests import TestCase from tests.dwarfwriter import dwarf_sections from tests.elfwriter import ElfSection, ElfSymbol, create_elf_file @@ -343,3 +343,121 @@ def test_many_without_filter(self): self.expect_args(None, None, False) self.assertEqual(self.prog.symbols(), self.TEST_SYMS) self.assertTrue(self.called) + + +class TestSymbolIndex(TestCase): + # Symbols are listed here in order of address, but are shuffled below + AA = Symbol("AA", 10, 5, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + BB = Symbol("BB", 12, 1, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + CC = Symbol("CC", 13, 8, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + DD = Symbol("DD", 28, 5, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + EE = Symbol("EE", 34, 1, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + FF = Symbol("FF", 34, 10, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + GG = Symbol("GG", 34, 2, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + BB2 = Symbol("BB", 36, 3, SymbolBinding.GLOBAL, SymbolKind.OBJECT) + + TEST_SYMS = [GG, BB, AA, BB2, CC, FF, DD, EE] + + def setUp(self): + # This class tests both the SymbolIndex callable interface, and the + # Symbol Finder API. While this seems like it duplicates code, it's + # necessary to test both since they exercise different code paths: the + # Symbol Finder API uses a more efficient fast path. 
+ self.finder = SymbolIndex(self.TEST_SYMS) + self.prog = Program() + self.prog.register_symbol_finder("test", self.finder, enable_index=0) + + def test_name_single(self): + for sym in self.TEST_SYMS: + if sym.name != "BB": + self.assertEqual([sym], self.finder(self.prog, sym.name, None, True)) + self.assertEqual(sym, self.prog.symbol(sym.name)) + self.assertEqual([sym], self.finder(self.prog, sym.name, None, False)) + self.assertEqual([sym], self.prog.symbols(sym.name)) + + def test_name_multiple(self): + multi_result = self.finder(self.prog, "BB", None, False) + self.assertEqual(2, len(multi_result)) + self.assertIn(self.BB, multi_result) + self.assertIn(self.BB2, multi_result) + + multi_result = self.prog.symbols("BB") + self.assertEqual(2, len(multi_result)) + self.assertIn(self.BB, multi_result) + self.assertIn(self.BB2, multi_result) + + single_result = self.finder(self.prog, "BB", None, True) + self.assertIn(single_result[0], (self.BB, self.BB2)) + + single_result = self.prog.symbol("BB") + self.assertIn(single_result, (self.BB, self.BB2)) + + def test_addr(self): + cases = { + 9: [], + 10: [self.AA], + 12: [self.AA, self.BB], + 13: [self.AA, self.CC], + 15: [self.CC], + 25: [], + 28: [self.DD], + 30: [self.DD], + 34: [self.EE, self.FF, self.GG], + 35: [self.FF, self.GG], + 36: [self.FF, self.BB2], + 43: [self.FF], + 44: [], + } + for address, expected in cases.items(): + # first, lookup by address alone and ensure we get all correct + # candidates: + multi_result = self.finder(self.prog, None, address, False) + self.assertEqual(len(expected), len(multi_result)) + self.assertTrue(all(e in multi_result for e in expected)) + multi_result = self.prog.symbols(address) + self.assertEqual(len(expected), len(multi_result)) + self.assertTrue(all(e in multi_result for e in expected)) + + # next, ensure that the single lookup works as expected: + if expected: + single_result = self.finder(self.prog, None, address, True) + self.assertEqual(1, len(single_result)) + 
self.assertIn(single_result[0], expected) + single_result = self.prog.symbol(address) + self.assertIn(single_result, expected) + + # Now, test that adding a name filter correctly filters: + # This cannot be tested with the Program.symbol() API since only + # one filter is allowed there. + for sym in expected: + self.assertEqual([sym], self.finder(self.prog, sym.name, address, True)) + self.assertEqual( + [sym], self.finder(self.prog, sym.name, address, False) + ) + + self.assertEqual([], self.finder(None, "MISSING", address, True)) + self.assertEqual([], self.finder(None, "MISSING", address, False)) + + def test_all(self): + result = self.finder(self.prog, None, None, True) + self.assertEqual(1, len(result)) + self.assertIn(result[0], self.TEST_SYMS) + result = self.finder(self.prog, None, None, False) + self.assertEqual(len(self.TEST_SYMS), len(result)) + for sym in self.TEST_SYMS: + self.assertIn(sym, result) + result = self.prog.symbols() + self.assertEqual(len(self.TEST_SYMS), len(result)) + for sym in self.TEST_SYMS: + self.assertIn(sym, result) + + def test_empty_index(self): + index = SymbolIndex([]) + # Check all the possible query patterns to ensure they can safely handle + # an empty list. + self.assertEqual([], index(self.prog, "name search", None, True)) + self.assertEqual([], index(self.prog, "name search", None, False)) + self.assertEqual([], index(self.prog, None, 0xFFFF, True)) + self.assertEqual([], index(self.prog, None, 0xFFFF, False)) + self.assertEqual([], index(self.prog, "name search", 0xFFFF, True)) + self.assertEqual([], index(self.prog, "name search", 0xFFFF, False)) From 151c9bc8e7b8e4cfbb733e200426ac60c2717cc1 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Sun, 17 Mar 2024 10:01:51 -0700 Subject: [PATCH 4/6] helpers: linux: add vmlinux kallsyms helpers When properly configured, Linux contains its own symbol table within kernel memory at runtime. 
It is exposed as the /proc/kallsyms file, which is the easiest way to consume it, for live kernels. However, with recent changes to the Linux kernel in 6.0, necessary symbols are exposed within VMCOREINFO that allow us to interpret the data structures inside kernel memory, without needing debuginfo. This allows us to write helpers that can load kallsyms on vmcores, or on live systems. Signed-off-by: Stephen Brennan --- _drgn.pyi | 5 + drgn/helpers/linux/kallsyms.py | 75 +++ libdrgn/Makefile.am | 2 + libdrgn/kallsyms.c | 660 ++++++++++++++++++++ libdrgn/kallsyms.h | 46 ++ libdrgn/python/drgnpy.h | 6 + libdrgn/python/helpers.c | 54 ++ libdrgn/python/main.c | 6 + libdrgn/python/util.c | 11 + tests/linux_kernel/helpers/test_kallsyms.py | 95 +++ 10 files changed, 960 insertions(+) create mode 100644 drgn/helpers/linux/kallsyms.py create mode 100644 libdrgn/kallsyms.c create mode 100644 libdrgn/kallsyms.h create mode 100644 tests/linux_kernel/helpers/test_kallsyms.py diff --git a/_drgn.pyi b/_drgn.pyi index 4856c5b2b..9fcd8e75d 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2839,3 +2839,8 @@ def _linux_helper_pid_task(pid: Object, pid_type: IntegerLike) -> Object: def _linux_helper_find_task(__ns: Object, __pid: IntegerLike) -> Object: ... def _linux_helper_kaslr_offset(__prog: Program) -> int: ... def _linux_helper_pgtable_l5_enabled(__prog: Program) -> bool: ... +def _linux_helper_load_proc_kallsyms( + filename: Optional[str] = None, + modules: bool = False, +) -> SymbolIndex: ... +def _linux_helper_load_builtin_kallsyms(prog: Program) -> SymbolIndex: ... 
diff --git a/drgn/helpers/linux/kallsyms.py b/drgn/helpers/linux/kallsyms.py new file mode 100644 index 000000000..bd2c0887e --- /dev/null +++ b/drgn/helpers/linux/kallsyms.py @@ -0,0 +1,75 @@ +# Copyright (c) 2024 Oracle and/or its affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later +""" +Kallsyms +-------- + +The ``drgn.helpers.linux.kallsyms`` module contains helpers which allow you to +use the built-in kallsyms symbol table for drgn symbol lookup. Combined with an +alternative type information source, this can enable debugging Linux kernel core +dumps without the corresponding DWARF debuginfo files. Even without type +information, kallsyms can be used to help locate objects, and drgn's low-level +memory reading functions can be used to do basic debugging tasks. +""" +import os +import re +from typing import Dict + +from _drgn import ( + _linux_helper_load_builtin_kallsyms, + _linux_helper_load_proc_kallsyms as _load_proc_kallsyms, +) +from drgn import Program, ProgramFlags, SymbolIndex + +__all__ = ( + "load_vmlinux_kallsyms", +) + + +def _vmcoreinfo_symbols(prog: Program) -> Dict[str, int]: + vmcoreinfo_data = prog["VMCOREINFO"].string_().decode("ascii") + vmcoreinfo_symbols = {} + sym_re = re.compile(r"SYMBOL\(([^)]+)\)=([A-Fa-f0-9]+)") + for line in vmcoreinfo_data.strip().split("\n"): + match = sym_re.fullmatch(line) + if match: + vmcoreinfo_symbols[match.group(1)] = int(match.group(2), 16) + return vmcoreinfo_symbols + + +def _load_builtin_kallsyms(prog: Program) -> SymbolIndex: + symbol_reqd = [ + "kallsyms_names", + "kallsyms_token_table", + "kallsyms_token_index", + "kallsyms_num_syms", + "kallsyms_offsets", + "kallsyms_relative_base", + "kallsyms_addresses", + "_stext", + ] + symbols = _vmcoreinfo_symbols(prog) + args = [] + for sym in symbol_reqd: + args.append(symbols.get(sym, 0)) + return _linux_helper_load_builtin_kallsyms(prog, *args) + + +def load_vmlinux_kallsyms(prog: Program) -> SymbolIndex: + """ + Create a kallsyms index for vmlinux + + 
This function loads the kallsyms for the core kernel and returns a symbol + index. This function does not require that any debuginfo is loaded for the + kernel: it either relies on ``/proc/kallsyms`` (which requires running drgn + as root) or it parses internal data structures using information found from + the VMCOREINFO note (which requires Linux 6.0 or later, or a backport of + commit ``f09bddbd86619 ("vmcoreinfo: add kallsyms_num_syms symbol")`` and + its dependencies). + + :returns: a symbol index containing kallsyms for the core kernel (vmlinux) + """ + if prog.flags & ProgramFlags.IS_LIVE and os.geteuid() == 0: + return _load_proc_kallsyms() + else: + return _load_builtin_kallsyms(prog) diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 8cc46bbbe..261d097bf 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -73,6 +73,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ helpers.h \ io.c \ io.h \ + kallsyms.c \ + kallsyms.h \ language.c \ language.h \ language_c.c \ diff --git a/libdrgn/kallsyms.c b/libdrgn/kallsyms.c new file mode 100644 index 000000000..901965409 --- /dev/null +++ b/libdrgn/kallsyms.c @@ -0,0 +1,660 @@ +// Copyright (c) 2024 Oracle and/or its affiliates +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include + +#include "binary_buffer.h" +#include "drgn_internal.h" +#include "kallsyms.h" +#include "program.h" +#include "string_builder.h" +#include "symbol.h" + +/** + * This struct contains the tables necessary to reconstruct kallsyms names. + * + * vmlinux (core kernel) kallsyms names are compressed using table compression. + * There is some description of it in the kernel's "scripts/kallsyms.c", but + * this is a brief overview that should make the code below comprehensible. + * + * Table compression uses the remaining 128 characters not defined by ASCII and + * maps them to common substrings (e.g. the prefix "write_"). 
Each name is + * represented as a sequence of bytes which refers to strings in this table. + * The two arrays below comprise this table: + * + * - token_table: this is one long string with all of the tokens concatenated + * together, e.g. "a\0b\0c\0...z\0write_\0read_\0..." + * - token_index: this is a 256-entry long array containing the index into + * token_table where you'll find that token's string. + * + * To decode a string, for each byte you simply index into token_index, then use + * that to index into token_table, and copy that string into your buffer. + * + * The actual kallsyms symbol names are concatenated into a buffer called + * "names". The first byte in a name is the length (in tokens, not decoded + * bytes) of the symbol name. The remaining "length" bytes are decoded via the + * table as described above. The first decoded byte is a character representing + * what type of symbol this is (e.g. text, data structure, etc). + */ +struct kallsyms_reader { + uint32_t num_syms; + uint8_t *names; + size_t names_len; + char *token_table; + size_t token_table_len; + uint16_t *token_index; + bool long_names; +}; + +/* + * Kallsyms doesn't include symbol length. We determine symbol length by the + * start of the subsequent symbol. Unfortunately, there can be large gaps in + * the symbol table, for instance on x86_64 the Linux kernel has percpu symbols + * near the beginning of the address space, and a large gap before normal kernel + * symbols. The result of this is that we can create symbols with incredibly + * large sizes, and then drgn's symbolization will print addresses using that + * symbol and a very large offset, which is absolutely meaningless. + * + * To avoid this, we set a cap on the length of a symbol. Unfortunately, this is + * a heuristic. It's entirely possible to have very large data symbols. This + * value is chosen somewhat arbitrarily, but seems to produce decent results. 
+ */ +#define MAX_SYMBOL_LENGTH 0x10000 + +/* + * Since 73bbb94466fd3 ("kallsyms: support "big" kernel symbols"), the + * "kallsyms_names" array may use the most significant bit to indicate that the + * initial element for each symbol (normally representing the number of tokens + * in the symbol) requires two bytes. + * + * Unfortunately, that means that values 128-255 are now ambiguous: on older + * kernels, they should be interpreted literally, but on newer kernels, they + * require treating as a two byte sequence. Since the commit included no changes + * to the symbol names or vmcoreinfo, there's no way to detect it except via + * heuristics. + * + * The commit in question is a new feature and not likely to be backported to + * stable, so our heuristic is that it was first included in kernel 6.1. + * However, we first check the environment variable DRGN_KALLSYMS_LONG: if it + * exists, then we use its first character to determine our behavior: 1, y, Y + * all indicate that we should use long names. 0, n, N all indicate that we + * should not. + */ +static bool guess_long_names(struct drgn_program *prog) +{ + const char *env = getenv("DRGN_KALLSYMS_LONG"); + if (env) { + if (*env == '1' || *env == 'y' || *env == 'Y') + return true; + else if (*env == '0' || *env == 'n' || *env == 'N') + return false; + } + + char *p = prog->vmcoreinfo.osrelease; + long major = strtol(p, &p, 10); + long minor = 0; + if (*p == '.') + minor = strtol(p + 1, NULL, 10); + + return (major == 6 && minor >= 1) || major > 6; +} + +/** + * Copy the kallsyms names tables from the program into host memory. 
+ * @param prog Program to read from
+ * @param kr kallsyms_reader to populate
+ * @param loc Addresses of the kallsyms tables within @a prog
+ */
+static struct drgn_error *
+kallsyms_copy_tables(struct drgn_program *prog, struct kallsyms_reader *kr,
+		     struct kallsyms_locations *loc)
+{
+	struct drgn_error *err;
+	const size_t token_index_size = (UINT8_MAX + 1) * sizeof(uint16_t);
+	uint64_t last_token;
+	size_t names_idx;
+	char data;
+	uint8_t len_u8;
+	int len;
+	bool bswap;
+
+	err = drgn_program_bswap(prog, &bswap);
+	if (err)
+		return err;
+
+	// Read num_syms from vmcore (bswap is done for us already)
+	err = drgn_program_read_u32(prog,
+				    loc->kallsyms_num_syms,
+				    false, &kr->num_syms);
+	if (err)
+		return err;
+
+	// Read the token_index table: always UINT8_MAX + 1 entries, not num_syms
+	kr->token_index = malloc(token_index_size);
+	if (!kr->token_index)
+		return &drgn_enomem;
+	err = drgn_program_read_memory(prog, kr->token_index,
+				       loc->kallsyms_token_index,
+				       token_index_size, false);
+	if (err)
+		return err;
+	if (bswap)
+		for (size_t i = 0; i <= UINT8_MAX; i++)
+			kr->token_index[i] = bswap_16(kr->token_index[i]);
+
+	// Scan to the NUL ending the last token; last_token ends one past it,
+	// giving the token_table length. Then copy token_table to host memory.
+	last_token = loc->kallsyms_token_table + kr->token_index[UINT8_MAX];
+	do {
+		err = drgn_program_read_memory(prog, &data,
+					       last_token, 1, false);
+		if (err)
+			return err;
+
+		last_token++;
+	} while (data);
+	kr->token_table_len = last_token - loc->kallsyms_token_table;
+	kr->token_table = malloc(kr->token_table_len);
+	if (!kr->token_table)
+		return &drgn_enomem;
+	err = drgn_program_read_memory(prog, kr->token_table,
+				       loc->kallsyms_token_table,
+				       kr->token_table_len, false);
+	if (err)
+		return err;
+
+	// Ensure that all members of token_index are in-bounds for indexing
+	// into token_table.
+	for (size_t i = 0; i <= UINT8_MAX; i++)
+		if (kr->token_index[i] >= kr->token_table_len)
+			return drgn_error_format(DRGN_ERROR_OTHER,
+						 "kallsyms: token_index out of bounds (token_index[%zu] = %u >= %zu)",
+						 i, kr->token_index[i], kr->token_table_len);
+
+	// Now find the end of the names array by skipping through it, then copy
+	// that into host memory.
+	names_idx = 0;
+	kr->long_names = guess_long_names(prog);
+	for (size_t i = 0; i < kr->num_syms; i++) {
+		err = drgn_program_read_u8(prog,
+					   loc->kallsyms_names + names_idx,
+					   false, &len_u8);
+		if (err)
+			return err;
+		len = len_u8;
+		if ((len & 0x80) && kr->long_names) {
+			if (__builtin_add_overflow(names_idx, 1, &names_idx))
+				return drgn_error_create(DRGN_ERROR_OTHER,
+							 "couldn't find end of kallsyms_names");
+			err = drgn_program_read_u8(prog,
+						   loc->kallsyms_names + names_idx,
+						   false, &len_u8);
+			if (err)
+				return err;
+			// 73bbb94466fd3 ("kallsyms: support "big" kernel
+			// symbols") mentions that ULEB128 is used, but only
+			// implements the ability to encode lengths with 2
+			// bytes, for a maximum value of 16k. It's possible in
+			// the future we may need to support larger sizes, but
+			// it's difficult to predict the future of the kallsyms
+			// format. For now, just check that there's no third
+			// byte to the length.
+			if (len_u8 & 0x80)
+				return drgn_error_format(
+					DRGN_ERROR_OTHER,
+					"Unexpected 3-byte length encoding in kallsyms names"
+				);
+			len = (len & 0x7F) | (len_u8 << 7);
+		}
+		if (__builtin_add_overflow(names_idx, len + 1, &names_idx))
+			return drgn_error_format(
+				DRGN_ERROR_OTHER, "couldn't find end of kallsyms_names");
+	}
+	kr->names_len = names_idx;
+	kr->names = malloc(names_idx);
+	if (!kr->names)
+		return &drgn_enomem;
+	err = drgn_program_read_memory(prog, kr->names,
+				       loc->kallsyms_names,
+				       names_idx, false);
+	if (err)
+		return err;
+
+	return NULL;
+}
+
+static struct drgn_error *kallsyms_binary_buffer_error(struct binary_buffer *bb,
+							const char *pos,
+							const char *message)
+{
+	return drgn_error_format(DRGN_ERROR_OTHER,
+				 "couldn't parse kallsyms: %s", message);
+}
+
+/**
+ * Extract the symbol name and type
+ * @param kr Registry containing kallsyms data
+ * @param names_bb A binary buffer tracking our position within the
+ * `kallsyms_names` array
+ * @param sb Buffer to write output symbol to
+ * @param[out] kind_ret Where to write the symbol kind data
+ * @returns NULL on success, or an error
+ */
+static struct drgn_error *
+kallsyms_expand_symbol(struct kallsyms_reader *kr,
+		       struct binary_buffer *names_bb,
+		       struct string_builder *sb, char *kind_ret)
+{
+	uint64_t len;
+	struct drgn_error *err = binary_buffer_next_uleb128(names_bb, &len);
+	if (err)
+		return err;
+
+	const uint8_t *data = (uint8_t *)names_bb->pos;
+	err = binary_buffer_skip(names_bb, len);
+	if (err)
+		return err;
+
+	bool skipped_first = false;
+
+	while (len) {
+		char *token_ptr = &kr->token_table[kr->token_index[*data]];
+		while (*token_ptr) {
+			if (skipped_first) {
+				if (!string_builder_appendc(sb, *token_ptr))
+					return &drgn_enomem;
+			} else {
+				*kind_ret = *token_ptr;
+				skipped_first = true;
+			}
+			token_ptr++;
+		}
+
+		data++;
+		len--;
+	}
+
+	if (!string_builder_null_terminate(sb))
+		return &drgn_enomem;
+	return NULL;
+}
+
+/**
+ * Used to find _stext in the kallsyms
before we've moved everything into
+ * the drgn_symbol_index. Stores the matching index in @a ret, else errors.
+ */
+static struct drgn_error *
+search_for_string(struct kallsyms_reader *kr, const char *name, ssize_t *ret)
+{
+	STRING_BUILDER(sb);
+	size_t len = strlen(name);
+	struct binary_buffer names_bb;
+	binary_buffer_init(&names_bb, kr->names, kr->names_len, false,
+			   kallsyms_binary_buffer_error);
+	for (ssize_t i = 0; i < kr->num_syms; i++) {
+		char kind;
+		sb.len = 0;
+		struct drgn_error *err =
+			kallsyms_expand_symbol(kr, &names_bb, &sb, &kind);
+		if (err)
+			return err;
+		if (sb.len == len && strcmp(name, sb.str) == 0) {
+			*ret = i;
+			return NULL;
+		}
+	}
+	return drgn_error_format(DRGN_ERROR_OTHER,
+				 "Could not find '%s' symbol in kallsyms", name);
+}
+
+static void symbol_from_kallsyms(uint64_t address, char *name, char kind,
+				 uint64_t size, struct drgn_symbol *ret)
+{
+	char kind_lower = tolower((unsigned char)kind);
+	ret->name = name;
+	ret->address = address;
+	ret->size = size;
+	ret->binding = DRGN_SYMBOL_BINDING_GLOBAL;
+
+	// See nm(1) for information on decoding this "kind" character
+	if (kind == 'u')
+		ret->binding = DRGN_SYMBOL_BINDING_UNIQUE;
+	else if (kind_lower == 'v' || kind_lower == 'w')
+		ret->binding = DRGN_SYMBOL_BINDING_WEAK;
+	else if (isupper((unsigned char)kind))
+		ret->binding = DRGN_SYMBOL_BINDING_GLOBAL;
+	else
+		// If lowercase, the symbol is usually local, but it's
+		// not guaranteed. Use unknown for safety here.
+		ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN;
+
+	switch (kind_lower) {
+	case 'b': // bss
+	case 'c': // uninitialized data
+	case 'd': // initialized data
+	case 'g': // initialized data (small objects)
+	case 'r': // read-only data
+	case 'v': // weak object (guaranteed by elf_info() in kernel/module.c)
+		ret->kind = DRGN_SYMBOL_KIND_OBJECT;
+		break;
+	case 't': // text
+		ret->kind = DRGN_SYMBOL_KIND_FUNC;
+		break;
+	default:
+		ret->kind = DRGN_SYMBOL_KIND_UNKNOWN;
+	}
+	ret->name_lifetime = DRGN_LIFETIME_STATIC;
+}
+
+/** Compute an address via the CONFIG_KALLSYMS_ABSOLUTE_PERCPU method */
+static uint64_t absolute_percpu(uint64_t base, int32_t val)
+{
+	if (val >= 0)
+		return (uint64_t) val;
+	else
+		return base - 1 - val;
+}
+
+/**
+ * Load the kallsyms address information from @a prog
+ *
+ * Just as symbol name loading is complex, so is address loading. Addresses may
+ * be stored directly as an array of pointers, but more commonly, they are
+ * stored as an array of 32-bit integers which are related to an offset. This
+ * function decodes the addresses into a plain array of 64-bit addresses.
+ *
+ * @param prog The program to read from
+ * @param kr Reader whose num_syms and names describe the symbols to decode
+ * @param loc Addresses of the kallsyms symbols; result is stored in @a ret
+ * @returns NULL on success, or error
+ */
+static struct drgn_error *
+kallsyms_load_addresses(struct drgn_program *prog, struct kallsyms_reader *kr,
+			struct kallsyms_locations *loc, uint64_t **ret)
+{
+	struct drgn_error *err = NULL;
+	bool bswap, bits64;
+	_cleanup_free_ uint32_t *addr32 = NULL;
+
+	err = drgn_program_bswap(prog, &bswap);
+	if (err)
+		return err;
+	err = drgn_program_is_64_bit(prog, &bits64);
+	if (err)
+		return err;
+
+	_cleanup_free_ uint64_t *addresses =
+		malloc_array(kr->num_syms, sizeof(addresses[0]));
+	if (!addresses)
+		return &drgn_enomem;
+
+	if (loc->kallsyms_addresses) {
+		/*
+		 * The kallsyms addresses are stored as plain addresses in an
+		 * array of unsigned long! Read the appropriate size array and
+		 * do any necessary byte swaps.
+		 */
+		if (bits64) {
+			err = drgn_program_read_memory(prog, addresses,
+						       loc->kallsyms_addresses,
+						       kr->num_syms * sizeof(addresses[0]),
+						       false);
+			if (err)
+				return err;
+			if (bswap)
+				for (size_t i = 0; i < kr->num_syms; i++)
+					addresses[i] = bswap_64(addresses[i]);
+		} else {
+			addr32 = malloc_array(kr->num_syms, sizeof(addr32[0]));
+			if (!addr32)
+				return &drgn_enomem;
+
+			err = drgn_program_read_memory(prog, addr32,
+						       loc->kallsyms_addresses,
+						       kr->num_syms * sizeof(addr32[0]),
+						       false);
+			if (err)
+				return err;
+			for (size_t i = 0; i < kr->num_syms; i++) {
+				if (bswap)
+					addresses[i] = bswap_32(addr32[i]);
+				else
+					addresses[i] = addr32[i];
+			}
+		}
+	} else {
+		/*
+		 * The kallsyms addresses are stored in an array of 4-byte
+		 * values, which can be interpreted in two ways:
+		 * (1) if CONFIG_KALLSYMS_ABSOLUTE_PERCPU is enabled, then
+		 *     positive values are addresses, and negative values are
+		 *     offsets from a base address.
+		 * (2) otherwise, the 4-byte values are unsigned offsets from
+		 *     the base address.
+		 * First, read the values, then figure out which way to
+		 * interpret them.
+		 */
+		uint64_t relative_base;
+		if (bits64) {
+			// performs the bswap for us, if necessary
+			err = drgn_program_read_u64(prog, loc->kallsyms_relative_base,
+						    false, &relative_base);
+			if (err)
+				return err;
+		} else {
+			uint32_t rel32;
+			// performs the bswap for us, if necessary
+			err = drgn_program_read_u32(prog, loc->kallsyms_relative_base,
+						    false, &rel32);
+			if (err)
+				return err;
+			relative_base = rel32;
+		}
+		addr32 = malloc_array(kr->num_syms, sizeof(addr32[0]));
+		if (!addr32)
+			return &drgn_enomem;
+
+		err = drgn_program_read_memory(prog, addr32,
+					       loc->kallsyms_offsets,
+					       kr->num_syms * sizeof(uint32_t),
+					       false);
+		if (err)
+			return err;
+		if (bswap)
+			for (size_t i = 0; i < kr->num_syms; i++)
+				addr32[i] = bswap_32(addr32[i]);
+
+		/*
+		 * Now that we've read the offsets data, we need to determine
+		 * how to interpret them. To do this, use the _stext symbol. We
+		 * have the correct value from vmcoreinfo. Compute it both ways
+		 * and pick the correct interpretation.
+		 */
+		ssize_t stext_idx;
+		err = search_for_string(kr, "_stext", &stext_idx);
+		if (err)
+			return err;
+		uint64_t stext_abs = relative_base + addr32[stext_idx];
+		uint64_t stext_pcpu = absolute_percpu(relative_base, (int32_t)addr32[stext_idx]);
+		if (stext_abs == loc->_stext) {
+			for (size_t i = 0; i < kr->num_syms; i++)
+				addresses[i] = relative_base + addr32[i];
+		} else if (stext_pcpu == loc->_stext) {
+			for (size_t i = 0; i < kr->num_syms; i++)
+				addresses[i] = absolute_percpu(relative_base, (int32_t)addr32[i]);
+		} else {
+			// Neither interpretation reproduces _stext, so the
+			// offsets cannot be decoded reliably.
+			return drgn_error_create(
+				DRGN_ERROR_OTHER,
+				"Unable to interpret kallsyms address data");
+		}
+	}
+	*ret = no_cleanup_ptr(addresses);
+	return NULL;
+}
+
+static void kallsyms_reader_cleanup(struct kallsyms_reader *kr)
+{
+	free(kr->names);
+	free(kr->token_index);
+	free(kr->token_table);
+}
+
+struct drgn_error *
+drgn_load_builtin_kallsyms(struct drgn_program *prog,
+			   struct kallsyms_locations *loc,
+			   struct drgn_symbol_index *ret)
+{
+	if (!(loc->kallsyms_names && loc->kallsyms_token_table
+	      && loc->kallsyms_token_index && loc->kallsyms_num_syms))
+		return drgn_error_create(
+			DRGN_ERROR_MISSING_DEBUG_INFO,
+			"The symbols: kallsyms_names, kallsyms_token_table, "
+			"kallsyms_token_index, and kallsyms_num_syms were not "
+			"found in VMCOREINFO. There is not enough "
+			"information to load the kallsyms table."
+		);
+
+	_cleanup_(kallsyms_reader_cleanup) struct kallsyms_reader kr = {};
+
+	struct drgn_error *err = kallsyms_copy_tables(prog, &kr, loc);
+	if (err)
+		return err;
+
+	_cleanup_free_ uint64_t *addresses = NULL;
+	err = kallsyms_load_addresses(prog, &kr, loc, &addresses);
+	if (err)
+		return err;
+
+	_cleanup_(drgn_symbol_index_builder_deinit)
+		struct drgn_symbol_index_builder builder;
+	drgn_symbol_index_builder_init(&builder);
+	STRING_BUILDER(sb);
+
+	struct binary_buffer names_bb;
+	binary_buffer_init(&names_bb, kr.names, kr.names_len, false,
+			   kallsyms_binary_buffer_error);
+	for (size_t i = 0; i < kr.num_syms; i++) {
+		struct drgn_symbol symbol;
+		char kind;
+		uint64_t size = 0;
+		sb.len = 0;
+		err = kallsyms_expand_symbol(&kr, &names_bb, &sb, &kind);
+		if (err)
+			return err;
+		if (sb.len == 0)
+			return drgn_error_format(DRGN_ERROR_OTHER,
+						 "error: zero-length symbol in kallsyms");
+		if (i + 1 < kr.num_syms &&
+		    addresses[i + 1] - addresses[i] < MAX_SYMBOL_LENGTH)
+			size = addresses[i + 1] - addresses[i];
+		symbol_from_kallsyms(addresses[i], sb.str, kind, size, &symbol);
+		if (!drgn_symbol_index_builder_add(&builder, &symbol))
+			return &drgn_enomem;
+	}
+
+	return drgn_symbol_index_init_from_builder(ret, &builder);
+}
+
+/** Load kallsyms from @a filename (in /proc/kallsyms format) into @a ret */
+struct drgn_error *drgn_load_proc_kallsyms(const char *filename, bool modules,
+					   struct drgn_symbol_index *ret)
+{
+	_cleanup_fclose_ FILE *fp = fopen(filename, "r");
+	if (!fp)
+		return drgn_error_create_os("fopen", errno, filename);
+
+	struct drgn_error *err = NULL;
+	struct drgn_symbol sym = {};
+	_cleanup_(drgn_symbol_index_builder_deinit)
+		struct drgn_symbol_index_builder builder;
+	drgn_symbol_index_builder_init(&builder);
+	_cleanup_free_ char *line = NULL;
+	_cleanup_free_ char *current_module = NULL;
+	size_t line_size = 0, line_number = 1;
+	ssize_t res;
+	while ((res = getline(&line, &line_size, fp)) != -1) {
+		char *save = NULL;
+		char *name, *type_str, *mod, *addr_rem, *addr_str;
+		char type;
+		uint64_t addr;
+		bool new_module = false;
+
+		addr_str = strtok_r(line, " \t\r\n", &save);
+		type_str = strtok_r(NULL," \t\r\n", &save);
+		name = strtok_r(NULL," \t\r\n", &save);
+		mod = strtok_r(NULL," \t\r\n", &save);
+
+		if (!addr_str || !type_str || !name) {
+			err = drgn_error_format(DRGN_ERROR_OTHER,
+						"Error parsing kallsyms line %zu",
+						line_number);
+			break;
+		}
+		if (mod && !modules) {
+			break;
+		} else if (mod && (!current_module || strcmp(mod, current_module) != 0)) {
+			free(current_module);
+			current_module = strdup(mod);
+			new_module = true;
+			if (!current_module) {
+				err = &drgn_enomem;
+				break;
+			}
+		}
+
+		type = *type_str;
+		addr = strtoull(addr_str, &addr_rem, 16);
+		if (*addr_rem) {
+			// addr_rem should be set to the first un-parsed character, and
+			// since the entire string should be a valid base 16 integer,
+			// we expect it to be \0
+			err = drgn_error_format(DRGN_ERROR_OTHER,
+						"Invalid address \"%s\" in kallsyms line %zu",
+						addr_str, line_number);
+			break;
+		}
+
+		// We now may know the size of the previous symbol, so long as
+		// that symbol was in the same module as the current one.
+		// Otherwise we'll leave it as zero. Note that for module
+		// kallsyms, it has been observed that addresses are not always
+		// increasing, even within the same module, so we need to be
+		// careful to avoid overflow here.
+		if (!new_module && addr > sym.address) {
+			uint64_t size = addr - sym.address;
+			if (size < MAX_SYMBOL_LENGTH)
+				sym.size = size;
+		}
+		if (sym.name && !drgn_symbol_index_builder_add(&builder, &sym)) {
+			err = &drgn_enomem;
+			break;
+		}
+		free((char *)sym.name);
+
+		symbol_from_kallsyms(addr, name, type, 0, &sym);
+
+		// Copy the name so we don't clobber it in the next iteration
+		sym.name = strdup(name);
+		if (!sym.name) {
+			err = &drgn_enomem;
+			break;
+		}
+
+		line_number++;
+	}
+
+	if (!err && ferror(fp))
+		err = drgn_error_create_os("Error reading kallsyms", errno, filename);
+
+	// Append the final symbol
+	if (!err && sym.name && !drgn_symbol_index_builder_add(&builder, &sym))
+		err = &drgn_enomem;
+	free((char *)sym.name);
+
+	if (!err)
+		err = drgn_symbol_index_init_from_builder(ret, &builder);
+	return err;
+}
diff --git a/libdrgn/kallsyms.h b/libdrgn/kallsyms.h
new file mode 100644
index 000000000..cc95be488
--- /dev/null
+++ b/libdrgn/kallsyms.h
@@ -0,0 +1,46 @@
+// Copyright (c) 2024 Oracle and/or its affiliates
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+/**
+ * @file
+ *
+ * Kallsyms data handling
+ *
+ * See @ref Kallsyms
+ */
+
+#ifndef DRGN_KALLSYMS_H
+#define DRGN_KALLSYMS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hash_table.h"
+#include "symbol.h"
+
+struct kallsyms_locations {
+	uint64_t kallsyms_names;
+	uint64_t kallsyms_token_table;
+	uint64_t kallsyms_token_index;
+	uint64_t kallsyms_num_syms;
+	uint64_t kallsyms_offsets;
+	uint64_t kallsyms_relative_base;
+	uint64_t kallsyms_addresses;
+	uint64_t _stext;
+};
+
+/**
+ * Initialize a symbol index containing symbols from /proc/kallsyms
+ */
+struct drgn_error *drgn_load_proc_kallsyms(const char *filename, bool modules,
+					   struct drgn_symbol_index *ret);
+
+/**
+ * Initialize a symbol index containing symbols from built-in kallsyms tables
+ */
+struct drgn_error *
+drgn_load_builtin_kallsyms(struct drgn_program *prog,
+			   struct kallsyms_locations *loc,
+			   struct drgn_symbol_index *ret);
+ +#endif // DRGN_KALLSYMS_H diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 8c2dcd06f..7cfcfad9a 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -353,6 +353,8 @@ struct index_arg { }; int index_converter(PyObject *o, void *p); +int u64_converter(PyObject *o, void *p); + struct path_arg { bool allow_fd; bool allow_none; @@ -396,5 +398,9 @@ DrgnObject *drgnpy_linux_helper_pid_task(PyObject *self, PyObject *args, DrgnObject *drgnpy_linux_helper_find_task(PyObject *self, PyObject *args); PyObject *drgnpy_linux_helper_kaslr_offset(PyObject *self, PyObject *arg); PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *arg); +PyObject *drgnpy_linux_helper_load_proc_kallsyms(PyObject *self, PyObject *args, + PyObject *kwds); +PyObject *drgnpy_linux_helper_load_builtin_kallsyms(PyObject *self, PyObject *args, + PyObject *kwds); #endif /* DRGNPY_H */ diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 82d03018e..bc843c286 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -3,6 +3,7 @@ #include "drgnpy.h" #include "../helpers.h" +#include "../kallsyms.h" #include "../program.h" PyObject *drgnpy_linux_helper_direct_mapping_offset(PyObject *self, PyObject *arg) @@ -291,3 +292,56 @@ PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *arg) return PyErr_Format(PyExc_ValueError, "not Linux kernel"); Py_RETURN_BOOL(prog->prog.vmcoreinfo.pgtable_l5_enabled); } + +PyObject *drgnpy_linux_helper_load_proc_kallsyms(PyObject *self, PyObject *args, + PyObject *kwds) + +{ + static char *kwnames[] = {"filename", "modules", NULL}; + const char *filename = "/proc/kallsyms"; + int modules = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|sp:load_proc_kallsyms", + kwnames, &filename, &modules)) + return NULL; + + _cleanup_pydecref_ SymbolIndex *index = call_tp_alloc(SymbolIndex); + if (!index) + return set_drgn_error(&drgn_enomem); + + struct drgn_error *err = 
drgn_load_proc_kallsyms(filename, modules, &index->index); + if (err) + return set_drgn_error(err); + return (PyObject *)no_cleanup_ptr(index); +} + +PyObject * +drgnpy_linux_helper_load_builtin_kallsyms(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *kwnames[] = {"prog", "names", "token_table", "token_index", "num_syms", + "offsets", "relative_base", "addresses", "_stext", NULL}; + struct kallsyms_locations kl; + PyObject *prog_obj; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&O&O&O&O&O&O&O&:load_builtin_kallsyms", + kwnames, &Program_type, &prog_obj, + u64_converter, &kl.kallsyms_names, + u64_converter, &kl.kallsyms_token_table, + u64_converter, &kl.kallsyms_token_index, + u64_converter, &kl.kallsyms_num_syms, + u64_converter, &kl.kallsyms_offsets, + u64_converter, &kl.kallsyms_relative_base, + u64_converter, &kl.kallsyms_addresses, + u64_converter, &kl._stext)) + return NULL; + + struct drgn_program *prog = &((Program *)prog_obj)->prog; + _cleanup_pydecref_ SymbolIndex *index = call_tp_alloc(SymbolIndex); + if (!index) + return set_drgn_error(&drgn_enomem); + + struct drgn_error *err = drgn_load_builtin_kallsyms(prog, &kl, &index->index); + if (err) + return set_drgn_error(err); + return (PyObject *)no_cleanup_ptr(index); +} diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index cd9e93874..2da8b10a4 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -208,6 +208,12 @@ static PyMethodDef drgn_methods[] = { METH_O}, {"_linux_helper_pgtable_l5_enabled", drgnpy_linux_helper_pgtable_l5_enabled, METH_O}, + {"_linux_helper_load_proc_kallsyms", + (PyCFunction)drgnpy_linux_helper_load_proc_kallsyms, + METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_load_builtin_kallsyms", + (PyCFunction)drgnpy_linux_helper_load_builtin_kallsyms, + METH_VARARGS | METH_KEYWORDS}, {}, }; diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index 16200f456..40b09f36f 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ 
-78,6 +78,17 @@ int index_converter(PyObject *o, void *p) } } +int u64_converter(PyObject *o, void *p) +{ + uint64_t *arg = p; + + _cleanup_pydecref_ PyObject *index_obj = PyNumber_Index(o); + if (!index_obj) + return 0; + *arg = PyLong_AsUint64(index_obj); + return (*arg != UINT64_C(-1) || !PyErr_Occurred()); +} + int path_converter(PyObject *o, void *p) { if (o == NULL) { diff --git a/tests/linux_kernel/helpers/test_kallsyms.py b/tests/linux_kernel/helpers/test_kallsyms.py new file mode 100644 index 000000000..4533de2c4 --- /dev/null +++ b/tests/linux_kernel/helpers/test_kallsyms.py @@ -0,0 +1,95 @@ +# Copyright (c) 2024 Oracle and/or its affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later +import re +import tempfile +from unittest import TestCase + +from drgn import Symbol, SymbolBinding, SymbolKind +from drgn.helpers.linux.kallsyms import _load_builtin_kallsyms, _load_proc_kallsyms +from tests.linux_kernel import LinuxKernelTestCase + + +def compare_local_symbols(self, finder, modules=False): + expr = re.compile( + r"(?P
[0-9a-f]+) (?P.) " r"(?P[^\s]+)(\s+(?P\[\w+\]))?" + ) + names = {} + count = 0 + with open("/proc/kallsyms") as f: + for line in f: + match = expr.fullmatch(line.strip()) + self.assertIsNotNone(match, line) + if match.group("mod") and not modules: + break + count += 1 + name = match.group("name") + addr = int(match.group("address"), 16) + names.setdefault(name, []).append((addr, match.group("kind"), name)) + + for name, syms in names.items(): + res = finder(None, name, None, False) + expected_addrs = sorted(t[0] for t in syms) + found_addrs = sorted(s.address for s in res) + self.assertEqual(expected_addrs, found_addrs) + + all_res = finder(None, None, None, False) + self.assertEqual(count, len(all_res)) + + +KALLSYMS_DATA = b"""\ +0000000000000000 u null +0000000000000008 d local_data +0000000000000010 B global_bss +0000000000000020 v weak_symbol +0000000000000040 ? unknown +0000000000001000 T text [mymod] +0000000000002000 T modfunc1 [mymod2] +0000000000002010 T modfunc2 [mymod2] +0000000000002008 T modfunc3 [mymod2] +""" + + +class TestProcKallsyms(TestCase): + def test_local_proc_kallsyms(self): + finder = _load_proc_kallsyms() + compare_local_symbols(self, finder) + + def test_local_proc_kallsyms_with_modules(self): + finder = _load_proc_kallsyms(modules=True) + compare_local_symbols(self, finder, modules=True) + + def test_static_data(self): + with tempfile.NamedTemporaryFile() as f: + f.write(KALLSYMS_DATA) + f.flush() + finder = _load_proc_kallsyms(filename=f.name, modules=True) + + syms = finder(None, None, None, False) + expected = [ + Symbol("null", 0x0, 8, SymbolBinding.UNIQUE, SymbolKind.UNKNOWN), + Symbol("local_data", 0x8, 8, SymbolBinding.UNKNOWN, SymbolKind.OBJECT), + Symbol("global_bss", 0x10, 16, SymbolBinding.GLOBAL, SymbolKind.OBJECT), + Symbol("weak_symbol", 0x20, 32, SymbolBinding.WEAK, SymbolKind.OBJECT), + # this one has zero size since it is at the end of vmlinux + Symbol("unknown", 0x40, 0, SymbolBinding.UNKNOWN, SymbolKind.UNKNOWN), + # 
this one has zero size since it is at the end of a module + Symbol("text", 0x1000, 0, SymbolBinding.GLOBAL, SymbolKind.FUNC), + # this one has a non-zero size since it is within a module + Symbol("modfunc1", 0x2000, 16, SymbolBinding.GLOBAL, SymbolKind.FUNC), + # this one has a zero size since it is at the end of the file. It is + # returned in sorted order by address despite kallsyms not + # containing it in sorted order. + Symbol("modfunc3", 0x2008, 0, SymbolBinding.GLOBAL, SymbolKind.FUNC), + # this one has a zero size since it is followed by an out-of-order + # symbol + Symbol("modfunc2", 0x2010, 0, SymbolBinding.GLOBAL, SymbolKind.FUNC), + ] + self.assertEqual(syms, expected) + + +class TestBuiltinKallsyms(LinuxKernelTestCase): + def test_builtin_kallsyms(self): + if b"kallsyms_num_syms" not in self.prog["VMCOREINFO"].string_(): + self.skipTest("VMCOREINFO is missing necessary symbols") + finder = _load_builtin_kallsyms(self.prog) + compare_local_symbols(self, finder) From 19ab162ef4fc16e07f9a0825fb0cbe9bafefe458 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 18 Mar 2024 11:57:23 -0700 Subject: [PATCH 5/6] helpers: linux: add module kallsyms helpers Add Python helpers which load module kallsyms and return a symbol index for them. Unlike the /proc/kallsyms and built-in kallsyms, these are quite easy to handle using regular Python & drgn code, so implement them as Python helpers. There are (at least) two use cases for these helpers: 1. After loading CTF and built-in vmlinux kallsyms, support for module kallsyms is still necessary. 2. Sometimes, people only extract vmlinux DWARF debuginfo. Adding module symbols can allow stack traces and other symbolization to work even without module debuginfo. 
Signed-off-by: Stephen Brennan --- drgn/helpers/linux/kallsyms.py | 166 +++++++++++++++++++- tests/linux_kernel/helpers/test_kallsyms.py | 20 ++- 2 files changed, 182 insertions(+), 4 deletions(-) diff --git a/drgn/helpers/linux/kallsyms.py b/drgn/helpers/linux/kallsyms.py index bd2c0887e..448cbe239 100644 --- a/drgn/helpers/linux/kallsyms.py +++ b/drgn/helpers/linux/kallsyms.py @@ -13,16 +13,26 @@ """ import os import re -from typing import Dict +from typing import Dict, List, Tuple from _drgn import ( _linux_helper_load_builtin_kallsyms, _linux_helper_load_proc_kallsyms as _load_proc_kallsyms, ) -from drgn import Program, ProgramFlags, SymbolIndex +from drgn import ( + Object, + Program, + ProgramFlags, + Symbol, + SymbolBinding, + SymbolIndex, + SymbolKind, +) +from drgn.helpers.linux.module import for_each_module __all__ = ( "load_vmlinux_kallsyms", + "load_module_kallsyms", ) @@ -73,3 +83,155 @@ def load_vmlinux_kallsyms(prog: Program) -> SymbolIndex: return _load_proc_kallsyms() else: return _load_builtin_kallsyms(prog) + + +def _nm_type_to_binding_kind(code: str) -> Tuple[SymbolBinding, SymbolKind]: + binding = SymbolBinding.UNKNOWN + kind = SymbolKind.UNKNOWN + if code == "v": + binding = SymbolBinding.WEAK + kind = SymbolKind.OBJECT + elif code == "w": + binding = SymbolBinding.WEAK + elif code in "tT": + kind = SymbolKind.FUNC + elif code.lower() in "srbgncd": + kind = SymbolKind.OBJECT + if binding == SymbolBinding.UNKNOWN and code.isupper(): + binding = SymbolBinding.GLOBAL + return binding, kind + + +def _st_info_to_binding_kind(info: int) -> Tuple[SymbolBinding, SymbolKind]: + binding_int = info >> 4 + STB_WEAK = 2 + STB_GNU_UNIQUE = 10 + if binding_int <= STB_WEAK or binding_int == STB_GNU_UNIQUE: + binding = SymbolBinding(binding_int + 1) + else: + binding = SymbolBinding.UNKNOWN + type_ = info & 0xF + STT_TLS = 6 + STT_GNU_IFUNC = 10 + if type_ <= STT_TLS or type_ == STT_GNU_IFUNC: + kind = SymbolKind(type_) + else: + kind = SymbolKind.UNKNOWN + 
return binding, kind + + +def _elf_sym_to_symbol(name: str, obj: Object, has_typetab: bool) -> Symbol: + # Linux likes to have the nm(1) character code for its symbols, which it + # refers to as the symbol's "type" (this is of course distinct from the ELF + # notion of a symbol type, let alone what drgn considers a "type"...). + # + # Prior to 5439c985c5a8 ("module: Overwrite st_size instead of st_info"), + # merged in v5.0, the kernel simply overwrote the "st_info" field with a + # single-character code that represents the nm(1) character code for that + # symbol. However, starting with that commit, it was switched to overwrite + # the "st_size" field instead! This was thankfully fixed in v5.2 with + # 1c7651f43777 ("kallsyms: store type information in its own array"). + # + # Unfortunately, this leaves us with three possibilities: + # 1. Pre-v5.0: interpret the "st_info" as a character from nm(1) and try to + # infer the kind and bindings. + # 2. 5.0-5.2: interpret the "st_info" as normal, but ignore the "st_size" + # field since it is bogus. + # 3. 5.2+: both fields are valid, and the nm(1) code is stored in "typetab". + # + # Case 3 can be determined easily by the presence of "typetab" in "struct + # mod_kallsyms". However, cases 1 & 2 are indistinguishable. For our + # purposes, it makes more sense to fall back to case 1. After all, neither + # 5.0 nor 5.1 was an LTS kernel, nor is either actively used by any major + # distro. We have no way to deal with 5.0 or 5.1, whereas we can make some + # informed guesses for pre-5.0 based on the nm(1) code. 
+ if has_typetab: + binding, kind = _st_info_to_binding_kind(obj.st_info.value_()) + else: + binding, kind = _nm_type_to_binding_kind(chr(obj.st_info.value_())) + return Symbol( # type: ignore + name, + obj.st_value.value_(), + obj.st_size.value_(), + binding, + kind, + ) + + +def _module_kallsyms(module: Object) -> List[Symbol]: + """ + Return a list of symbols for a kernel module + + When compiled with ``CONFIG_KALLSYMS``, the kernel maintains ELF symbol + information about each module within ``struct module``. This function + accesses this symbol information, and returns a list of drgn :class:`Symbol` + objects for the module. Keep in mind that unless ``CONFIG_KALLSYMS_ALL`` is + enabled, these symbols are typically only function symbols. + + :param module: :class:`Object` of type ``struct module *`` + :returns: a list of symbols + """ + try: + ks = module.kallsyms + except AttributeError: + # Prior to 8244062ef1e54 ("modules: fix longstanding /proc/kallsyms vs + # module insertion race."), the kallsyms variables were stored directly + # on the module object. This commit was introduced in 4.5, but was + # backported to some stable kernels too. Fall back to the module object + # in cases where kallsyms field isn't available. + ks = module + + prog = module.prog_ + num_symtab = ks.num_symtab.value_() + try: + ks.member_("typetab") + has_typetab = True + except LookupError: + has_typetab = False + + # The symtab field is a pointer, but it points at an array of Elf_Sym + # objects. Indexing it requires drgn to do pointer arithmetic and issue a + # lot of very small /proc/kcore reads, which can be a real performance + # issue. So convert it into an object representing a correctly-sized array, + # and then read that object all at once. This does one /proc/kcore read, + # which is a major improvement! 
+ symtab = Object( + prog, + type=prog.array_type(ks.symtab.type_.type, num_symtab), + address=ks.symtab.value_(), + ).read_() + + # The strtab is similarly a pointer into a contiguous array of strings packed + # next to each other. Reading individual strings from /proc/kcore can be + # quite slow. So read the entire array of bytes into a Python bytes value, + # and we'll extract the individual symbol strings from there. + last_string_start = symtab[num_symtab - 1].st_name.value_() + last_string_len = len(ks.strtab[last_string_start].address_of_().string_()) + 1 + strtab = prog.read(ks.strtab.value_(), last_string_start + last_string_len) + syms = [] + for i in range(ks.num_symtab.value_()): + elfsym = symtab[i] + if not elfsym.st_name: + continue + str_index = elfsym.st_name.value_() + nul_byte = strtab.find(b"\x00", str_index) + name = strtab[str_index:nul_byte].decode("ascii") + syms.append(_elf_sym_to_symbol(name, elfsym, has_typetab)) + return syms + + +def load_module_kallsyms(prog: Program) -> SymbolIndex: + """ + Return a symbol index containing all module symbols from kallsyms + + For kernels built with ``CONFIG_KALLSYMS``, loaded kernel modules contain + an ELF symbol table in kernel memory. This function can parse those data + structures and create a symbol index usable by drgn. However, it requires + that you already have debuginfo for the vmlinux image. 
+ + :returns: a symbol index containing all symbols from module kallsyms + """ + all_symbols = [] + for module in for_each_module(prog): + all_symbols.extend(_module_kallsyms(module)) + return SymbolIndex(all_symbols) diff --git a/tests/linux_kernel/helpers/test_kallsyms.py b/tests/linux_kernel/helpers/test_kallsyms.py index 4533de2c4..104f6fae5 100644 --- a/tests/linux_kernel/helpers/test_kallsyms.py +++ b/tests/linux_kernel/helpers/test_kallsyms.py @@ -5,8 +5,12 @@ from unittest import TestCase from drgn import Symbol, SymbolBinding, SymbolKind -from drgn.helpers.linux.kallsyms import _load_builtin_kallsyms, _load_proc_kallsyms -from tests.linux_kernel import LinuxKernelTestCase +from drgn.helpers.linux.kallsyms import ( + _load_builtin_kallsyms, + _load_proc_kallsyms, + load_module_kallsyms, +) +from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod def compare_local_symbols(self, finder, modules=False): @@ -93,3 +97,15 @@ def test_builtin_kallsyms(self): self.skipTest("VMCOREINFO is missing necessary symbols") finder = _load_builtin_kallsyms(self.prog) compare_local_symbols(self, finder) + + @skip_unless_have_test_kmod + def test_module_kallsyms(self): + finder = load_module_kallsyms(self.prog) + test_data = finder(None, "drgn_test_empty_list", None, True)[0] + self.assertEqual("drgn_test_empty_list", test_data.name) + self.assertEqual(SymbolKind.OBJECT, test_data.kind) + self.assertIn(test_data.binding, (SymbolBinding.GLOBAL, SymbolBinding.UNKNOWN)) + size = self.prog.type("struct list_head").size + self.assertEqual(size, test_data.size) + address = self.prog.object("drgn_test_empty_list").address_ + self.assertEqual(address, test_data.address) From ee3f77ffc9e4213f34efc970874abbea552698c4 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 18 Mar 2024 00:20:32 -0700 Subject: [PATCH 6/6] libdrgn: add lifetime to drgn_symbol, reduce copying The SymbolIndex structure owns the memory for symbols and names, and once added to the 
Program, it cannot be removed. Making copies of any of these symbols is purely a waste: we should be able to treat them as static. So add a lifetime and allow the SymbolIndex to specify static, avoiding unnecessary copies and frees. Signed-off-by: Stephen Brennan --- libdrgn/kallsyms.c | 1 + libdrgn/symbol.c | 29 ++++++++++------------------- libdrgn/symbol.h | 1 + 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/libdrgn/kallsyms.c b/libdrgn/kallsyms.c index 901965409..e7a12fd14 100644 --- a/libdrgn/kallsyms.c +++ b/libdrgn/kallsyms.c @@ -345,6 +345,7 @@ static void symbol_from_kallsyms(uint64_t address, char *name, char kind, ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; } ret->name_lifetime = DRGN_LIFETIME_STATIC; + ret->lifetime = DRGN_LIFETIME_STATIC; // avoid copying } /** Compute an address via the CONFIG_KALLSYMS_ABSOLUTE_PERCPU method*/ diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 786d8e0b5..51177deb0 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -16,6 +16,8 @@ DEFINE_VECTOR_FUNCTIONS(symbol_vector); LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) { + if (sym && sym->lifetime == DRGN_LIFETIME_STATIC) + return; if (sym && sym->name_lifetime == DRGN_LIFETIME_OWNED) /* Cast here is necessary - we want symbol users to * never modify sym->name, but when we own the name, @@ -37,6 +39,7 @@ void drgn_symbol_from_elf(const char *name, uint64_t address, { ret->name = name; ret->name_lifetime = DRGN_LIFETIME_STATIC; + ret->lifetime = DRGN_LIFETIME_OWNED; ret->address = address; ret->size = elf_sym->st_size; int binding = GELF_ST_BIND(elf_sym->st_info); @@ -84,6 +87,7 @@ drgn_symbol_create(const char *name, uint64_t address, uint64_t size, sym->binding = binding; sym->kind = kind; sym->name_lifetime = name_lifetime; + sym->lifetime = DRGN_LIFETIME_OWNED; *ret = sym; return NULL; } @@ -314,21 +318,6 @@ static void address_search_range(struct drgn_symbol_index *index, uint64_t addre #undef less_than_end } -/** Allocate a copy 
of the symbol and add to it the builder */ -static bool add_symbol_result(struct drgn_symbol_result_builder *builder, - struct drgn_symbol *symbol) -{ - struct drgn_symbol *copy = malloc(sizeof(*copy)); - if (!copy) - return false; - *copy = *symbol; - if (!drgn_symbol_result_builder_add(builder, copy)) { - free(copy); - return false; - } - return true; -} - struct drgn_error * drgn_symbol_index_find(const char *name, uint64_t address, enum drgn_find_symbol_flags flags, void *arg, @@ -352,7 +341,7 @@ drgn_symbol_index_find(const char *name, uint64_t address, if ((flags & DRGN_FIND_SYMBOL_NAME) && strcmp(s->name, name) != 0) continue; - if (!add_symbol_result(builder, s)) + if (!drgn_symbol_result_builder_add(builder, s)) return &drgn_enomem; if (flags & DRGN_FIND_SYMBOL_ONE) break; @@ -364,7 +353,7 @@ drgn_symbol_index_find(const char *name, uint64_t address, return NULL; for (uint32_t i = it.entry->value.start; i < it.entry->value.end; i++) { struct drgn_symbol *s = &index->symbols[index->name_sort[i]]; - if (!add_symbol_result(builder, s)) + if (!drgn_symbol_result_builder_add(builder, s)) return &drgn_enomem; if (flags & DRGN_FIND_SYMBOL_ONE) break; @@ -372,7 +361,7 @@ drgn_symbol_index_find(const char *name, uint64_t address, } else { for (int i = 0; i < index->num_syms; i++) { struct drgn_symbol *s = &index->symbols[i]; - if (!add_symbol_result(builder, s)) + if (!drgn_symbol_result_builder_add(builder, s)) return &drgn_enomem; if (flags & DRGN_FIND_SYMBOL_ONE) break; @@ -426,11 +415,13 @@ drgn_symbol_index_init_from_builder(struct drgn_symbol_index *index, // Now that the name array is finalized, resolve the names to real // pointers. Update the name lifetime to static, reflecting that the // symbol name is owned by the finder whose lifetime is bound to the - // program's once it is attached. + // program's once it is attached. The same goes for the symbol. Using + // static lifetimes helps avoid unnecessary copying. 
for (size_t i = 0; i < num_syms; i++) { size_t string_index = (size_t)symbols[i].name; symbols[i].name = &names[string_index]; symbols[i].name_lifetime = DRGN_LIFETIME_STATIC; + symbols[i].lifetime = DRGN_LIFETIME_STATIC; } if (num_syms > UINT32_MAX) { diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index 410ebe2a3..c3dd75ca7 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -21,6 +21,7 @@ struct drgn_symbol { enum drgn_symbol_binding binding; enum drgn_symbol_kind kind; enum drgn_lifetime name_lifetime; + enum drgn_lifetime lifetime; }; struct drgn_symbol_finder {