From 07b92704d479bab26b100fca7d50767fdce53bbb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 26 Apr 2024 15:27:15 -0700 Subject: [PATCH 1/6] libdrgn: add interface for registering chains of named handlers This will be used to allow providing names for type, object, and symbol finders and configuring which ones are called and in what order. We might even want this for memory readers. I'm assuming there will only be a handful of handlers on a given list, but the enabled handlers will be called frequently, and there may be many lists. The implementation is therefore optimized for fast iteration and small size, and we don't cache anything that would speed up reconfiguring the list. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 + libdrgn/drgn.h | 7 +++ libdrgn/handler.c | 138 ++++++++++++++++++++++++++++++++++++++++++++ libdrgn/handler.h | 79 +++++++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 libdrgn/handler.c create mode 100644 libdrgn/handler.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 92e06e09a..afb261c70 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -63,6 +63,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ error.c \ error.h \ generics.h \ + handler.c \ + handler.h \ hash_table.c \ hash_table.h \ helpers.h \ diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index ad897fba7..76d3a7ef6 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -580,6 +580,13 @@ drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, */ bool drgn_filename_matches(const char *haystack, const char *needle); +enum { + /** Enable a handler after all enabled handlers. */ + DRGN_HANDLER_REGISTER_ENABLE_LAST = SIZE_MAX, + /** Don't enable a handler. */ + DRGN_HANDLER_REGISTER_DONT_ENABLE = SIZE_MAX - 1, +}; + /** * Callback for finding a type. * diff --git a/libdrgn/handler.c b/libdrgn/handler.c new file mode 100644 index 000000000..3d06182b2 --- /dev/null +++ b/libdrgn/handler.c @@ -0,0 +1,138 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include + +#include "cleanup.h" +#include "drgn.h" +#include "handler.h" +#include "hash_table.h" +#include "util.h" + +struct drgn_error *drgn_handler_list_register(struct drgn_handler_list *list, + struct drgn_handler *new_handler, + size_t enable_idx, + const char *what) +{ + struct drgn_handler **insert_pos = &list->head; + size_t num_enabled = 0; + for (struct drgn_handler *handler = list->head; handler; + handler = handler->next) { + if (strcmp(new_handler->name, handler->name) == 0) { + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "duplicate %s name '%s'", + what, handler->name); + } + if (handler->enabled && num_enabled < enable_idx) { + insert_pos = &handler->next; + num_enabled++; + } + } + new_handler->enabled = enable_idx != DRGN_HANDLER_REGISTER_DONT_ENABLE; + new_handler->next = *insert_pos; + *insert_pos = new_handler; + return NULL; +} + +#define drgn_handler_list_for_each_registered(handler, list) \ + for (struct drgn_handler *handler = (list)->head; handler; \ + handler = handler->next) + +struct drgn_error *drgn_handler_list_registered(struct drgn_handler_list *list, + const char ***names_ret, + size_t *count_ret) +{ + size_t n = 0; + drgn_handler_list_for_each_registered(handler, list) + n++; + const char **names = malloc_array(n, sizeof(names[0])); + if (!names) + return &drgn_enomem; + size_t i = 0; + drgn_handler_list_for_each_registered(handler, list) + names[i++] = handler->name; + *names_ret = names; + *count_ret = n; + return NULL; +} + +static inline const char *drgn_handler_entry_to_key(const uintptr_t *entry) +{ + return ((struct drgn_handler *)(*entry & ~1))->name; +} + +DEFINE_HASH_TABLE(drgn_handler_table, uintptr_t, drgn_handler_entry_to_key, + c_string_key_hash_pair, c_string_key_eq); + +struct drgn_error *drgn_handler_list_set_enabled(struct drgn_handler_list *list, + const char * const *names, + size_t count, const char *what) +{ + // Put all of the handlers in a hash table of tagged pointers. + _cleanup_(drgn_handler_table_deinit) + struct drgn_handler_table table = HASH_TABLE_INIT; + drgn_handler_list_for_each_registered(handler, list) { + uintptr_t entry = (uintptr_t)handler; + if (drgn_handler_table_insert(&table, &entry, NULL) < 0) + return &drgn_enomem; + } + + // Check the list of names. + for (size_t i = 0; i < count; i++) { + auto it = drgn_handler_table_search(&table, &names[i]); + if (!it.entry) { + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "%s '%s' not found", what, + names[i]); + } + if (*it.entry & 1) { + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "%s '%s' enabled multiple times", + what, names[i]); + } + *it.entry |= 1; + } + + // Insert the enabled handlers and delete them from the hash table. + struct drgn_handler **handlerp = &list->head; + for (size_t i = 0; i < count; i++) { + auto it = drgn_handler_table_search(&table, &names[i]); + struct drgn_handler *handler = + (struct drgn_handler *)(*it.entry & ~1); + handler->enabled = true; + *handlerp = handler; + handlerp = &handler->next; + drgn_handler_table_delete_iterator(&table, it); + } + + // The remaining handlers in the hash table are disabled. Insert them. + for (auto it = drgn_handler_table_first(&table); it.entry; + it = drgn_handler_table_next(it)) { + struct drgn_handler *handler = (struct drgn_handler *)*it.entry; + handler->enabled = false; + *handlerp = handler; + handlerp = &handler->next; + } + *handlerp = NULL; + + return NULL; +} + +struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, + const char ***names_ret, + size_t *count_ret) +{ + size_t n = 0; + drgn_handler_list_for_each_enabled(struct drgn_handler, handler, list) + n++; + const char **names = malloc_array(n, sizeof(names[0])); + if (!names) + return &drgn_enomem; + size_t i = 0; + drgn_handler_list_for_each_enabled(struct drgn_handler, handler, list) + names[i++] = handler->name; + *names_ret = names; + *count_ret = n; + return NULL; +} diff --git a/libdrgn/handler.h b/libdrgn/handler.h new file mode 100644 index 000000000..2f7506a85 --- /dev/null +++ b/libdrgn/handler.h @@ -0,0 +1,79 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * Chains of named handlers. + */ + +#ifndef DRGN_HANDLER_H +#define DRGN_HANDLER_H + +#include + +// This should be embedded as the first member in a structure containing the +// handler implementation. +struct drgn_handler { + const char *name; + struct drgn_handler *next; + bool enabled; + // Whether this structure and name need to be freed. + bool free; +}; + +// This is optimized for frequent drgn_handler_list_for_each_enabled() +// operations; everything else is expected to be rare, so we keep this as small +// as possible. +struct drgn_handler_list { + // All enabled handlers are first, in order, followed by disabled + // handlers in no particular order. + struct drgn_handler *head; +}; + +// handler->name and handler->free must be initialized. +struct drgn_error *drgn_handler_list_register(struct drgn_handler_list *list, + struct drgn_handler *handler, + size_t enable_index, + const char *what); + +struct drgn_error *drgn_handler_list_registered(struct drgn_handler_list *list, + const char ***names_ret, + size_t *count_ret); + +struct drgn_error *drgn_handler_list_set_enabled(struct drgn_handler_list *list, + const char * const *names, + size_t count, + const char *what); + +struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, + const char ***names_ret, + size_t *count_ret); + +// Helper to simplify the casting and naming in drgn_handler_list_deinit(). +static inline struct drgn_handler * +drgn_handler_free_and_next(struct drgn_handler *handler) +{ + struct drgn_handler *next = handler->next; + if (handler->free) { + free((char *)handler->name); + free(handler); + } + return next; +} + +// Free all registered handlers, optionally executing a statement for each one. +#define drgn_handler_list_deinit(type, handler, list, ...) do { \ + type *handler = (type *)(list)->head; \ + while (handler) { \ + __VA_ARGS__ \ + handler = (type *)drgn_handler_free_and_next((struct drgn_handler *)handler);\ + } \ +} while (0) + +#define drgn_handler_list_for_each_enabled(type, handler, list) \ + for (type *handler = (type *)(list)->head; \ + handler && ((struct drgn_handler *)handler)->enabled; \ + handler = (type *)((struct drgn_handler *)handler)->next) + +#endif /* DRGN_HANDLER_H */ From 8ebbb4f6ee7e5e1f42a5b7cdc0f14bc4b44c8025 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 26 Apr 2024 15:33:37 -0700 Subject: [PATCH 2/6] Allow naming and configuring order of symbol finders Currently, the bare-bones add_symbol_finder() interface only allows adding a symbol finder that is called before any existing finders. It'd be useful to be able to specify the order that symbol finders should be called in and to selectively enable and disable them. To do that, we also need finders to have a name to identify them by. So, replace add_symbol_finder() (which hasn't been in a release yet) with a set of interfaces providing this flexibility: register_symbol_finder(), set_enabled_symbol_finders(), registered_symbol_finders(), and enabled_symbol_finders(). Also change the callback signature to take the program. In particular, this flexibility will be very useful for a plugin system: pre-installed plugins can register symbol finders that the user can choose to enable or disable. Signed-off-by: Omar Sandoval --- _drgn.pyi | 86 +++++++++++----- libdrgn/debug_info.c | 8 +- libdrgn/drgn.h | 111 +++++++++++++++------ libdrgn/program.c | 127 +++++++++++++++--------- libdrgn/program.h | 14 ++- libdrgn/python/program.c | 207 ++++++++++++++++++++++++++++++++------- libdrgn/symbol.h | 6 +- tests/test_program.py | 90 +++++++++++++++++ tests/test_symbol.py | 4 +- 9 files changed, 507 insertions(+), 146 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 2048653d0..d4184e0f0 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -21,6 +21,7 @@ from typing import ( Mapping, Optional, Sequence, + Set, Tuple, Union, overload, @@ -435,6 +436,64 @@ class Program: another :ref:`buffer ` type. """ ... + def register_symbol_finder( + self, + name: str, + fn: Callable[[Program, Optional[str], Optional[int], bool], Sequence[Symbol]], + *, + enable_index: Optional[int] = None, + ) -> None: + """ + Register a callback for finding symbols in the program. + + This does not enable the finder unless *enable_index* is given. + + The callback should take four arguments: the program, a *name*, an + *address*, and a boolean flag *one*. It should return a list of symbols + or an empty list if no matches are found. + + If *name* is not ``None``, then only symbols with that name should be + returned. If *address* is not ``None``, then only symbols containing + that address should be returned. If neither is ``None``, then the + returned symbols must match both. If both are ``None``, then all + symbols should be considered matching. + + When the *one* flag is ``False``, the callback should return a list of + all matching symbols. When it is ``True``, it should return a list with + at most one symbol which is the best match. + + :param name: Finder name. + :param fn: Callable taking ``(prog, name, address, one)`` and returning + a sequence of :class:`Symbol`\\ s. + :param enable_index: Insert the finder into the list of enabled finders + at the given index. If -1 or greater than the number of enabled + finders, insert it at the end. If ``None`` or not given, don't + enable the finder. + :raises ValueError: if there is already a finder with the given name + """ + ... + def registered_symbol_finders(self) -> Set[str]: + """Return the names of all registered symbol finders.""" + ... + def set_enabled_symbol_finders(self, names: Sequence[str]) -> None: + """ + Set the list of enabled symbol finders. + + Finders are called in the same order as the list. When the *one* flag + is set, the search will short-circuit after the first finder which + returns a result, and subsequent finders will not be called. Otherwise, + all callbacks will be called, and all results will be returned. + + Finders that are not in the list are not called. + + :param names: Names of finders to enable, in order. + :raises ValueError: if no finder has a given name or the same name is + given more than once + """ + ... + def enabled_symbol_finders(self) -> List[str]: + """Return the names of enabled symbol finders, in order.""" + ... def add_type_finder( self, fn: Callable[[TypeKind, str, Optional[str]], Optional[Type]] ) -> None: @@ -468,33 +527,6 @@ class Program: return an :class:`Object` or ``None`` if not found. """ ... - def add_symbol_finder( - self, fn: Callable[[Optional[str], Optional[int], bool], Sequence[Symbol]] - ) -> None: - """ - Register a callback for finding symbols in the program. - - The callback should take three arguments: a search name, a search - address, and a boolean flag 'one' indicating whether to return only - the single best match. When the 'one' flag is True, the callback should - return a list containing at most one :class:`Symbol`. When the flag is - False, the callback should return a list of all matching - :class:`Symbol`\\ s. Both the name and address arguments are optional. - If both are provided, then the result(s) should match both. If neither - are provided, the finder should return all available symbols. If no - result is found, the return should be an empty list. - - Callbacks are called in reverse order of the order they were added - (i.e,, the most recently added callback is called first). When the - 'one' flag is set, the search will short-circuit after the first - finder which returns a result, and subsequent finders will not be - called. Otherwise, all callbacks will be called, and all results will be - returned. - - :param fn: Callable taking name, address, and 'one' flag, and - returning a sequence of :class:`Symbol`\\ s. - """ - ... def set_core_dump(self, path: Union[Path, int]) -> None: """ Set the program to a core dump. diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 60f7c7d37..ede84af56 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2186,8 +2186,12 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, drgn_program_add_object_finder_impl(prog, &dbinfo->object_finder, drgn_debug_info_find_object, dbinfo); - drgn_program_add_symbol_finder_impl(prog, &dbinfo->symbol_finder, - elf_symbols_search, prog); + const struct drgn_symbol_finder_ops symbol_finder_ops = { + .find = elf_symbols_search, + }; + drgn_program_register_symbol_finder_impl(prog, &dbinfo->symbol_finder, + "elf", &symbol_finder_ops, + prog, 0); drgn_module_table_init(&dbinfo->modules); c_string_set_init(&dbinfo->module_names); drgn_dwarf_info_init(dbinfo); diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 76d3a7ef6..d5b47d882 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -937,7 +937,7 @@ struct drgn_error *drgn_program_find_symbols_by_address(struct drgn_program *pro struct drgn_symbol ***syms_ret, size_t *count_ret); -/** Flags for @ref drgn_symbol_find_fn() */ +/** Flags for @ref drgn_symbol_finder_ops::find() */ enum drgn_find_symbol_flags { /** Find symbols whose name matches the name argument */ DRGN_FIND_SYMBOL_NAME = 1 << 0, @@ -947,7 +947,7 @@ enum drgn_find_symbol_flags { DRGN_FIND_SYMBOL_ONE = 1 << 2, }; -/** Result builder for @ref drgn_symbol_find_fn() */ +/** Result builder for @ref drgn_symbol_finder_ops::find() */ struct drgn_symbol_result_builder; /** @@ -965,44 +965,97 @@ drgn_symbol_result_builder_add(struct drgn_symbol_result_builder *builder, /** Get the current number of results in a symbol search result. */ size_t drgn_symbol_result_builder_count(const struct drgn_symbol_result_builder *builder); +/** Symbol finder callback table. */ +struct drgn_symbol_finder_ops { + /** + * Callback to destroy the symbol finder. + * + * This may be @c NULL. + * + * @param[in] arg Argument passed to @ref + * drgn_program_register_symbol_finder(). + */ + void (*destroy)(void *arg); + /** + * Callback for finding one or more symbols. + * + * The callback should perform a symbol lookup based on the flags given + * in @p flags. When multiple flags are provided, the effect should be + * treated as a logical AND. Symbol results should be added to the + * result builder @p builder, via @ref drgn_symbol_result_builder_add(). + * When @ref DRGN_FIND_SYMBOL_ONE is set, then the finding function + * should only return the single best symbol result, and short-circuit + * return. + * + * When no symbol is found, simply do not add any result to the builder. + * No error should be returned in this case. + * + * @param[in] name Name of the symbol to match + * @param[in] addr Address of the symbol to match + * @param[in] flags Flags indicating the desired behavior of the search + * @param[in] arg Argument passed to @ref + * drgn_program_register_symbol_finder(). + * @param[in] builder Used to build the resulting symbol output + */ + struct drgn_error *(*find)(const char *name, uint64_t addr, + enum drgn_find_symbol_flags flags, void *arg, + struct drgn_symbol_result_builder *builder); +}; + /** - * Callback for finding one or more symbols. + * Register a symbol finding callback. * - * The callback should perform a symbol lookup based on the flags given in @a - * flags. When multiple flags are provided, the effect should be treated as a - * logical AND. Symbol results should be added to the result builder @a builder, - * via @ref drgn_symbol_result_builder_add(). When @ref DRGN_FIND_SYMBOL_ONE is - * set, then the finding function should only return the single best symbol - * result, and short-circuit return. + * @param[in] name Finder name. This is copied. + * @param[in] ops Callback table. This is copied. + * @param[in] arg Argument to pass to callbacks. + * @param[in] enable_index Insert the finder into the list of enabled finders at + * the given index. If @ref DRGN_HANDLER_REGISTER_ENABLE_LAST or greater than + * the number of enabled finders, insert it at the end. If @ref + * DRGN_HANDLER_REGISTER_DONT_ENABLE, don’t enable the finder. + */ +struct drgn_error * +drgn_program_register_symbol_finder(struct drgn_program *prog, const char *name, + const struct drgn_symbol_finder_ops *ops, + void *arg, size_t enable_index); + +/** + * Get the names of all registered symbol finders. * - * When no symbol is found, simply do not add any result to the builder. No - * error should be returned in this case. + * The order of the names is arbitrary. * - * @param[in] name Name of the symbol to match - * @param[in] addr Address of the symbol to match - * @param[in] flags Flags indicating the desired behavior of the search - * @param[in] arg Argument passed to @ref drgn_program_add_symbol_finder(). - * @param[in] builder Used to build the resulting symbol output + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. */ -typedef struct drgn_error * -(*drgn_symbol_find_fn)(const char *name, uint64_t addr, - enum drgn_find_symbol_flags flags, void *arg, - struct drgn_symbol_result_builder *builder); +struct drgn_error * +drgn_program_registered_symbol_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); /** - * Register a symbol finding callback. + * Set the list of enabled symbol finders. * - * Callbacks are called in reverse order that they were originally added. In - * case of a search for multiple symbols, then the results of all callbacks are - * concatenated. If the search is for a single symbol, then the first callback - * which finds a symbol will short-circuit the search. + * Finders are called in the same order as the list. In case of a search for + * multiple symbols, then the results of all callbacks are concatenated. If the + * search is for a single symbol, then the first callback which finds a symbol + * will short-circuit the search. * - * @param[in] fn Symbol search function - * @param[in] arg Argument to pass to the callback + * @param[in] names Names of finders to enable, in order. + * @param[in] count Number of names in @p names. + */ +struct drgn_error * +drgn_program_set_enabled_symbol_finders(struct drgn_program *prog, + const char * const *names, + size_t count); + +/** + * Get the names of enabled symbol finders, in order. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. */ struct drgn_error * -drgn_program_add_symbol_finder(struct drgn_program *prog, - drgn_symbol_find_fn fn, void *arg); +drgn_program_enabled_symbol_finders(struct drgn_program *prog, + const char ***names_ret, size_t *count_ret); /** Element type and size. */ struct drgn_element_info { diff --git a/libdrgn/program.c b/libdrgn/program.c index a845e8d63..76b4a251a 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -112,17 +112,6 @@ void drgn_program_init(struct drgn_program *prog, drgn_object_init(&prog->vmemmap, prog); } -static void drgn_program_deinit_symbol_finders(struct drgn_program *prog) -{ - struct drgn_symbol_finder *finder = prog->symbol_finders; - while (finder) { - struct drgn_symbol_finder *next = finder->next; - if (finder->free) - free(finder); - finder = next; - } -} - void drgn_program_deinit(struct drgn_program *prog) { if (prog->core_dump_notes_cached) { @@ -145,8 +134,12 @@ void drgn_program_deinit(struct drgn_program *prog) drgn_object_deinit(&prog->vmemmap); + drgn_handler_list_deinit(struct drgn_symbol_finder, finder, + &prog->symbol_finders, + if (finder->ops.destroy) + finder->ops.destroy(finder->arg); + ); drgn_object_index_deinit(&prog->oindex); - drgn_program_deinit_symbol_finders(prog); drgn_program_deinit_types(prog); drgn_memory_reader_deinit(&prog->reader); @@ -218,6 +211,81 @@ drgn_program_add_object_finder(struct drgn_program *prog, return drgn_program_add_object_finder_impl(prog, NULL, fn, arg); } +#define DRGN_PROGRAM_FINDER(which) \ +struct drgn_error * \ +drgn_program_register_##which##_finder_impl(struct drgn_program *prog, \ + struct drgn_##which##_finder *finder,\ + const char *name, \ + const struct drgn_##which##_finder_ops *ops,\ + void *arg, size_t enable_index) \ +{ \ + struct drgn_error *err; \ + if (finder) { \ + finder->handler.name = name; \ + finder->handler.free = false; \ + } else { \ + finder = malloc(sizeof(*finder)); \ + if (!finder) \ + return &drgn_enomem; \ + finder->handler.name = strdup(name); \ + if (!finder->handler.name) { \ + free(finder); \ + return &drgn_enomem; \ + } \ + finder->handler.free = true; \ + } \ + memcpy(&finder->ops, ops, sizeof(finder->ops)); \ + finder->arg = arg; \ + err = drgn_handler_list_register(&prog->which##_finders, \ + &finder->handler, enable_index, \ + #which " finder"); \ + if (err && finder->handler.free) { \ + free((char *)finder->handler.name); \ + free(finder); \ + } \ + return err; \ +} \ + \ +LIBDRGN_PUBLIC struct drgn_error * \ +drgn_program_register_##which##_finder(struct drgn_program *prog, const char *name,\ + const struct drgn_##which##_finder_ops *ops,\ + void *arg, size_t enable_index) \ +{ \ + return drgn_program_register_##which##_finder_impl(prog, NULL, name, \ + ops, arg, \ + enable_index); \ +} \ + \ +LIBDRGN_PUBLIC struct drgn_error * \ +drgn_program_registered_##which##_finders(struct drgn_program *prog, \ + const char ***names_ret, \ + size_t *count_ret) \ +{ \ + return drgn_handler_list_registered(&prog->which##_finders, names_ret, \ + count_ret); \ +} \ + \ +LIBDRGN_PUBLIC struct drgn_error * \ +drgn_program_set_enabled_##which##_finders(struct drgn_program *prog, \ + const char * const *names, \ + size_t count) \ +{ \ + return drgn_handler_list_set_enabled(&prog->which##_finders, names, \ + count, #which "finder"); \ +} \ + \ +LIBDRGN_PUBLIC struct drgn_error * \ +drgn_program_enabled_##which##_finders(struct drgn_program *prog, \ + const char ***names_ret, \ + size_t *count_ret) \ +{ \ + return drgn_handler_list_enabled(&prog->which##_finders, names_ret, \ + count_ret); \ +} + +DRGN_PROGRAM_FINDER(symbol) +#undef DRGN_PROGRAM_FINDER + static struct drgn_error * drgn_program_check_initialized(struct drgn_program *prog) { @@ -1801,46 +1869,17 @@ drgn_program_symbols_search(struct drgn_program *prog, const char *name, struct drgn_symbol_result_builder *builder) { struct drgn_error *err = NULL; - struct drgn_symbol_finder *finder = prog->symbol_finders; - while (finder) { - err = finder->fn(name, addr, flags, finder->arg, builder); + drgn_handler_list_for_each_enabled(struct drgn_symbol_finder, finder, + &prog->symbol_finders) { + err = finder->ops.find(name, addr, flags, finder->arg, builder); if (err || ((flags & DRGN_FIND_SYMBOL_ONE) && drgn_symbol_result_builder_count(builder) > 0)) break; - finder = finder->next; } return err; } -struct drgn_error * -drgn_program_add_symbol_finder_impl(struct drgn_program *prog, - struct drgn_symbol_finder *finder, - drgn_symbol_find_fn fn, void *arg) -{ - if (finder) { - finder->free = false; - } else { - finder = malloc(sizeof(*finder)); - if (!finder) - return &drgn_enomem; - finder->free = true; - } - finder->fn = fn; - finder->arg = arg; - finder->next = prog->symbol_finders; - prog->symbol_finders = finder; - return NULL; -} - -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_add_symbol_finder(struct drgn_program *prog, - drgn_symbol_find_fn fn, - void *arg) -{ - return drgn_program_add_symbol_finder_impl(prog, NULL, fn, arg); -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_find_symbols_by_name(struct drgn_program *prog, const char *name, struct drgn_symbol ***syms_ret, diff --git a/libdrgn/program.h b/libdrgn/program.h index 0194f8948..ac674b5fd 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -21,6 +21,7 @@ #include "debug_info.h" #include "drgn.h" +#include "handler.h" #include "hash_table.h" #include "language.h" #include "memory_reader.h" @@ -31,6 +32,8 @@ #include "type.h" #include "vector.h" +struct drgn_symbol_finder; + /** * @defgroup Internals Internals * @@ -110,7 +113,7 @@ struct drgn_program { */ struct drgn_object_index oindex; struct drgn_debug_info dbinfo; - struct drgn_symbol_finder *symbol_finders; + struct drgn_handler_list symbol_finders; /* * Program information. @@ -381,9 +384,12 @@ drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, struct drgn_symbol **ret); struct drgn_error * -drgn_program_add_symbol_finder_impl(struct drgn_program *prog, - struct drgn_symbol_finder *finder, - drgn_symbol_find_fn fn, void *arg); +drgn_program_register_symbol_finder_impl(struct drgn_program *prog, + struct drgn_symbol_finder *finder, + const char *name, + const struct drgn_symbol_finder_ops *ops, + void *arg, size_t enable_index); + /** * Call before a blocking (I/O or long-running) operation. * diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index bf8225587..cfffc30d7 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -493,9 +493,10 @@ static struct drgn_error *py_object_find_fn(const char *name, size_t name_len, return drgn_object_copy(ret, &((DrgnObject *)obj)->obj); } -static struct drgn_error *py_symbol_find_fn(const char *name, uint64_t addr, - enum drgn_find_symbol_flags flags, - void *data, struct drgn_symbol_result_builder *builder) +static struct drgn_error * +py_symbol_find_fn(const char *name, uint64_t addr, + enum drgn_find_symbol_flags flags, void *arg, + struct drgn_symbol_result_builder *builder) { PyGILState_guard(); @@ -521,8 +522,10 @@ static struct drgn_error *py_symbol_find_fn(const char *name, uint64_t addr, _cleanup_pydecref_ PyObject *one_obj = PyBool_FromLong(flags & DRGN_FIND_SYMBOL_ONE); - _cleanup_pydecref_ PyObject *tmp = PyObject_CallFunction(data, "OOO", name_obj, - address_obj, one_obj); + _cleanup_pydecref_ PyObject *tmp = + PyObject_CallFunction(PyTuple_GET_ITEM(arg, 1), "OOOO", + PyTuple_GET_ITEM(arg, 0), name_obj, + address_obj, one_obj); if (!tmp) return drgn_error_from_python(); @@ -557,6 +560,153 @@ static struct drgn_error *py_symbol_find_fn(const char *name, uint64_t addr, return NULL; } +#define symbol_finder_arg(self, fn) \ + _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); \ + if (!arg) \ + return NULL; + +#define DEFINE_PROGRAM_FINDER_METHODS(which) \ +static PyObject *Program_register_##which##_finder(Program *self, \ + PyObject *args, \ + PyObject *kwds) \ +{ \ + struct drgn_error *err; \ + static char *keywords[] = {"name", "fn", "enable_index", NULL}; \ + const char *name; \ + PyObject *fn; \ + PyObject *enable_index_obj = Py_None; \ + if (!PyArg_ParseTupleAndKeywords(args, kwds, \ + "sO|$O:register_" #which "_finder", \ + keywords, &name, &fn, \ + &enable_index_obj)) \ + return NULL; \ + \ + if (!PyCallable_Check(fn)) { \ + PyErr_SetString(PyExc_TypeError, "fn must be callable"); \ + return NULL; \ + } \ + \ + size_t enable_index; \ + if (enable_index_obj == Py_None) { \ + enable_index = DRGN_HANDLER_REGISTER_DONT_ENABLE; \ + } else { \ + _cleanup_pydecref_ PyObject *negative_one = PyLong_FromLong(-1);\ + if (!negative_one) \ + return NULL; \ + int eq = PyObject_RichCompareBool(enable_index_obj, \ + negative_one, Py_EQ); \ + if (eq < 0) \ + return NULL; \ + if (eq) { \ + enable_index = DRGN_HANDLER_REGISTER_ENABLE_LAST; \ + } else { \ + enable_index = PyLong_AsSize_t(enable_index_obj); \ + if (enable_index == (size_t)-1 && PyErr_Occurred()) \ + return NULL; \ + /* \ + * If the index happens to be the \ + * DRGN_HANDLER_REGISTER_DONT_ENABLE sentinel \ + * (SIZE_MAX - 1), set it to something else; it's \ + * impossible to have this many finders anyways. \ + */ \ + if (enable_index == DRGN_HANDLER_REGISTER_DONT_ENABLE) \ + enable_index--; \ + } \ + } \ + \ + which##_finder_arg(self, fn) \ + if (!Program_hold_reserve(self, 1)) \ + return NULL; \ + const struct drgn_##which##_finder_ops ops = { \ + .find = py_##which##_find_fn, \ + }; \ + err = drgn_program_register_##which##_finder(&self->prog, name, &ops, \ + arg, enable_index); \ + if (err) \ + return set_drgn_error(err); \ + Program_hold_object(self, arg); \ + Py_RETURN_NONE; \ + \ +} \ + \ +static PyObject *Program_registered_##which##_finders(Program *self) \ +{ \ + struct drgn_error *err; \ + _cleanup_free_ const char **names = NULL; \ + size_t count; \ + err = drgn_program_registered_##which##_finders(&self->prog, &names, \ + &count); \ + if (err) \ + return set_drgn_error(err); \ + _cleanup_pydecref_ PyObject *res = PySet_New(NULL); \ + if (!res) \ + return NULL; \ + for (size_t i = 0; i < count; i++) { \ + _cleanup_pydecref_ PyObject *name = \ + PyUnicode_FromString(names[i]); \ + if (!name) \ + return NULL; \ + if (PySet_Add(res, name)) \ + return NULL; \ + } \ + return_ptr(res); \ +} \ + \ +static PyObject *Program_set_enabled_##which##_finders(Program *self, \ + PyObject *args, \ + PyObject *kwds) \ +{ \ + struct drgn_error *err; \ + static char *keywords[] = {"names", NULL}; \ + PyObject *names_obj; \ + if (!PyArg_ParseTupleAndKeywords(args, kwds, \ + "O:set_enabled_" #which "_finders", \ + keywords, &names_obj)) \ + return NULL; \ + _cleanup_pydecref_ PyObject *names_seq = \ + PySequence_Fast(names_obj, "names must be sequence"); \ + if (!names_seq) \ + return NULL; \ + size_t count = PySequence_Fast_GET_SIZE(names_seq); \ + _cleanup_free_ const char **names = \ + malloc_array(count, sizeof(names[0])); \ + if (!names) \ + return NULL; \ + for (size_t i = 0; i < count; i++) { \ + names[i] = PyUnicode_AsUTF8(PySequence_Fast_GET_ITEM(names_seq, i));\ + if (!names[i]) \ + return NULL; \ + } \ + err = drgn_program_set_enabled_##which##_finders(&self->prog, names, \ + count); \ + if (err) \ + return set_drgn_error(err); \ + Py_RETURN_NONE; \ +} \ + \ +static PyObject *Program_enabled_##which##_finders(Program *self) \ +{ \ + struct drgn_error *err; \ + _cleanup_free_ const char **names = NULL; \ + size_t count; \ + err = drgn_program_enabled_##which##_finders(&self->prog, &names, \ + &count); \ + if (err) \ + return set_drgn_error(err); \ + _cleanup_pydecref_ PyObject *res = PyList_New(count); \ + if (!res) \ + return NULL; \ + for (size_t i = 0; i < count; i++) { \ + PyObject *name = PyUnicode_FromString(names[i]); \ + if (!name) \ + return NULL; \ + PyList_SET_ITEM(res, i, name); \ + } \ + return_ptr(res); \ +} + +DEFINE_PROGRAM_FINDER_METHODS(symbol) + static PyObject *Program_add_object_finder(Program *self, PyObject *args, PyObject *kwds) { @@ -583,34 +733,6 @@ static PyObject *Program_add_object_finder(Program *self, PyObject *args, Py_RETURN_NONE; } -static PyObject *Program_add_symbol_finder(Program *self, PyObject *args, - PyObject *kwds) -{ - static char *keywords[] = {"fn", NULL}; - struct drgn_error *err; - PyObject *fn; - int ret; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:add_symbol_finder", - keywords, &fn)) - return NULL; - - if (!PyCallable_Check(fn)) { - PyErr_SetString(PyExc_TypeError, "fn must be callable"); - return NULL; - } - - ret = Program_hold_object(self, fn); - if (ret == -1) - return NULL; - - err = drgn_program_add_symbol_finder(&self->prog, py_symbol_find_fn, - fn); - if (err) - return set_drgn_error(err); - Py_RETURN_NONE; -} - static PyObject *Program_set_core_dump(Program *self, PyObject *args, PyObject *kwds) { @@ -1255,15 +1377,30 @@ static int Program_set_language(Program *self, PyObject *value, void *arg) return 0; } +#define PROGRAM_FINDER_METHOD_DEFS(which) \ + {"register_" #which "_finder", \ + (PyCFunction)Program_register_##which##_finder, \ + METH_VARARGS | METH_KEYWORDS, \ + drgn_Program_register_##which##_finder_DOC}, \ + {"registered_" #which "_finders", \ + (PyCFunction)Program_registered_##which##_finders, METH_NOARGS, \ + drgn_Program_registered_##which##_finders_DOC}, \ + {"set_enabled_" #which "_finders", \ + (PyCFunction)Program_set_enabled_##which##_finders, \ + METH_VARARGS | METH_KEYWORDS, \ + drgn_Program_set_enabled_##which##_finders_DOC}, \ + {"enabled_" #which "_finders", \ + (PyCFunction)Program_enabled_##which##_finders, METH_NOARGS, \ + drgn_Program_enabled_##which##_finders_DOC} + static PyMethodDef Program_methods[] = { {"add_memory_segment", (PyCFunction)Program_add_memory_segment, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_memory_segment_DOC}, + PROGRAM_FINDER_METHOD_DEFS(symbol), {"add_type_finder", (PyCFunction)Program_add_type_finder, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_type_finder_DOC}, {"add_object_finder", (PyCFunction)Program_add_object_finder, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_object_finder_DOC}, - {"add_symbol_finder", (PyCFunction)Program_add_symbol_finder, - METH_VARARGS | METH_KEYWORDS, drgn_Program_add_symbol_finder_DOC}, {"set_core_dump", (PyCFunction)Program_set_core_dump, METH_VARARGS | METH_KEYWORDS, drgn_Program_set_core_dump_DOC}, {"set_kernel", (PyCFunction)Program_set_kernel, METH_NOARGS, diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index b2e880af4..425219813 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -8,6 +8,7 @@ #include "cleanup.h" #include "drgn.h" +#include "handler.h" #include "vector.h" struct drgn_symbol { @@ -20,10 +21,9 @@ struct drgn_symbol { }; struct drgn_symbol_finder { - drgn_symbol_find_fn fn; + struct drgn_handler handler; + struct drgn_symbol_finder_ops ops; void *arg; - struct drgn_symbol_finder *next; - bool free; }; DEFINE_VECTOR_TYPE(symbolp_vector, struct drgn_symbol *); diff --git a/tests/test_program.py b/tests/test_program.py index f71b1c3a5..024213bf8 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -922,3 +922,93 @@ def test_unsaved(self): with self.assertRaisesRegex(FaultError, "memory not saved in core dump") as cm: prog.read(0xFFFF0000, len(data) + 4) self.assertEqual(cm.exception.address, 0xFFFF000C) + + +def dummy_symbol_finder(prog, name, address, one): + return () + + +class TestSymbolFinders(TestCase): + def test_registered(self): + prog = Program() + self.assertEqual(prog.registered_symbol_finders(), {"elf"}) + prog.register_symbol_finder("foo", dummy_symbol_finder) + self.assertEqual(prog.registered_symbol_finders(), {"elf", "foo"}) + + def test_register_duplicate(self): + self.assertRaisesRegex( + ValueError, + "duplicate symbol finder", + Program().register_symbol_finder, + "elf", + dummy_symbol_finder, + ) + + def test_default_enabled(self): + self.assertEqual(Program().enabled_symbol_finders(), ["elf"]) + + def test_disable_all(self): + prog = Program() + prog.set_enabled_symbol_finders(()) + self.assertEqual(prog.enabled_symbol_finders(), []) + + def test_register_then_enable(self): + prog = Program() + prog.register_symbol_finder("foo", dummy_symbol_finder) + self.assertEqual(prog.enabled_symbol_finders(), ["elf"]) + + prog.set_enabled_symbol_finders(["foo", "elf"]) + self.assertEqual(prog.enabled_symbol_finders(), ["foo", "elf"]) + + def test_register_enable_index(self): + prog = Program() + with self.subTest("None"): + prog.register_symbol_finder("ghost", dummy_symbol_finder, enable_index=None) + self.assertEqual(prog.enabled_symbol_finders(), ["elf"]) + + with self.subTest("first"): + prog.register_symbol_finder("foo", dummy_symbol_finder, enable_index=0) + self.assertEqual(prog.enabled_symbol_finders(), ["foo", "elf"]) + + with self.subTest("middle"): + prog.register_symbol_finder("bar", dummy_symbol_finder, enable_index=1) + self.assertEqual(prog.enabled_symbol_finders(), ["foo", "bar", "elf"]) + + with self.subTest("end"): + prog.register_symbol_finder("baz", dummy_symbol_finder, enable_index=3) + self.assertEqual( + prog.enabled_symbol_finders(), ["foo", "bar", "elf", "baz"] + ) + + with self.subTest("past end"): + prog.register_symbol_finder("qux", dummy_symbol_finder, enable_index=10) + self.assertEqual( + prog.enabled_symbol_finders(), ["foo", "bar", "elf", "baz", "qux"] + ) + + with self.subTest("DRGN_HANDLER_REGISTER_DONT_ENABLE"): + prog.register_symbol_finder( + "quux", + dummy_symbol_finder, + enable_index=(2**64 if sys.maxsize > 2**32 else 2**32) - 2, + ) + self.assertEqual( + prog.enabled_symbol_finders(), + ["foo", "bar", "elf", "baz", "qux", "quux"], + ) + + with self.subTest("last"): + prog.register_symbol_finder("quuux", dummy_symbol_finder, enable_index=-1) + self.assertEqual( + prog.enabled_symbol_finders(), + ["foo", "bar", "elf", "baz", "qux", "quux", "quuux"], + ) + + def test_register_enable_index_invalid(self): + self.assertRaises( + OverflowError, + Program().register_symbol_finder, + "foo", + dummy_symbol_finder, + enable_index=-2, + ) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index 72c35af1d..7e3b81e5e 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -272,7 +272,7 @@ class TestSymbolFinder(TestCase): Symbol("three", 0xFFFF2008, 8, SymbolBinding.GLOBAL, SymbolKind.FUNC), ] - def finder(self, arg_name, arg_address, arg_one): + def finder(self, prog, arg_name, arg_address, arg_one): self.called = True res = [] self.assertEqual(self.expected_name, arg_name) @@ -293,7 +293,7 @@ def finder(self, arg_name, arg_address, arg_one): def setUp(self): self.prog = Program() - self.prog.add_symbol_finder(self.finder) + self.prog.register_symbol_finder("test", self.finder, enable_index=0) self.called = False def expect_args(self, name, address, one): From e1def322e4ef4e10bbc56917e13f2213abdfba85 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 4 Jun 2024 13:13:35 -0700 Subject: [PATCH 3/6] python: add TypeKindSet C type finders are passed a bitset of type kinds, but Python type finders currently get called for each type kind. An upcoming update to the type finder interface will fix this, but we need a set of TypeKinds, and we'd rather not construct a real Python set for it. Instead, add a TypeKindSet bitset that satisfies the collections.abc.Set interface. Signed-off-by: Omar Sandoval --- _drgn.pyi | 21 ++ docs/api_reference.rst | 1 + drgn/__init__.py | 2 + libdrgn/Makefile.am | 1 + libdrgn/bitops.h | 12 ++ libdrgn/python/drgnpy.h | 15 ++ libdrgn/python/main.c | 3 + libdrgn/python/type_kind_set.c | 346 +++++++++++++++++++++++++++++++++ tests/test_type_kind_set.py | 308 +++++++++++++++++++++++++++++ 9 files changed, 709 insertions(+) create mode 100644 libdrgn/python/type_kind_set.c create mode 100644 tests/test_type_kind_set.py diff --git a/_drgn.pyi b/_drgn.pyi index d4184e0f0..9cda06752 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -7,6 +7,7 @@ libdrgn bindings Don't use this module directly. Instead, use the drgn package. """ +import collections.abc import enum import os import sys @@ -2329,6 +2330,26 @@ class TypeKind(enum.Enum): FUNCTION = ... """Function type.""" +class TypeKindSet(collections.abc.Set[TypeKind]): + """ + Immutable set of :class:`TypeKind`\\ s. + + >>> kinds = TypeKindSet({TypeKind.STRUCT, TypeKind.CLASS}) + >>> TypeKind.STRUCT in kinds + True + >>> TypeKind.INT in kinds + False + >>> for kind in kinds: + ... print(kind) + ... + TypeKind.STRUCT + TypeKind.CLASS + """ + + def __contains__(self, __x: object) -> bool: ... + def __iter__(self) -> Iterator[TypeKind]: ... + def __len__(self) -> int: ... + class PrimitiveType(enum.Enum): """A ``PrimitiveType`` represents a primitive type known to drgn.""" diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 16c0e65a4..63169e798 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -130,6 +130,7 @@ Types .. drgndoc:: TypeParameter .. drgndoc:: TypeTemplateParameter .. drgndoc:: TypeKind +.. drgndoc:: TypeKindSet .. drgndoc:: PrimitiveType .. drgndoc:: Qualifiers .. drgndoc:: offsetof diff --git a/drgn/__init__.py b/drgn/__init__.py index 1df95b5fd..903884b75 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -75,6 +75,7 @@ Type, TypeEnumerator, TypeKind, + TypeKindSet, TypeMember, TypeParameter, TypeTemplateParameter, @@ -129,6 +130,7 @@ "Type", "TypeEnumerator", "TypeKind", + "TypeKindSet", "TypeMember", "TypeParameter", "TypeTemplateParameter", diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index afb261c70..689b2fec0 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -172,6 +172,7 @@ _drgn_la_SOURCES = python/constants.c \ python/test.c \ python/thread.c \ python/type.c \ + python/type_kind_set.c \ python/util.c _drgn_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden diff --git a/libdrgn/bitops.h b/libdrgn/bitops.h index d3baf0625..7d147b018 100644 --- a/libdrgn/bitops.h +++ b/libdrgn/bitops.h @@ -87,6 +87,18 @@ #define ilog2_impl(arg, suffix, x) \ ((8 * sizeof(0u##suffix) - 1) ^ __builtin_clz##suffix(x)) +/** + * Bit population count. + * + * Return the number of 1-bits in @p x. + * + * ``` + * popcount(8) == 1 + * popcount(3) == 2 + * ``` + */ +#define popcount(x) generic_bitop(x, PP_UNIQUE(_x), builtin_bitop_impl, popcount) + #define builtin_bitop_impl(arg, suffix, x) __builtin_##arg##suffix(x) #define generic_bitop(x, unique_x, impl, impl_arg) ({ \ __auto_type unique_x = (x); \ diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index c745eab6f..f9652d533 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -196,6 +196,16 @@ typedef struct { union drgn_lazy_object *lazy_obj; } LazyObject; +typedef struct { + PyObject_HEAD + uint64_t kinds; +} TypeKindSet; + +typedef struct { + PyObject_HEAD + uint64_t mask; +} TypeKindSetIterator; + typedef struct { LazyObject lazy_obj; PyObject *name; @@ -236,6 +246,8 @@ extern PyTypeObject Symbol_type; extern PyTypeObject Thread_type; extern PyTypeObject ThreadIterator_type; extern PyTypeObject TypeEnumerator_type; +extern PyTypeObject TypeKindSet_type; +extern PyTypeObject TypeKindSetIterator_type; extern PyTypeObject TypeMember_type; extern PyTypeObject TypeParameter_type; extern PyTypeObject TypeTemplateParameter_type; @@ -259,6 +271,9 @@ PyObject *Language_wrap(const struct drgn_language *language); int language_converter(PyObject *o, void *p); int add_languages(void); +PyObject *TypeKindSet_wrap(uint64_t mask); +int init_type_kind_set(void); + static inline DrgnObject *DrgnObject_alloc(Program *prog) { DrgnObject *ret = call_tp_alloc(DrgnObject); diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index 2bc92517d..c57340071 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -277,6 +277,9 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) add_type(m, &Thread_type) || add_type(m, &ThreadIterator_type) || add_type(m, &TypeEnumerator_type) || + add_type(m, &TypeKindSet_type) || + PyType_Ready(&TypeKindSetIterator_type) || + init_type_kind_set() || add_type(m, &TypeMember_type) || add_type(m, &TypeParameter_type) || add_type(m, &TypeTemplateParameter_type) || diff --git a/libdrgn/python/type_kind_set.c b/libdrgn/python/type_kind_set.c new file mode 100644 index 000000000..460df4a4a --- /dev/null +++ b/libdrgn/python/type_kind_set.c @@ -0,0 +1,346 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" +#include "../bitops.h" + +static PyObject *collections_abc_Set; + +PyObject *TypeKindSet_wrap(uint64_t mask) +{ + TypeKindSet *res = call_tp_alloc(TypeKindSet); + if (res) + res->kinds = mask; + return (PyObject *)res; +} + +int init_type_kind_set(void) +{ + _cleanup_pydecref_ PyObject *collections_abc = + PyImport_ImportModule("collections.abc"); + if (!collections_abc) + return -1; + collections_abc_Set = PyObject_GetAttrString(collections_abc, "Set"); + if (!collections_abc_Set) + return -1; + _cleanup_pydecref_ PyObject *res = + PyObject_CallMethod(collections_abc_Set, "register", "O", + &TypeKindSet_type); + if (!res) + return -1; + Py_DECREF(res); + return 0; +} + +static inline const char *type_kind_to_str(enum drgn_type_kind kind) +{ + SWITCH_ENUM(kind, + case DRGN_TYPE_VOID: + return "TypeKind.VOID"; + case DRGN_TYPE_INT: + return "TypeKind.INT"; + case DRGN_TYPE_BOOL: + return "TypeKind.BOOL"; + case DRGN_TYPE_FLOAT: + return "TypeKind.FLOAT"; + case DRGN_TYPE_STRUCT: + return "TypeKind.STRUCT"; + case DRGN_TYPE_UNION: + return "TypeKind.UNION"; + case DRGN_TYPE_CLASS: + return "TypeKind.CLASS"; + case DRGN_TYPE_ENUM: + return "TypeKind.ENUM"; + case DRGN_TYPE_TYPEDEF: + return "TypeKind.TYPEDEF"; + case DRGN_TYPE_POINTER: + return "TypeKind.POINTER"; + case DRGN_TYPE_ARRAY: + return "TypeKind.ARRAY"; + case DRGN_TYPE_FUNCTION: + return "TypeKind.FUNCTION"; + ) +} + +static PyObject *TypeKindSet_repr(TypeKindSet *self) +{ + _cleanup_pydecref_ PyObject *parts = PyList_New(0); + if (!parts) + return NULL; + if (append_string(parts, "TypeKindSet(")) + return NULL; + bool first = true; + unsigned int kind; + uint64_t kinds = self->kinds; + for_each_bit(kind, kinds) { + if (append_format(parts, "%s%s", first ? "{" : ", ", + type_kind_to_str(kind))) + return NULL; + first = false; + } + if (append_string(parts, first ? ")" : "})")) + return NULL; + return join_strings(parts); +} + +static int TypeKind_value(PyObject *obj) +{ + _cleanup_pydecref_ PyObject *value_obj = + PyObject_GetAttrString(obj, "value"); + if (!value_obj) + return -1; + long value = PyLong_AsLong(value_obj); + if ((value < 0 && !PyErr_Occurred()) || value >= 64) { + PyErr_BadArgument(); + return -1; + } + return value; +} + +static int TypeKindSet_mask_from_iterable(PyObject *iterable, uint64_t *ret) +{ + if (PyObject_TypeCheck(iterable, &TypeKindSet_type)) { + *ret = ((TypeKindSet *)iterable)->kinds; + return 0; + } + + int non_typekind = 0; + uint64_t mask = 0; + if (iterable) { + _cleanup_pydecref_ PyObject *it = PyObject_GetIter(iterable); + if (!it) + return -1; + + for (;;) { + _cleanup_pydecref_ PyObject *item = PyIter_Next(it); + if (!item) + break; + if (PyObject_TypeCheck(item, + (PyTypeObject *)TypeKind_class)) { + int value = TypeKind_value(item); + if (value < 0) + return -1; + mask |= 1 << value; + } else { + non_typekind = 1; + } + } + if (PyErr_Occurred()) + return -1; + } + *ret = mask; + return non_typekind; +} + +static TypeKindSet *TypeKindSet_new(PyTypeObject *subtype, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = { "", NULL }; + PyObject *iterable = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TypeKindSet", keywords, + &iterable)) + return NULL; + uint64_t kinds = 0; + if (iterable) { + int r = TypeKindSet_mask_from_iterable(iterable, &kinds); + if (r < 0) + return NULL; + if (r > 0) { + PyErr_SetString(PyExc_TypeError, + "TypeKindSet elements must be TypeKind"); + return NULL; + } + } + TypeKindSet *res = (TypeKindSet *)subtype->tp_alloc(subtype, 0); + res->kinds = kinds; + return res; +} + +static Py_ssize_t TypeKindSet_length(TypeKindSet *self) +{ + return popcount(self->kinds); +} + +static int TypeKindSet_contains(TypeKindSet *self, PyObject *other) +{ + if (!PyObject_TypeCheck(other, (PyTypeObject *)TypeKind_class)) + return 0; + int value = TypeKind_value(other); + if (value < 0) + return value; + return !!(self->kinds & (1 << value)); +} + +static Py_ssize_t TypeKindSet_hash(TypeKindSet *self) +{ + return self->kinds; +} + +static PyObject *TypeKindSet_richcompare(TypeKindSet *self, PyObject *other, + int op) +{ + if (!PyObject_IsInstance(other, collections_abc_Set)) + Py_RETURN_NOTIMPLEMENTED; + + uint64_t other_kinds; + int other_non_typekind = + TypeKindSet_mask_from_iterable(other, &other_kinds); + if (other_non_typekind < 0) + return NULL; + + switch (op) { + case Py_EQ: + Py_RETURN_BOOL(self->kinds == other_kinds && !other_non_typekind); + case Py_NE: + Py_RETURN_BOOL(self->kinds != other_kinds || other_non_typekind); + case Py_LT: + Py_RETURN_BOOL((self->kinds != other_kinds || other_non_typekind) + && (other_kinds | self->kinds) == other_kinds); + case Py_GT: + Py_RETURN_BOOL(self->kinds != other_kinds + && (self->kinds | other_kinds) == self->kinds + && !other_non_typekind); + case Py_LE: + Py_RETURN_BOOL((other_kinds | self->kinds) == other_kinds); + case Py_GE: + Py_RETURN_BOOL((self->kinds | other_kinds) == self->kinds + && !other_non_typekind); + default: + Py_UNREACHABLE(); + } +} + +static TypeKindSetIterator *TypeKindSet_iter(TypeKindSet *self) +{ + TypeKindSetIterator *it = call_tp_alloc(TypeKindSetIterator); + if (!it) + return NULL; + it->mask = self->kinds; + return it; +} + +static PyObject *TypeKindSet_isdisjoint(TypeKindSet *self, PyObject *other) +{ + uint64_t other_kinds; + if (TypeKindSet_mask_from_iterable(other, &other_kinds) < 0) + return NULL; + // Non-TypeKind elements in other cannot be in self, so they don't + // affect the answer and can be ignored. + Py_RETURN_BOOL((self->kinds ^ other_kinds) + == (self->kinds | other_kinds)); +} + +static PyObject *TypeKindSet_sub(PyObject *left, PyObject *right) +{ + uint64_t left_kinds; + int left_r = TypeKindSet_mask_from_iterable(left, &left_kinds); + if (left_r < 0) + return NULL; + if (left_r > 0) + Py_RETURN_NOTIMPLEMENTED; + + uint64_t right_kinds; + if (TypeKindSet_mask_from_iterable(right, &right_kinds) < 0) + return NULL; + // If right has non-TypeKind elements, then left is a TypeKindSet and + // removing non-TypeKind elements has no effect, so they can be ignored. + return TypeKindSet_wrap(left_kinds & ~right_kinds); +} + +static PyObject *TypeKindSet_and(PyObject *left, PyObject *right) +{ + uint64_t left_kinds, right_kinds; + if (TypeKindSet_mask_from_iterable(left, &left_kinds) < 0 + || TypeKindSet_mask_from_iterable(right, &right_kinds) < 0) + return NULL; + // At least one of the operands is a TypeKindSet, so non-TypeKind + // elements cannot be in both and can be ignored. + return TypeKindSet_wrap(left_kinds & right_kinds); +} + +#define TypeKindSet_OR_OP(name, op) \ +static PyObject *TypeKindSet_##name(PyObject *left, PyObject *right) \ +{ \ + /* Both operands must only contain TypeKind elements. */ \ + uint64_t left_kinds; \ + int left_r = TypeKindSet_mask_from_iterable(left, &left_kinds); \ + if (left_r < 0) \ + return NULL; \ + if (left_r > 0) \ + Py_RETURN_NOTIMPLEMENTED; \ + \ + uint64_t right_kinds; \ + int right_r = TypeKindSet_mask_from_iterable(right, &right_kinds); \ + if (right_r < 0) \ + return NULL; \ + if (right_r > 0) \ + Py_RETURN_NOTIMPLEMENTED; \ + \ + return TypeKindSet_wrap(left_kinds op right_kinds); \ +} +TypeKindSet_OR_OP(xor, ^) +TypeKindSet_OR_OP(or, |) +#undef TypeKindSet_OR_OP + +static PyNumberMethods TypeKindSet_as_number = { + .nb_subtract = TypeKindSet_sub, + .nb_and = TypeKindSet_and, + .nb_xor = TypeKindSet_xor, + .nb_or = TypeKindSet_or, +}; + +static PySequenceMethods TypeKindSet_as_sequence = { + .sq_length = (lenfunc)TypeKindSet_length, + .sq_contains = (objobjproc)TypeKindSet_contains, +}; + +static PyMethodDef TypeKindSet_methods[] = { + {"isdisjoint", (PyCFunction)TypeKindSet_isdisjoint, METH_O}, + {}, +}; + +PyTypeObject TypeKindSet_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.TypeKindSet", + .tp_basicsize = sizeof(TypeKindSet), + .tp_repr = (reprfunc)TypeKindSet_repr, + .tp_as_number = &TypeKindSet_as_number, + .tp_as_sequence = &TypeKindSet_as_sequence, + .tp_hash = (hashfunc)TypeKindSet_hash, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_TypeKindSet_DOC, + .tp_richcompare = (richcmpfunc)TypeKindSet_richcompare, + .tp_iter = (getiterfunc)TypeKindSet_iter, + .tp_methods = TypeKindSet_methods, + .tp_new = (newfunc)TypeKindSet_new, +}; + +static PyObject *TypeKindSetIterator_next(TypeKindSetIterator *self) +{ + if (self->mask == 0) + return NULL; + unsigned int i = ctz(self->mask); + self->mask &= self->mask - 1; + return PyObject_CallFunction(TypeKind_class, "I", i); +} + +static PyObject *TypeKindSetIterator_length_hint(TypeKindSetIterator *self) +{ + return PyLong_FromUnsignedLong(popcount(self->mask)); +} + +static PyMethodDef TypeKindSetIterator_methods[] = { + {"__length_hint__", (PyCFunction)TypeKindSetIterator_length_hint, + METH_NOARGS}, + {}, +}; + +PyTypeObject TypeKindSetIterator_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn._TypeKindSetIterator", + .tp_basicsize = sizeof(TypeKindSetIterator), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, + .tp_iternext = (iternextfunc)TypeKindSetIterator_next, + .tp_methods = TypeKindSetIterator_methods, +}; diff --git a/tests/test_type_kind_set.py b/tests/test_type_kind_set.py new file mode 100644 index 000000000..2c28ba834 --- /dev/null +++ b/tests/test_type_kind_set.py @@ -0,0 +1,308 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from drgn import TypeKind, TypeKindSet +from tests import TestCase + + +class TestTypeKindSet(TestCase): + def test_repr(self): + self.assertEqual( + repr(TypeKindSet()), + "TypeKindSet()", + ) + self.assertEqual( + repr(TypeKindSet({TypeKind.STRUCT})), + "TypeKindSet({TypeKind.STRUCT})", + ) + self.assertEqual( + repr(TypeKindSet({TypeKind.STRUCT, TypeKind.CLASS})), + "TypeKindSet({TypeKind.STRUCT, TypeKind.CLASS})", + ) + + def test_len(self): + self.assertEqual(len(TypeKindSet()), 0) + self.assertEqual(len(TypeKindSet({TypeKind.STRUCT})), 1) + self.assertEqual(len(TypeKindSet({TypeKind.INT, TypeKind.STRUCT})), 2) + + def test_in(self): + self.assertNotIn(TypeKind.INT, TypeKindSet()) + self.assertIn(TypeKind.INT, TypeKindSet({TypeKind.INT})) + self.assertIn(TypeKind.INT, TypeKindSet({TypeKind.INT, TypeKind.STRUCT})) + self.assertIn(TypeKind.STRUCT, TypeKindSet({TypeKind.INT, TypeKind.STRUCT})) + self.assertNotIn(TypeKind.UNION, TypeKindSet({TypeKind.INT, TypeKind.STRUCT})) + self.assertNotIn(0, TypeKindSet({TypeKind.INT})) + self.assertNotIn(TypeKind.INT.value, TypeKindSet({TypeKind.INT})) + + def test_iter(self): + for s in ( + set(), + {TypeKind.VOID}, + {TypeKind.ARRAY, TypeKind.POINTER}, + ): + self.assertEqual(set(TypeKindSet(s)), s) + + def test_eq(self): + self.assertTrue(TypeKindSet() == TypeKindSet()) + self.assertFalse(TypeKindSet() == TypeKindSet({TypeKind.BOOL})) + self.assertTrue(TypeKindSet({TypeKind.FLOAT}) == TypeKindSet({TypeKind.FLOAT})) + self.assertFalse( + TypeKindSet({TypeKind.FUNCTION}) == TypeKindSet({TypeKind.BOOL}) + ) + self.assertTrue( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + == TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + ) + self.assertFalse( + TypeKindSet({TypeKind.BOOL, TypeKind.FUNCTION}) + == TypeKindSet({TypeKind.BOOL, TypeKind.ARRAY}), + ) + self.assertTrue( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + == {TypeKind.FLOAT, TypeKind.TYPEDEF} + ) + self.assertFalse(TypeKindSet() == {"asdf"}) + self.assertFalse( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + == {TypeKind.FLOAT, TypeKind.TYPEDEF, "foo"} + ) + self.assertFalse( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + == [TypeKind.FLOAT, TypeKind.TYPEDEF] + ) + + def test_ne(self): + self.assertFalse(TypeKindSet() != TypeKindSet()) + self.assertTrue(TypeKindSet() != TypeKindSet({TypeKind.BOOL})) + self.assertFalse(TypeKindSet({TypeKind.FLOAT}) != TypeKindSet({TypeKind.FLOAT})) + self.assertTrue( + TypeKindSet({TypeKind.FUNCTION}) != TypeKindSet({TypeKind.BOOL}) + ) + self.assertFalse( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + != TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + ) + self.assertTrue( + TypeKindSet({TypeKind.BOOL, TypeKind.FUNCTION}) + != TypeKindSet({TypeKind.BOOL, TypeKind.ARRAY}), + ) + self.assertFalse( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + != {TypeKind.FLOAT, TypeKind.TYPEDEF} + ) + self.assertTrue(TypeKindSet() != {"asdf"}) + self.assertTrue( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + != {TypeKind.FLOAT, TypeKind.TYPEDEF, "foo"} + ) + self.assertTrue( + TypeKindSet({TypeKind.FLOAT, TypeKind.TYPEDEF}) + != [TypeKind.FLOAT, TypeKind.TYPEDEF] + ) + + def test_lt(self): + self.assertFalse(TypeKindSet() < TypeKindSet()) + self.assertTrue(TypeKindSet() < TypeKindSet({TypeKind.BOOL})) + self.assertFalse(TypeKindSet({TypeKind.BOOL}) < TypeKindSet()) + self.assertFalse(TypeKindSet({TypeKind.BOOL}) < TypeKindSet({TypeKind.BOOL})) + self.assertTrue( + TypeKindSet({TypeKind.BOOL}) < TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) + ) + self.assertFalse( + TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) < TypeKindSet({TypeKind.BOOL}) + ) + self.assertFalse(TypeKindSet({TypeKind.INT}) < TypeKindSet({TypeKind.BOOL})) + self.assertTrue( + TypeKindSet({TypeKind.BOOL}) < {TypeKind.BOOL, TypeKind.FLOAT, "foo"} + ) + with self.assertRaises(TypeError): + TypeKindSet({TypeKind.BOOL}) < [TypeKind.BOOL, TypeKind.FLOAT] + + def test_gt(self): + self.assertFalse(TypeKindSet() > TypeKindSet()) + self.assertFalse(TypeKindSet() > TypeKindSet({TypeKind.BOOL})) + self.assertTrue(TypeKindSet({TypeKind.BOOL}) > TypeKindSet()) + self.assertFalse(TypeKindSet({TypeKind.BOOL}) > TypeKindSet({TypeKind.BOOL})) + self.assertFalse( + TypeKindSet({TypeKind.BOOL}) > TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) + ) + self.assertTrue( + TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) > TypeKindSet({TypeKind.BOOL}) + ) + self.assertFalse(TypeKindSet({TypeKind.INT}) > TypeKindSet({TypeKind.BOOL})) + self.assertFalse( + TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) > {TypeKind.BOOL, "foo"} + ) + with self.assertRaises(TypeError): + TypeKindSet({TypeKind.BOOL}) > [] + + def test_le(self): + self.assertTrue(TypeKindSet() <= TypeKindSet()) + self.assertTrue(TypeKindSet() <= TypeKindSet({TypeKind.BOOL})) + self.assertFalse(TypeKindSet({TypeKind.BOOL}) <= TypeKindSet()) + self.assertTrue(TypeKindSet({TypeKind.BOOL}) <= TypeKindSet({TypeKind.BOOL})) + self.assertTrue( + TypeKindSet({TypeKind.BOOL}) <= TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) + ) + self.assertFalse( + TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) <= TypeKindSet({TypeKind.BOOL}) + ) + self.assertFalse(TypeKindSet({TypeKind.INT}) <= TypeKindSet({TypeKind.BOOL})) + self.assertTrue(TypeKindSet({TypeKind.BOOL}) <= {TypeKind.BOOL, "foo"}) + with self.assertRaises(TypeError): + TypeKindSet({TypeKind.BOOL}) <= [TypeKind.BOOL, TypeKind.FLOAT] + + def test_ge(self): + self.assertTrue(TypeKindSet() >= TypeKindSet()) + self.assertFalse(TypeKindSet() >= TypeKindSet({TypeKind.BOOL})) + self.assertTrue(TypeKindSet({TypeKind.BOOL}) >= TypeKindSet()) + self.assertTrue(TypeKindSet({TypeKind.BOOL}) >= TypeKindSet({TypeKind.BOOL})) + self.assertFalse( + TypeKindSet({TypeKind.BOOL}) >= TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) + ) + self.assertTrue( + TypeKindSet({TypeKind.BOOL, TypeKind.FLOAT}) >= TypeKindSet({TypeKind.BOOL}) + ) + self.assertFalse(TypeKindSet({TypeKind.INT}) >= TypeKindSet({TypeKind.BOOL})) + self.assertFalse(TypeKindSet({TypeKind.BOOL}) >= {TypeKind.BOOL, "foo"}) + with self.assertRaises(TypeError): + TypeKindSet({TypeKind.BOOL}) >= [TypeKind.BOOL] + + def test_isdisjoint(self): + self.assertTrue(TypeKindSet().isdisjoint(TypeKindSet())) + self.assertTrue( + TypeKindSet({TypeKind.POINTER}).isdisjoint(TypeKindSet({TypeKind.ARRAY})) + ) + self.assertFalse( + TypeKindSet({TypeKind.POINTER}).isdisjoint(TypeKindSet({TypeKind.POINTER})) + ) + self.assertFalse( + TypeKindSet({TypeKind.ARRAY}).isdisjoint( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) + ) + ) + self.assertFalse( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}).isdisjoint( + TypeKindSet({TypeKind.ARRAY}) + ) + ) + self.assertTrue(TypeKindSet({TypeKind.POINTER}).isdisjoint({TypeKind.ARRAY})) + self.assertTrue(TypeKindSet({TypeKind.POINTER}).isdisjoint([TypeKind.ARRAY])) + self.assertFalse(TypeKindSet({TypeKind.POINTER}).isdisjoint({TypeKind.POINTER})) + self.assertFalse(TypeKindSet({TypeKind.POINTER}).isdisjoint([TypeKind.POINTER])) + + def test_sub(self): + self.assertEqual(TypeKindSet() - TypeKindSet(), TypeKindSet()) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) - TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) - TypeKindSet({TypeKind.POINTER}), + TypeKindSet(), + ) + self.assertEqual( + TypeKindSet({TypeKind.ARRAY}) + - TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + TypeKindSet(), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) + - TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) - {TypeKind.ARRAY}, + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) - {TypeKind.ARRAY, "asdf"}, + TypeKindSet({TypeKind.POINTER}), + ) + + def test_and(self): + self.assertEqual(TypeKindSet() & TypeKindSet(), TypeKindSet()) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) & TypeKindSet({TypeKind.ARRAY}), + TypeKindSet(), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) & TypeKindSet({TypeKind.POINTER}), + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.ARRAY}) + & TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + TypeKindSet({TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) + & TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) & {TypeKind.ARRAY}, + TypeKindSet({TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) + & {TypeKind.POINTER, "asdf"}, + TypeKindSet({TypeKind.POINTER}), + ) + + def test_xor(self): + self.assertEqual(TypeKindSet() ^ TypeKindSet(), TypeKindSet()) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) ^ TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) ^ TypeKindSet({TypeKind.POINTER}), + TypeKindSet(), + ) + self.assertEqual( + TypeKindSet({TypeKind.ARRAY}) + ^ TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) + ^ TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) ^ {TypeKind.ARRAY}, + TypeKindSet({TypeKind.POINTER}), + ) + with self.assertRaises(TypeError): + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) ^ {TypeKind.ARRAY, "asdf"} + + def test_or(self): + self.assertEqual(TypeKindSet() | TypeKindSet(), TypeKindSet()) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) | TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) | TypeKindSet({TypeKind.POINTER}), + TypeKindSet({TypeKind.POINTER}), + ) + self.assertEqual( + TypeKindSet({TypeKind.ARRAY}) + | TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}) + | TypeKindSet({TypeKind.ARRAY}), + TypeKindSet({TypeKind.POINTER, TypeKind.ARRAY}), + ) + self.assertEqual( + TypeKindSet({TypeKind.POINTER}) | {TypeKind.ARRAY}, + {TypeKind.POINTER, TypeKind.ARRAY}, + ) + with self.assertRaises(TypeError): + TypeKindSet({TypeKind.POINTER}) | {TypeKind.ARRAY, "asdf"} + + def test_all_kinds(self): + self.assertEqual(set(TypeKindSet(TypeKind)), set(TypeKind)) From 8bdd63c5058c67418221e15a9b3c48366bbd94ac Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 31 May 2024 11:44:32 -0700 Subject: [PATCH 4/6] Allow naming and configuring order of type finders Like for symbol finders, we want extra flexibility around configuring type finders. The type finder callback signature also has a couple of warts: it doesn't take the program since it was added before types needed to be constructed from a program, and it is called separately for each type kind since originally type lookups were for only one kind. While we're adding a new interface, let's fix these warts: pass the program and a set of type kinds. However, we need to keep the old add_type_finder() interface for backwards compatibility. Signed-off-by: Omar Sandoval --- _drgn.pyi | 63 ++++++++++++++-- docs/advanced_usage.rst | 2 +- libdrgn/debug_info.c | 8 +- libdrgn/drgn.h | 102 +++++++++++++++++++------- libdrgn/program.c | 1 + libdrgn/program.h | 10 ++- libdrgn/python/program.c | 155 +++++++++++++++++++++++++++------------ libdrgn/type.c | 48 +++--------- libdrgn/type.h | 15 +--- tests/__init__.py | 6 +- tests/test_program.py | 107 +++++++++++++++++++++++---- 11 files changed, 363 insertions(+), 154 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 9cda06752..21020c08a 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -437,6 +437,49 @@ class Program: another :ref:`buffer ` type. """ ... + def register_type_finder( + self, + name: str, + fn: Callable[[Program, TypeKindSet, str, Optional[str]], Optional[Type]], + *, + enable_index: Optional[int] = None, + ) -> None: + """ + Register a callback for finding types in the program. + + This does not enable the finder unless *enable_index* is given. + + :param name: Finder name. + :param fn: Callable taking the program, a :class:`TypeKindSet`, name, + and filename: ``(prog, kinds, name, filename)``. The filename + should be matched with :func:`filename_matches()`. This should + return a :class:`Type` or ``None`` if not found. + :param enable_index: Insert the finder into the list of enabled type + finders at the given index. If -1 or greater than the number of + enabled finders, insert it at the end. If ``None`` or not given, + don't enable the finder. + :raises ValueError: if there is already a finder with the given name + """ + ... + def registered_type_finders(self) -> Set[str]: + """Return the names of all registered type finders.""" + ... + def set_enabled_type_finders(self, names: Sequence[str]) -> None: + """ + Set the list of enabled type finders. + + Finders are called in the same order as the list until a type is found. + + Finders that are not in the list are not called. + + :param names: Names of finders to enable, in order. + :raises ValueError: if no finder has a given name or the same name is + given more than once + """ + ... + def enabled_type_finders(self) -> List[str]: + """Return the names of enabled type finders, in order.""" + ... def register_symbol_finder( self, name: str, @@ -499,16 +542,20 @@ class Program: self, fn: Callable[[TypeKind, str, Optional[str]], Optional[Type]] ) -> None: """ - Register a callback for finding types in the program. + Deprecated method to register and enable a callback for finding types + in the program. - Callbacks are called in reverse order of the order they were added - until the type is found. So, more recently added callbacks take - precedence. + .. deprecated:: 0.0.27 + Use :meth:`register_type_finder()` instead. + + The differences from :meth:`register_type_finder()` are: - :param fn: Callable taking a :class:`TypeKind`, name, and filename: - ``(kind, name, filename)``. The filename should be matched with - :func:`filename_matches()`. This should return a :class:`Type` - or ``None`` if not found. + 1. *fn* is not passed *prog*. + 2. *fn* is passed a :class:`TypeKind` instead of a + :class:`TypeKindSet`. If multiple kinds are being searched for, *fn* + will be called multiple times. + 3. A name for the finder is generated from *fn*. + 4. The finder is always enabled before any existing finders. """ ... def add_object_finder( diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 839fe8ce5..fc5b5c877 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -82,7 +82,7 @@ program "memory": print(drgn.Object(prog, 'struct btrfs_super_block', address=65536)) run_interactive(prog, banner_func=lambda _: "BTRFS debugger") -:meth:`drgn.Program.add_type_finder()` and +:meth:`drgn.Program.register_type_finder()` and :meth:`drgn.Program.add_object_finder()` are the equivalent methods for plugging in types and objects. diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index ede84af56..34a36dd2e 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2181,8 +2181,12 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, // unlikely to fail anwyays, so don't bother propagating an error up. if (!dbinfo->dwfl) abort(); - drgn_program_add_type_finder_impl(prog, &dbinfo->type_finder, - drgn_debug_info_find_type, dbinfo); + const struct drgn_type_finder_ops type_finder_ops = { + .find = drgn_debug_info_find_type, + }; + drgn_program_register_type_finder_impl(prog, &dbinfo->type_finder, + "dwarf", &type_finder_ops, + dbinfo, 0); drgn_program_add_object_finder_impl(prog, &dbinfo->object_finder, drgn_debug_info_find_object, dbinfo); diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index d5b47d882..97c5b421d 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -587,41 +587,91 @@ enum { DRGN_HANDLER_REGISTER_DONT_ENABLE = SIZE_MAX - 1, }; +/** Type finder callback table. */ +struct drgn_type_finder_ops { + /** + * Callback to destroy the type finder. + * + * This may be @c NULL. + * + * @param[in] arg Argument passed to @ref + * drgn_program_register_type_finder(). + */ + void (*destroy)(void *arg); + /** + * Callback for finding a type. + * + * @param[in] kinds Kinds of types to find, as a bitmask of bits shifted + * by @ref drgn_type_kind. E.g., `(1 << DRGN_TYPE_STRUCT) | (1 << + * DRGN_TYPE_CLASS)` means to find a structure or class type. + * @param[in] name Name of type (or tag, for structs, unions, and + * enums). This is @em not null-terminated. + * @param[in] name_len Length of @p name. + * @param[in] filename Filename containing the type definition or @c + * NULL. This should be matched with @ref drgn_filename_matches(). + * @param[in] arg Argument passed to @ref + * drgn_program_register_type_finder(). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. In particular, if + * the type is not found, this should return &@ref drgn_not_found; any + * other errors are considered fatal. + */ + struct drgn_error *(*find)(uint64_t kinds, const char *name, + size_t name_len, const char *filename, + void *arg, struct drgn_qualified_type *ret); +}; + /** - * Callback for finding a type. + * Register a type finding callback. * - * @param[in] kinds Kinds of types to find, as a bitmask of bits shifted by @ref - * drgn_type_kind. E.g., `(1 << DRGN_TYPE_STRUCT) | (1 << DRGN_TYPE_CLASS)` - * means to find a structure or class type. - * @param[in] name Name of type (or tag, for structs, unions, and enums). This - * is @em not null-terminated. - * @param[in] name_len Length of @p name. - * @param[in] filename Filename containing the type definition or @c NULL. This - * should be matched with @ref drgn_filename_matches(). - * @param[in] arg Argument passed to @ref drgn_program_add_type_finder(). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. In particular, if the type - * is not found, this should return &@ref drgn_not_found; any other errors are - * considered fatal. + * @param[in] name Finder name. This is copied. + * @param[in] ops Callback table. This is copied. + * @param[in] arg Argument to pass to callbacks. + * @param[in] enable_index Insert the finder into the list of enabled finders at + * the given index. If @ref DRGN_HANDLER_REGISTER_ENABLE_LAST or greater than + * the number of enabled finders, insert it at the end. If @ref + * DRGN_HANDLER_REGISTER_DONT_ENABLE, don’t enable the finder. */ -typedef struct drgn_error * -(*drgn_type_find_fn)(uint64_t kinds, const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret); +struct drgn_error * +drgn_program_register_type_finder(struct drgn_program *prog, const char *name, + const struct drgn_type_finder_ops *ops, + void *arg, size_t enable_index); /** - * Register a type finding callback. + * Get the names of all registered type finders. * - * Callbacks are called in reverse order of the order they were added until the - * type is found. So, more recently added callbacks take precedence. + * The order of the names is arbitrary. * - * @param[in] fn The callback. - * @param[in] arg Argument to pass to @p fn. - * @return @c NULL on success, non-@c NULL on error. + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. + */ +struct drgn_error * +drgn_program_registered_type_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); + +/** + * Set the list of enabled type finders. + * + * Finders are called in the same order as the list until a type is found. + * + * @param[in] names Names of finders to enable, in order. + * @param[in] count Number of names in @p names. */ struct drgn_error * -drgn_program_add_type_finder(struct drgn_program *prog, drgn_type_find_fn fn, - void *arg); +drgn_program_set_enabled_type_finders(struct drgn_program *prog, + const char * const *names, + size_t count); + +/** + * Get the names of enabled type finders, in order. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. + */ +struct drgn_error *drgn_program_enabled_type_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); /** Flags for @ref drgn_program_find_object(). */ enum drgn_find_object_flags { diff --git a/libdrgn/program.c b/libdrgn/program.c index 76b4a251a..9b7f91cb0 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -283,6 +283,7 @@ drgn_program_enabled_##which##_finders(struct drgn_program *prog, \ count_ret); \ } +DRGN_PROGRAM_FINDER(type) DRGN_PROGRAM_FINDER(symbol) #undef DRGN_PROGRAM_FINDER diff --git a/libdrgn/program.h b/libdrgn/program.h index ac674b5fd..e8687b683 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -32,6 +32,7 @@ #include "type.h" #include "vector.h" +struct drgn_type_finder; struct drgn_symbol_finder; /** @@ -82,7 +83,7 @@ struct drgn_program { * Types. */ /** Callbacks for finding types. */ - struct drgn_type_finder *type_finders; + struct drgn_handler_list type_finders; /** Void type for each language. */ struct drgn_type void_types[DRGN_NUM_LANGUAGES]; /** Cache of primitive types. */ @@ -383,6 +384,13 @@ drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, uint64_t address, struct drgn_symbol **ret); +struct drgn_error * +drgn_program_register_type_finder_impl(struct drgn_program *prog, + struct drgn_type_finder *finder, + const char *name, + const struct drgn_type_finder_ops *ops, + void *arg, size_t enable_index); + struct drgn_error * drgn_program_register_symbol_finder_impl(struct drgn_program *prog, struct drgn_symbol_finder *finder, diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index cfffc30d7..000bbfbdb 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -385,6 +385,29 @@ static PyObject *Program_add_memory_segment(Program *self, PyObject *args, Py_RETURN_NONE; } +static inline struct drgn_error * +py_type_find_fn_common(PyObject *type_obj, void *arg, + struct drgn_qualified_type *ret) +{ + if (!PyObject_TypeCheck(type_obj, &DrgnType_type)) { + PyErr_SetString(PyExc_TypeError, + "type find callback must return Type or None"); + return drgn_error_from_python(); + } + // This check is also done in libdrgn, but we need it here because if + // the type isn't from this program, then there's no guarantee that it + // will remain valid after we decrement its reference count. + if (DrgnType_prog((DrgnType *)type_obj) + != (Program *)PyTuple_GET_ITEM(arg, 0)) { + PyErr_SetString(PyExc_ValueError, + "type find callback returned type from wrong program"); + return drgn_error_from_python(); + } + ret->type = ((DrgnType *)type_obj)->type; + ret->qualifiers = ((DrgnType *)type_obj)->qualifiers; + return NULL; +} + static struct drgn_error *py_type_find_fn(uint64_t kinds, const char *name, size_t name_len, const char *filename, void *arg, @@ -392,6 +415,33 @@ static struct drgn_error *py_type_find_fn(uint64_t kinds, const char *name, { PyGILState_guard(); + _cleanup_pydecref_ PyObject *name_obj = + PyUnicode_FromStringAndSize(name, name_len); + if (!name_obj) + return drgn_error_from_python(); + + _cleanup_pydecref_ PyObject *kinds_obj = TypeKindSet_wrap(kinds); + if (!kinds_obj) + return drgn_error_from_python(); + _cleanup_pydecref_ PyObject *type_obj = + PyObject_CallFunction(PyTuple_GET_ITEM(arg, 1), "OOOs", + PyTuple_GET_ITEM(arg, 0), kinds_obj, + name_obj, filename); + if (!type_obj) + return drgn_error_from_python(); + if (type_obj == Py_None) + return &drgn_not_found; + return py_type_find_fn_common(type_obj, arg, ret); +} + +// Old version for add_type_finder(). +static struct drgn_error *py_type_find_fn_old(uint64_t kinds, const char *name, + size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret) +{ + PyGILState_guard(); + _cleanup_pydecref_ PyObject *name_obj = PyUnicode_FromStringAndSize(name, name_len); if (!name_obj) @@ -411,56 +461,11 @@ static struct drgn_error *py_type_find_fn(uint64_t kinds, const char *name, return drgn_error_from_python(); if (type_obj == Py_None) continue; - if (!PyObject_TypeCheck(type_obj, &DrgnType_type)) { - PyErr_SetString(PyExc_TypeError, - "type find callback must return Type or None"); - return drgn_error_from_python(); - } - // This check is also done in libdrgn, but we need it here - // because if the type isn't from this program, then there's no - // guarantee that it will remain valid after we decrement its - // reference count. - if (DrgnType_prog((DrgnType *)type_obj) - != (Program *)PyTuple_GET_ITEM(arg, 0)) { - PyErr_SetString(PyExc_ValueError, - "type find callback returned type from wrong program"); - return drgn_error_from_python(); - } - ret->type = ((DrgnType *)type_obj)->type; - ret->qualifiers = ((DrgnType *)type_obj)->qualifiers; - return NULL; + return py_type_find_fn_common(type_obj, arg, ret); } return &drgn_not_found; } -static PyObject *Program_add_type_finder(Program *self, PyObject *args, - PyObject *kwds) -{ - struct drgn_error *err; - - static char *keywords[] = {"fn", NULL}; - PyObject *fn; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:add_type_finder", - keywords, &fn)) - return NULL; - - if (!PyCallable_Check(fn)) { - PyErr_SetString(PyExc_TypeError, "fn must be callable"); - return NULL; - } - - _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); - if (!arg) - return NULL; - if (Program_hold_object(self, arg)) - return NULL; - - err = drgn_program_add_type_finder(&self->prog, py_type_find_fn, arg); - if (err) - return set_drgn_error(err); - Py_RETURN_NONE; -} - static struct drgn_error *py_object_find_fn(const char *name, size_t name_len, const char *filename, enum drgn_find_object_flags flags, @@ -560,10 +565,11 @@ py_symbol_find_fn(const char *name, uint64_t addr, return NULL; } -#define symbol_finder_arg(self, fn) \ +#define type_finder_arg(self, fn) \ _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); \ if (!arg) \ return NULL; +#define symbol_finder_arg type_finder_arg #define DEFINE_PROGRAM_FINDER_METHODS(which) \ static PyObject *Program_register_##which##_finder(Program *self, \ @@ -705,8 +711,62 @@ static PyObject *Program_enabled_##which##_finders(Program *self) \ return_ptr(res); \ } +DEFINE_PROGRAM_FINDER_METHODS(type) DEFINE_PROGRAM_FINDER_METHODS(symbol) +static PyObject *deprecated_finder_name_obj(PyObject *fn) +{ + _cleanup_pydecref_ PyObject *name_attr_obj = + PyObject_GetAttrString(fn, "__name__"); + if (name_attr_obj) { + return PyUnicode_FromFormat("%S_%lu", name_attr_obj, random()); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + return PyUnicode_FromFormat("%lu", random()); + } else { + return NULL; + } +} + +static PyObject *Program_add_type_finder(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"fn", NULL}; + PyObject *fn; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:add_type_finder", + keywords, &fn)) + return NULL; + + if (!PyCallable_Check(fn)) { + PyErr_SetString(PyExc_TypeError, "fn must be callable"); + return NULL; + } + + _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); + if (!arg) + return NULL; + + _cleanup_pydecref_ PyObject *name_obj = deprecated_finder_name_obj(fn); + if (!name_obj) + return NULL; + const char *name = PyUnicode_AsUTF8(name_obj); + if (!name) + return NULL; + + if (!Program_hold_reserve(self, 1)) + return NULL; + const struct drgn_type_finder_ops ops = { + .find = py_type_find_fn_old, + }; + err = drgn_program_register_type_finder(&self->prog, name, &ops, arg, + 0); + if (err) + return set_drgn_error(err); + Program_hold_object(self, arg); + Py_RETURN_NONE; +} + static PyObject *Program_add_object_finder(Program *self, PyObject *args, PyObject *kwds) { @@ -1396,6 +1456,7 @@ static int Program_set_language(Program *self, PyObject *value, void *arg) static PyMethodDef Program_methods[] = { {"add_memory_segment", (PyCFunction)Program_add_memory_segment, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_memory_segment_DOC}, + PROGRAM_FINDER_METHOD_DEFS(type), PROGRAM_FINDER_METHOD_DEFS(symbol), {"add_type_finder", (PyCFunction)Program_add_type_finder, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_type_finder_DOC}, diff --git a/libdrgn/type.c b/libdrgn/type.c index d6e890970..371247525 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1310,40 +1310,11 @@ void drgn_program_deinit_types(struct drgn_program *prog) free(*it.entry); drgn_dedupe_type_set_deinit(&prog->dedupe_types); - struct drgn_type_finder *finder = prog->type_finders; - while (finder) { - struct drgn_type_finder *next = finder->next; - if (finder->free) - free(finder); - finder = next; - } -} - -struct drgn_error * -drgn_program_add_type_finder_impl(struct drgn_program *prog, - struct drgn_type_finder *finder, - drgn_type_find_fn fn, void *arg) -{ - if (finder) { - finder->free = false; - } else { - finder = malloc(sizeof(*finder)); - if (!finder) - return &drgn_enomem; - finder->free = true; - } - finder->fn = fn; - finder->arg = arg; - finder->next = prog->type_finders; - prog->type_finders = finder; - return NULL; -} - -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_add_type_finder(struct drgn_program *prog, drgn_type_find_fn fn, - void *arg) -{ - return drgn_program_add_type_finder_impl(prog, NULL, fn, arg); + drgn_handler_list_deinit(struct drgn_type_finder, finder, + &prog->type_finders, + if (finder->ops.destroy) + finder->ops.destroy(finder->arg); + ); } struct drgn_error *drgn_program_find_type_impl(struct drgn_program *prog, @@ -1352,11 +1323,11 @@ struct drgn_error *drgn_program_find_type_impl(struct drgn_program *prog, const char *filename, struct drgn_qualified_type *ret) { - struct drgn_type_finder *finder = prog->type_finders; - while (finder) { + drgn_handler_list_for_each_enabled(struct drgn_type_finder, finder, + &prog->type_finders) { struct drgn_error *err = - finder->fn(kinds, name, name_len, filename, finder->arg, - ret); + finder->ops.find(kinds, name, name_len, filename, + finder->arg, ret); if (!err) { if (drgn_type_program(ret->type) != prog) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, @@ -1370,7 +1341,6 @@ struct drgn_error *drgn_program_find_type_impl(struct drgn_program *prog, } if (err != &drgn_not_found) return err; - finder = finder->next; } return &drgn_not_found; } diff --git a/libdrgn/type.h b/libdrgn/type.h index c8f46f9a4..02b262239 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -15,6 +15,7 @@ #include #include "drgn.h" +#include "handler.h" #include "hash_table.h" #include "vector.h" @@ -50,21 +51,11 @@ drgn_byte_order_from_little_endian(bool little_endian) /** Registered type finding callback in a @ref drgn_program. */ struct drgn_type_finder { - /** The callback. */ - drgn_type_find_fn fn; - /** Argument to pass to @ref drgn_type_finder::fn. */ + struct drgn_handler handler; + struct drgn_type_finder_ops ops; void *arg; - /** Next callback to try. */ - struct drgn_type_finder *next; - /** Whether this structure needs to be freed. */ - bool free; }; -struct drgn_error * -drgn_program_add_type_finder_impl(struct drgn_program *prog, - struct drgn_type_finder *finder, - drgn_type_find_fn fn, void *arg); - DEFINE_HASH_SET_TYPE(drgn_dedupe_type_set, struct drgn_type *); /** (type, member name) pair. */ diff --git a/tests/__init__.py b/tests/__init__.py index 2f44ba1db..c29a51085 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -68,11 +68,11 @@ class MockObject(NamedTuple): def mock_program(platform=MOCK_PLATFORM, *, segments=None, types=None, objects=None): - def mock_find_type(kind, name, filename): + def mock_find_type(prog, kinds, name, filename): if filename: return None for type in types: - if type.kind == kind: + if type.kind in kinds: try: type_name = type.name except AttributeError: @@ -105,7 +105,7 @@ def mock_object_find(prog, name, flags, filename): if segments is not None: add_mock_memory_segments(prog, segments) if types is not None: - prog.add_type_finder(mock_find_type) + prog.register_type_finder("mock", mock_find_type, enable_index=0) if objects is not None: prog.add_object_finder(mock_object_find) return prog diff --git a/tests/test_program.py b/tests/test_program.py index 024213bf8..7f5306129 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -404,14 +404,73 @@ def test_invalid_read_fn(self): ) -class TestTypes(MockProgramTestCase): - def test_invalid_finder(self): - self.assertRaises(TypeError, self.prog.add_type_finder, "foo") +class TestTypeFinder(TestCase): + def test_register(self): + prog = Program(MOCK_PLATFORM) + + # We don't test every corner case because the symbol finder tests cover + # the shared part. + self.assertEqual(prog.registered_type_finders(), {"dwarf"}) + self.assertEqual(prog.enabled_type_finders(), ["dwarf"]) + + prog.register_type_finder( + "foo", lambda prog, kinds, name, filename: None, enable_index=-1 + ) + self.assertEqual(prog.registered_type_finders(), {"dwarf", "foo"}) + self.assertEqual(prog.enabled_type_finders(), ["dwarf", "foo"]) + + prog.set_enabled_type_finders(["foo"]) + self.assertEqual(prog.registered_type_finders(), {"dwarf", "foo"}) + self.assertEqual(prog.enabled_type_finders(), ["foo"]) + + def test_add_type_finder(self): + prog = Program(MOCK_PLATFORM) + + def dummy(kind, name, filename): + if kind == TypeKind.TYPEDEF and name == "foo": + return prog.typedef_type("foo", prog.void_type()) + else: + return None + + prog.add_type_finder(dummy) + self.assertTrue(any("dummy" in name for name in prog.registered_type_finders())) + self.assertIn("dummy", prog.enabled_type_finders()[0]) + self.assertIdentical( + prog.type("foo"), prog.typedef_type("foo", prog.void_type()) + ) + + def test_register_invalid(self): + prog = Program(MOCK_PLATFORM) + self.assertRaises(TypeError, prog.register_type_finder, "foo", "foo") + prog.register_type_finder( + "foo", lambda prog, kinds, name, filename: "foo", enable_index=0 + ) + self.assertRaises(TypeError, prog.type, "int") + + def test_add_invalid(self): + prog = Program(MOCK_PLATFORM) + self.assertRaises(TypeError, prog.add_type_finder, "foo") + prog.add_type_finder(lambda kind, name, filename: "foo") + self.assertRaises(TypeError, prog.type, "int") + + def test_register_wrong_program(self): + def finder(prog, kinds, name, filename): + if TypeKind.TYPEDEF in kinds and name == "foo": + prog = Program() + return prog.typedef_type("foo", prog.void_type()) + else: + return None - self.prog.add_type_finder(lambda kind, name, filename: "foo") - self.assertRaises(TypeError, self.prog.type, "int") + prog = Program(MOCK_PLATFORM) + prog.register_type_finder("foo", finder, enable_index=0) + self.assertRaisesRegex( + ValueError, + "type find callback returned type from wrong program", + prog.type, + "foo", + ) - def test_finder_different_program(self): + def test_add_wrong_program(self): def finder(kind, name, filename): if kind == TypeKind.TYPEDEF and name == "foo": prog = Program() @@ -419,23 +478,41 @@ def finder(kind, name, filename): else: return None - self.prog.add_type_finder(finder) + prog = Program(MOCK_PLATFORM) + prog.add_type_finder(finder) self.assertRaisesRegex( ValueError, "type find callback returned type from wrong program", - self.prog.type, + prog.type, "foo", ) - def test_wrong_kind(self): - self.prog.add_type_finder(lambda kind, name, filename: self.prog.void_type()) - self.assertRaises(TypeError, self.prog.type, "int") + def test_register_wrong_kind(self): + prog = Program(MOCK_PLATFORM) + prog.register_type_finder( + "foo", lambda prog, kinds, name, filename: prog.void_type(), enable_index=0 + ) + self.assertRaises(TypeError, prog.type, "int") - def test_not_found(self): - self.assertRaises(LookupError, self.prog.type, "struct foo") - self.prog.add_type_finder(lambda kind, name, filename: None) - self.assertRaises(LookupError, self.prog.type, "struct foo") + def test_add_wrong_kind(self): + prog = Program(MOCK_PLATFORM) + prog.add_type_finder(lambda kind, name, filename: prog.void_type()) + self.assertRaises(TypeError, prog.type, "int") + def test_register_not_found(self): + prog = Program(MOCK_PLATFORM) + prog.register_type_finder( + "foo", lambda prog, kinds, name, filename: None, enable_index=0 + ) + self.assertRaises(LookupError, prog.type, "struct foo") + + def test_add_not_found(self): + prog = Program(MOCK_PLATFORM) + prog.add_type_finder(lambda kind, name, filename: None) + self.assertRaises(LookupError, prog.type, "struct foo") + + +class TestTypes(MockProgramTestCase): def test_already_type(self): self.assertIdentical( self.prog.type(self.prog.pointer_type(self.prog.void_type())), From 66c2576c0f18013bf73437455b17e0747aa2420b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 Jun 2024 09:51:50 -0700 Subject: [PATCH 5/6] libdrgn: linux_kernel: deduplicate kernel-specific program setup The ELF and libkdumpfile paths have duplicated logic for adding the Linux kernel object finder and setting the default language to C. Factor them out into a common helper. Signed-off-by: Omar Sandoval --- libdrgn/kdump.c | 5 +---- libdrgn/linux_kernel.c | 12 ++++++++++++ libdrgn/linux_kernel.h | 7 ++----- libdrgn/linux_kernel_object_find.inc.strswitch | 9 +++++---- libdrgn/program.c | 6 +----- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index f028171bd..4471e3cef 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -261,12 +261,9 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) } prog->flags |= DRGN_PROGRAM_IS_LINUX_KERNEL; - err = drgn_program_add_object_finder(prog, linux_kernel_object_find, - prog); + err = drgn_program_finish_set_kernel(prog); if (err) goto err_platform; - if (!prog->lang) - prog->lang = &drgn_language_c; prog->kdump_ctx = ctx; return NULL; diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 19eddc9d0..493905b7b 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -381,6 +381,18 @@ static struct drgn_error *linux_kernel_get_vmemmap(struct drgn_program *prog, #include "linux_kernel_object_find.inc" // IWYU pragma: keep +struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) +{ + struct drgn_error *err; + err = drgn_program_add_object_finder(prog, linux_kernel_object_find, + prog); + if (err) + return err; + if (!prog->lang) + prog->lang = &drgn_language_c; + return NULL; +} + struct kernel_module_iterator { char *name; uint64_t start, end; diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index d848a45ec..ea949f55f 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -8,6 +8,8 @@ struct drgn_debug_info_load_state; +struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog); + struct drgn_error *read_memory_via_pgtable(void *buf, uint64_t address, size_t count, uint64_t offset, void *arg, bool physical); @@ -21,11 +23,6 @@ struct drgn_error *proc_kallsyms_symbol_addr(const char *name, struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog); -struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, - const char *filename, - enum drgn_find_object_flags flags, - void *arg, struct drgn_object *ret); - struct drgn_error * linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load); diff --git a/libdrgn/linux_kernel_object_find.inc.strswitch b/libdrgn/linux_kernel_object_find.inc.strswitch index 78f48342f..89166bbee 100644 --- a/libdrgn/linux_kernel_object_find.inc.strswitch +++ b/libdrgn/linux_kernel_object_find.inc.strswitch @@ -1,10 +1,11 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: LGPL-2.1-or-later -struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, - const char *filename, - enum drgn_find_object_flags flags, - void *arg, struct drgn_object *ret) +static struct drgn_error * +linux_kernel_object_find(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret) { struct drgn_program *prog = arg; if (!filename) { diff --git a/libdrgn/program.c b/libdrgn/program.c index 9b7f91cb0..11e411143 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -653,13 +653,9 @@ drgn_program_set_core_dump_fd_internal(struct drgn_program *prog, int fd, goto out_segments; } if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - err = drgn_program_add_object_finder(prog, - linux_kernel_object_find, - prog); + err = drgn_program_finish_set_kernel(prog); if (err) goto out_segments; - if (!prog->lang) - prog->lang = &drgn_language_c; } return NULL; From b859caf9933f2aefafed5f15055e8247c112b117 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 4 Jun 2024 16:58:25 -0700 Subject: [PATCH 6/6] Allow naming and configuring order of object finders This one doesn't need any changes to the callback signature, just the new interface. We also keep add_object_finder() for compatibility. Signed-off-by: Omar Sandoval --- _drgn.pyi | 59 ++++++++++++++++++++---- docs/advanced_usage.rst | 2 +- libdrgn/Makefile.am | 2 - libdrgn/debug_info.c | 9 ++-- libdrgn/debug_info.h | 6 +-- libdrgn/drgn.h | 98 ++++++++++++++++++++++++++++++---------- libdrgn/linux_kernel.c | 6 ++- libdrgn/object.h | 6 +++ libdrgn/object_index.c | 94 -------------------------------------- libdrgn/object_index.h | 85 ---------------------------------- libdrgn/program.c | 66 +++++++++++++++++++-------- libdrgn/program.h | 16 ++++--- libdrgn/python/program.c | 19 ++++++-- tests/__init__.py | 2 +- tests/test_program.py | 84 +++++++++++++++++++++++++++++----- 15 files changed, 290 insertions(+), 264 deletions(-) delete mode 100644 libdrgn/object_index.c delete mode 100644 libdrgn/object_index.h diff --git a/_drgn.pyi b/_drgn.pyi index 21020c08a..22e96b02b 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -480,6 +480,49 @@ class Program: def enabled_type_finders(self) -> List[str]: """Return the names of enabled type finders, in order.""" ... + def register_object_finder( + self, + name: str, + fn: Callable[[Program, str, FindObjectFlags, Optional[str]], Optional[Object]], + *, + enable_index: Optional[int] = None, + ) -> None: + """ + Register a callback for finding objects in the program. + + This does not enable the finder unless *enable_index* is given. + + :param name: Finder name. + :param fn: Callable taking the program, name, :class:`FindObjectFlags`, + and filename: ``(prog, name, flags, filename)``. The filename + should be matched with :func:`filename_matches()`. This should + return an :class:`Object` or ``None`` if not found. + :param enable_index: Insert the finder into the list of enabled object + finders at the given index. If -1 or greater than the number of + enabled finders, insert it at the end. If ``None`` or not given, + don't enable the finder. + :raises ValueError: if there is already a finder with the given name + """ + ... + def registered_object_finders(self) -> Set[str]: + """Return the names of all registered object finders.""" + ... + def set_enabled_object_finders(self, names: Sequence[str]) -> None: + """ + Set the list of enabled object finders. + + Finders are called in the same order as the list until an object is found. + + Finders that are not in the list are not called. + + :param names: Names of finders to enable, in order. + :raises ValueError: if no finder has a given name or the same name is + given more than once + """ + ... + def enabled_object_finders(self) -> List[str]: + """Return the names of enabled object finders, in order.""" + ... def register_symbol_finder( self, name: str, @@ -563,16 +606,16 @@ class Program: fn: Callable[[Program, str, FindObjectFlags, Optional[str]], Optional[Object]], ) -> None: """ - Register a callback for finding objects in the program. + Deprecated method to register and enable a callback for finding objects + in the program. - Callbacks are called in reverse order of the order they were added - until the object is found. So, more recently added callbacks take - precedence. + .. deprecated:: 0.0.27 + Use :meth:`register_object_finder()` instead. - :param fn: Callable taking a program, name, :class:`FindObjectFlags`, - and filename: ``(prog, name, flags, filename)``. The filename - should be matched with :func:`filename_matches()`. This should - return an :class:`Object` or ``None`` if not found. + The differences from :meth:`register_object_finder()` are: + + 1. A name for the finder is generated from *fn*. + 2. The finder is always enabled before any existing finders. """ ... def set_core_dump(self, path: Union[Path, int]) -> None: diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index fc5b5c877..2bfad0f8c 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -83,7 +83,7 @@ program "memory": run_interactive(prog, banner_func=lambda _: "BTRFS debugger") :meth:`drgn.Program.register_type_finder()` and -:meth:`drgn.Program.add_object_finder()` are the equivalent methods for +:meth:`drgn.Program.register_object_finder()` are the equivalent methods for plugging in types and objects. Environment Variables diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 689b2fec0..5da34e3bf 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -88,8 +88,6 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ nstring.h \ object.c \ object.h \ - object_index.c \ - object_index.h \ openmp.c \ openmp.h \ orc.h \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 34a36dd2e..d7cdcfd70 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2187,9 +2187,12 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, drgn_program_register_type_finder_impl(prog, &dbinfo->type_finder, "dwarf", &type_finder_ops, dbinfo, 0); - drgn_program_add_object_finder_impl(prog, &dbinfo->object_finder, - drgn_debug_info_find_object, - dbinfo); + const struct drgn_object_finder_ops object_finder_ops = { + .find = drgn_debug_info_find_object, + }; + drgn_program_register_object_finder_impl(prog, &dbinfo->object_finder, + "dwarf", &object_finder_ops, + dbinfo, 0); const struct drgn_symbol_finder_ops symbol_finder_ops = { .find = elf_symbols_search, }; diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 0b689106b..b64a327bf 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -20,7 +20,7 @@ #include "drgn.h" #include "dwarf_info.h" #include "hash_table.h" -#include "object_index.h" +#include "object.h" #include "orc_info.h" #include "string_builder.h" #include "symbol.h" @@ -253,13 +253,13 @@ struct drgn_error * drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, const struct drgn_language **ret); -/** @ref drgn_type_find_fn() that uses debugging information. */ +/** @ref drgn_type_finder_ops::find() that uses debugging information. */ struct drgn_error *drgn_debug_info_find_type(uint64_t kinds, const char *name, size_t name_len, const char *filename, void *arg, struct drgn_qualified_type *ret); -/** @ref drgn_object_find_fn() that uses debugging information. */ +/** @ref drgn_object_finder_ops::find() that uses debugging information. */ struct drgn_error * drgn_debug_info_find_object(const char *name, size_t name_len, const char *filename, diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 97c5b421d..6096469fc 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -685,38 +685,90 @@ enum drgn_find_object_flags { DRGN_FIND_OBJECT_ANY = (1 << 3) - 1, }; +/** Object finder callback table. */ +struct drgn_object_finder_ops { + /** + * Callback to destroy the object finder. + * + * This may be @c NULL. + * + * @param[in] arg Argument passed to @ref + * drgn_program_register_object_finder(). + */ + void (*destroy)(void *arg); + /** + * Callback for finding an object. + * + * @param[in] name Name of object. This is @em not null-terminated. + * @param[in] name_len Length of @p name. + * @param[in] filename Filename containing the object definition or @c + * NULL. This should be matched with @ref drgn_filename_matches(). + * @param[in] flags Flags indicating what kind of object to look for. + * @param[in] arg Argument passed to @ref + * drgn_program_register_object_finder(). + * @param[out] ret Returned object. This must only be modified on + * success. + * @return @c NULL on success, non-@c NULL on error. In particular, if + * the object is not found, this should return &@ref drgn_not_found; any + * other errors are considered fatal. + */ + struct drgn_error *(*find)(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, + void *arg, struct drgn_object *ret); +}; + /** - * Callback for finding an object. + * Register an object finding callback. * - * @param[in] name Name of object. This is @em not null-terminated. - * @param[in] name_len Length of @p name. - * @param[in] filename Filename containing the object definition or @c NULL. - * This should be matched with @ref drgn_filename_matches(). - * @param[in] flags Flags indicating what kind of object to look for. - * @param[in] arg Argument passed to @ref drgn_program_add_object_finder(). - * @param[out] ret Returned object. This must only be modified on success. - * @return @c NULL on success, non-@c NULL on error. In particular, if the - * object is not found, this should return &@ref drgn_not_found; any other - * errors are considered fatal. + * @param[in] name Finder name. This is copied. + * @param[in] ops Callback table. This is copied. + * @param[in] arg Argument to pass to callbacks. + * @param[in] enable_index Insert the finder into the list of enabled finders at + * the given index. If @ref DRGN_HANDLER_REGISTER_ENABLE_LAST or greater than + * the number of enabled finders, insert it at the end. If @ref + * DRGN_HANDLER_REGISTER_DONT_ENABLE, don’t enable the finder. */ -typedef struct drgn_error * -(*drgn_object_find_fn)(const char *name, size_t name_len, const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret); +struct drgn_error * +drgn_program_register_object_finder(struct drgn_program *prog, const char *name, + const struct drgn_object_finder_ops *ops, + void *arg, size_t enable_index); /** - * Register a object finding callback. + * Get the names of all registered object finders. * - * Callbacks are called in reverse order of the order they were added until the - * object is found. So, more recently added callbacks take precedence. + * The order of the names is arbitrary. * - * @param[in] fn The callback. - * @param[in] arg Argument to pass to @p fn. - * @return @c NULL on success, non-@c NULL on error. + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. */ struct drgn_error * -drgn_program_add_object_finder(struct drgn_program *prog, - drgn_object_find_fn fn, void *arg); +drgn_program_registered_object_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); + +/** + * Set the list of enabled object finders. + * + * Finders are called in the same order as the list until a object is found. + * + * @param[in] names Names of finders to enable, in order. + * @param[in] count Number of names in @p names. + */ +struct drgn_error * +drgn_program_set_enabled_object_finders(struct drgn_program *prog, + const char * const *names, + size_t count); + +/** + * Get the names of enabled object finders, in order. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. + */ +struct drgn_error * +drgn_program_enabled_object_finders(struct drgn_program *prog, + const char ***names_ret, size_t *count_ret); /** * Set a @ref drgn_program to a core dump. diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 493905b7b..14a161e11 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -384,8 +384,10 @@ static struct drgn_error *linux_kernel_get_vmemmap(struct drgn_program *prog, struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) { struct drgn_error *err; - err = drgn_program_add_object_finder(prog, linux_kernel_object_find, - prog); + const struct drgn_object_finder_ops ops = { + .find = linux_kernel_object_find, + }; + err = drgn_program_register_object_finder(prog, "linux", &ops, prog, 0); if (err) return err; if (!prog->lang) diff --git a/libdrgn/object.h b/libdrgn/object.h index 8fbac1d85..2b8e82747 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -34,6 +34,12 @@ * @{ */ +struct drgn_object_finder { + struct drgn_handler handler; + struct drgn_object_finder_ops ops; + void *arg; +}; + /** Allocate a zero-initialized @ref drgn_value. */ static inline bool drgn_value_zalloc(uint64_t size, union drgn_value *value_ret, char **buf_ret) diff --git a/libdrgn/object_index.c b/libdrgn/object_index.c deleted file mode 100644 index 5b49a0b84..000000000 --- a/libdrgn/object_index.c +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// SPDX-License-Identifier: LGPL-2.1-or-later - -#include -#include - -#include "object_index.h" - -void drgn_object_index_init(struct drgn_object_index *oindex) -{ - oindex->finders = NULL; -} - -void drgn_object_index_deinit(struct drgn_object_index *oindex) -{ - struct drgn_object_finder *finder = oindex->finders; - while (finder) { - struct drgn_object_finder *next = finder->next; - if (finder->free) - free(finder); - finder = next; - } -} - -struct drgn_error * -drgn_object_index_add_finder(struct drgn_object_index *oindex, - struct drgn_object_finder *finder, - drgn_object_find_fn fn, void *arg) -{ - if (finder) { - finder->free = false; - } else { - finder = malloc(sizeof(*finder)); - if (!finder) - return &drgn_enomem; - finder->free = true; - } - finder->fn = fn; - finder->arg = arg; - finder->next = oindex->finders; - oindex->finders = finder; - return NULL; -} - -struct drgn_error *drgn_object_index_find(struct drgn_object_index *oindex, - const char *name, - const char *filename, - enum drgn_find_object_flags flags, - struct drgn_object *ret) -{ - struct drgn_error *err; - size_t name_len; - struct drgn_object_finder *finder; - const char *kind_str; - - if ((flags & ~DRGN_FIND_OBJECT_ANY) || !flags) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "invalid find object flags"); - } - - name_len = strlen(name); - finder = oindex->finders; - while (finder) { - err = finder->fn(name, name_len, filename, flags, finder->arg, - ret); - if (err != &drgn_not_found) - return err; - finder = finder->next; - } - - switch (flags) { - case DRGN_FIND_OBJECT_CONSTANT: - kind_str = "constant "; - break; - case DRGN_FIND_OBJECT_FUNCTION: - kind_str = "function "; - break; - case DRGN_FIND_OBJECT_VARIABLE: - kind_str = "variable "; - break; - default: - kind_str = ""; - break; - } - if (filename) { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find %s'%s' in '%s'", - kind_str, name, filename); - } else { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find %s'%s'", kind_str, - name); - } -} diff --git a/libdrgn/object_index.h b/libdrgn/object_index.h deleted file mode 100644 index f3d843821..000000000 --- a/libdrgn/object_index.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// SPDX-License-Identifier: LGPL-2.1-or-later - -/** - * @file - * - * Object lookup. - * - * See @ref ObjectIndex. - */ - -#ifndef DRGN_OBJECT_INDEX_H -#define DRGN_OBJECT_INDEX_H - -#include "drgn.h" - -/** - * @ingroup Internals - * - * @defgroup ObjectIndex Object index - * - * Object lookup. - * - * @ref drgn_object_index provides a common interface for finding objects (e.g., - * variables, constants, and functions) in a program. - * - * @{ - */ - -/** Registered callback in a @ref drgn_object_index. */ -struct drgn_object_finder { - /** The callback. */ - drgn_object_find_fn fn; - /** Argument to pass to @ref drgn_object_finder::fn. */ - void *arg; - /** Next callback to try. */ - struct drgn_object_finder *next; - /** Whether this structure needs to be freed. */ - bool free; -}; - -/** - * Object index. - * - * A object index is used to find objects (variables, constants, and functions) - * by name. The objects are found using callbacks which are registered with @ref - * drgn_object_index_add_finder(). @ref drgn_object_index_find() searches for an - * object. - */ -struct drgn_object_index { - /** Callbacks for finding objects. */ - struct drgn_object_finder *finders; -}; - -/** Initialize a @ref drgn_object_index. */ -void drgn_object_index_init(struct drgn_object_index *oindex); - -/** Deinitialize a @ref drgn_object_index. */ -void drgn_object_index_deinit(struct drgn_object_index *oindex); - -struct drgn_error * -drgn_object_index_add_finder(struct drgn_object_index *oindex, - struct drgn_object_finder *finder, - drgn_object_find_fn fn, void *arg); - -/** - * Find an object in a @ref drgn_object_index. - * - * @param[in] oindex Object index. - * @param[in] name Name of the object. - * @param[in] filename Exact filename containing the object definition, or @c - * NULL for any definition. - * @param[in] flags Bitmask of @ref drgn_find_object_flags. - * @param[out] ret Returned object. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error *drgn_object_index_find(struct drgn_object_index *oindex, - const char *name, - const char *filename, - enum drgn_find_object_flags flags, - struct drgn_object *ret); - -/** @} */ - -#endif /* DRGN_OBJECT_INDEX_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index 11e411143..513be636f 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -28,7 +28,6 @@ #include "linux_kernel.h" #include "memory_reader.h" #include "minmax.h" -#include "object_index.h" #include "program.h" #include "symbol.h" #include "util.h" @@ -100,7 +99,6 @@ void drgn_program_init(struct drgn_program *prog, memset(prog, 0, sizeof(*prog)); drgn_memory_reader_init(&prog->reader); drgn_program_init_types(prog); - drgn_object_index_init(&prog->oindex); drgn_debug_info_init(&prog->dbinfo, prog); prog->core_fd = -1; if (platform) @@ -139,7 +137,11 @@ void drgn_program_deinit(struct drgn_program *prog) if (finder->ops.destroy) finder->ops.destroy(finder->arg); ); - drgn_object_index_deinit(&prog->oindex); + drgn_handler_list_deinit(struct drgn_object_finder, finder, + &prog->object_finders, + if (finder->ops.destroy) + finder->ops.destroy(finder->arg); + ); drgn_program_deinit_types(prog); drgn_memory_reader_deinit(&prog->reader); @@ -196,21 +198,6 @@ drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, physical); } -struct drgn_error * -drgn_program_add_object_finder_impl(struct drgn_program *prog, - struct drgn_object_finder *finder, - drgn_object_find_fn fn, void *arg) -{ - return drgn_object_index_add_finder(&prog->oindex, finder, fn, arg); -} - -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_add_object_finder(struct drgn_program *prog, - drgn_object_find_fn fn, void *arg) -{ - return drgn_program_add_object_finder_impl(prog, NULL, fn, arg); -} - #define DRGN_PROGRAM_FINDER(which) \ struct drgn_error * \ drgn_program_register_##which##_finder_impl(struct drgn_program *prog, \ @@ -284,6 +271,7 @@ drgn_program_enabled_##which##_finders(struct drgn_program *prog, \ } DRGN_PROGRAM_FINDER(type) +DRGN_PROGRAM_FINDER(object) DRGN_PROGRAM_FINDER(symbol) #undef DRGN_PROGRAM_FINDER @@ -1845,12 +1833,50 @@ drgn_program_find_object(struct drgn_program *prog, const char *name, enum drgn_find_object_flags flags, struct drgn_object *ret) { + struct drgn_error *err; + + if ((flags & ~DRGN_FIND_OBJECT_ANY) || !flags) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "invalid find object flags"); + } if (ret && drgn_object_program(ret) != prog) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "object is from wrong program"); } - return drgn_object_index_find(&prog->oindex, name, filename, flags, - ret); + + size_t name_len = strlen(name); + drgn_handler_list_for_each_enabled(struct drgn_object_finder, finder, + &prog->object_finders) { + err = finder->ops.find(name, name_len, filename, flags, + finder->arg, ret); + if (err != &drgn_not_found) + return err; + } + + const char *kind_str; + switch (flags) { + case DRGN_FIND_OBJECT_CONSTANT: + kind_str = "constant "; + break; + case DRGN_FIND_OBJECT_FUNCTION: + kind_str = "function "; + break; + case DRGN_FIND_OBJECT_VARIABLE: + kind_str = "variable "; + break; + default: + kind_str = ""; + break; + } + if (filename) { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find %s'%s' in '%s'", + kind_str, name, filename); + } else { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find %s'%s'", kind_str, + name); + } } struct drgn_error *drgn_error_symbol_not_found(uint64_t address) diff --git a/libdrgn/program.h b/libdrgn/program.h index e8687b683..a9bdb2bbc 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -25,7 +25,6 @@ #include "hash_table.h" #include "language.h" #include "memory_reader.h" -#include "object_index.h" #include "platform.h" #include "pp.h" #include "symbol.h" @@ -33,6 +32,7 @@ #include "vector.h" struct drgn_type_finder; +struct drgn_object_finder; struct drgn_symbol_finder; /** @@ -112,7 +112,7 @@ struct drgn_program { /* * Debugging information. */ - struct drgn_object_index oindex; + struct drgn_handler_list object_finders; struct drgn_debug_info dbinfo; struct drgn_handler_list symbol_finders; @@ -253,11 +253,6 @@ struct drgn_error *drgn_program_init_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid); -struct drgn_error * -drgn_program_add_object_finder_impl(struct drgn_program *prog, - struct drgn_object_finder *finder, - drgn_object_find_fn fn, void *arg); - static inline struct drgn_error * drgn_program_is_little_endian(struct drgn_program *prog, bool *ret) { @@ -391,6 +386,13 @@ drgn_program_register_type_finder_impl(struct drgn_program *prog, const struct drgn_type_finder_ops *ops, void *arg, size_t enable_index); +struct drgn_error * +drgn_program_register_object_finder_impl(struct drgn_program *prog, + struct drgn_object_finder *finder, + const char *name, + const struct drgn_object_finder_ops *ops, + void *arg, size_t enable_index); + struct drgn_error * drgn_program_register_symbol_finder_impl(struct drgn_program *prog, struct drgn_symbol_finder *finder, diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 000bbfbdb..dfb577de7 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -569,6 +569,7 @@ py_symbol_find_fn(const char *name, uint64_t addr, _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); \ if (!arg) \ return NULL; +#define object_finder_arg(self, fn) PyObject *arg = fn; #define symbol_finder_arg type_finder_arg #define DEFINE_PROGRAM_FINDER_METHODS(which) \ @@ -712,6 +713,7 @@ static PyObject *Program_enabled_##which##_finders(Program *self) \ } DEFINE_PROGRAM_FINDER_METHODS(type) +DEFINE_PROGRAM_FINDER_METHODS(object) DEFINE_PROGRAM_FINDER_METHODS(symbol) static PyObject *deprecated_finder_name_obj(PyObject *fn) @@ -783,13 +785,23 @@ static PyObject *Program_add_object_finder(Program *self, PyObject *args, return NULL; } - if (Program_hold_object(self, fn)) + _cleanup_pydecref_ PyObject *name_obj = deprecated_finder_name_obj(fn); + if (!name_obj) + return NULL; + const char *name = PyUnicode_AsUTF8(name_obj); + if (!name) return NULL; - err = drgn_program_add_object_finder(&self->prog, py_object_find_fn, - fn); + if (!Program_hold_reserve(self, 1)) + return NULL; + const struct drgn_object_finder_ops ops = { + .find = py_object_find_fn, + }; + err = drgn_program_register_object_finder(&self->prog, name, &ops, fn, + 0); if (err) return set_drgn_error(err); + Program_hold_object(self, fn); Py_RETURN_NONE; } @@ -1457,6 +1469,7 @@ static PyMethodDef Program_methods[] = { {"add_memory_segment", (PyCFunction)Program_add_memory_segment, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_memory_segment_DOC}, PROGRAM_FINDER_METHOD_DEFS(type), + PROGRAM_FINDER_METHOD_DEFS(object), PROGRAM_FINDER_METHOD_DEFS(symbol), {"add_type_finder", (PyCFunction)Program_add_type_finder, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_type_finder_DOC}, diff --git a/tests/__init__.py b/tests/__init__.py index c29a51085..e3d4fd6ea 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -107,7 +107,7 @@ def mock_object_find(prog, name, flags, filename): if types is not None: prog.register_type_finder("mock", mock_find_type, enable_index=0) if objects is not None: - prog.add_object_finder(mock_object_find) + prog.register_object_finder("mock", mock_object_find, enable_index=0) return prog diff --git a/tests/test_program.py b/tests/test_program.py index 7f5306129..39d3dc1e3 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -512,6 +512,78 @@ def test_add_not_found(self): self.assertRaises(LookupError, prog.type, "struct foo") +class TestObjectFinder(TestCase): + def test_register(self): + prog = Program(MOCK_PLATFORM) + + # We don't test every corner case because the symbol finder tests cover + # the shared part. + self.assertEqual(prog.registered_object_finders(), {"dwarf"}) + self.assertEqual(prog.enabled_object_finders(), ["dwarf"]) + + prog.register_object_finder( + "foo", lambda prog, name, flags, filename: None, enable_index=-1 + ) + self.assertEqual(prog.registered_object_finders(), {"dwarf", "foo"}) + self.assertEqual(prog.enabled_object_finders(), ["dwarf", "foo"]) + + prog.set_enabled_object_finders(["foo"]) + self.assertEqual(prog.registered_object_finders(), {"dwarf", "foo"}) + self.assertEqual(prog.enabled_object_finders(), ["foo"]) + + def test_add_object_finder(self): + prog = Program(MOCK_PLATFORM) + + def dummy(prog, name, flags, filename): + return Object(prog, "int", 1) + + prog.add_object_finder(dummy) + self.assertTrue( + any("dummy" in name for name in prog.registered_object_finders()) + ) + self.assertIn("dummy", prog.enabled_object_finders()[0]) + self.assertIdentical(prog.object("foo"), Object(prog, "int", 1)) + + def test_register_invalid(self): + prog = Program(MOCK_PLATFORM) + self.assertRaises(TypeError, prog.register_object_finder, "foo", "foo") + prog.register_object_finder( + "foo", lambda prog, name, flags, filename: "foo", enable_index=0 + ) + self.assertRaises(TypeError, prog.object, "foo") + + def test_add_invalid(self): + prog = Program(MOCK_PLATFORM) + self.assertRaises(TypeError, prog.add_object_finder, "foo") + prog.add_object_finder(lambda prog, name, flags, filename: "foo") + self.assertRaises(TypeError, prog.object, "foo") + + def test_wrong_program(self): + prog = Program(MOCK_PLATFORM) + prog.register_object_finder( + "foo", + lambda prog, name, flags, filename: Object( + Program(MOCK_PLATFORM), "int", 1 + ), + enable_index=0, + ) + self.assertRaisesRegex( + ValueError, + "different program", + prog.object, + "foo", + ) + + def test_not_found(self): + prog = Program(MOCK_PLATFORM) + self.assertRaises(LookupError, prog.object, "foo") + prog.register_object_finder( + "foo", lambda prog, name, flags, filename: None, enable_index=0 + ) + self.assertRaises(LookupError, prog.object, "foo") + self.assertFalse("foo" in prog) + + class TestTypes(MockProgramTestCase): def test_already_type(self): self.assertIdentical( @@ -855,18 +927,6 @@ def test_array_of_pointers_to_array(self): class TestObjects(MockProgramTestCase): - def test_invalid_finder(self): - self.assertRaises(TypeError, self.prog.add_object_finder, "foo") - - self.prog.add_object_finder(lambda prog, name, flags, filename: "foo") - self.assertRaises(TypeError, self.prog.object, "foo") - - def test_not_found(self): - self.assertRaises(LookupError, self.prog.object, "foo") - self.prog.add_object_finder(lambda prog, name, flags, filename: None) - self.assertRaises(LookupError, self.prog.object, "foo") - self.assertFalse("foo" in self.prog) - def test_constant(self): self.objects.append( MockObject("PAGE_SIZE", self.prog.int_type("int", 4, True), value=4096)