libdrgn: support value objects with >64-bit integer types

The Linux kernel's struct task_struct on AArch64 contains an array of
__uint128_t:

  >>> task = find_task(prog, 1)
  >>> task.type_
  struct task_struct *
  >>> task.thread.type_
  struct thread_struct {
          struct cpu_context cpu_context;
          struct {
                  unsigned long tp_value;
                  unsigned long tp2_value;
                  struct user_fpsimd_state fpsimd_state;
          } uw;
          enum fp_type fp_type;
          unsigned int fpsimd_cpu;
          void *sve_state;
          void *sme_state;
          unsigned int vl[2];
          unsigned int vl_onexec[2];
          unsigned long fault_address;
          unsigned long fault_code;
          struct debug_info debug;
          struct ptrauth_keys_user keys_user;
          struct ptrauth_keys_kernel keys_kernel;
          u64 mte_ctrl;
          u64 sctlr_user;
          u64 svcr;
          u64 tpidr2_el0;
  }
  >>> task.thread.uw.fpsimd_state.type_
  struct user_fpsimd_state {
          __int128 unsigned vregs[32];
          __u32 fpsr;
          __u32 fpcr;
          __u32 __reserved[2];
  }

As a result, printing a task_struct fails:

  >>> task
  Traceback (most recent call last):
    File "<console>", line 1, in <module>
    File "/host/home/osandov/repos/drgn3/drgn/cli.py", line 140, in _displayhook
      text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns)
  NotImplementedError: integer values larger than 64 bits are not yet supported

PR #311 suggested treating >64-bit integers as byte arrays for now; I
tried an alternate hack of handling >64-bit integers only in the
pretty-printing code. Both of these had issues, though.

Instead, let's push >64-bit integer support a little further and allow
storing "big integer" value objects. We still don't support any
operations on them, so this still doesn't complete #170. We store the
raw bytes of the value for now, but we'll probably change this if we add
support for operations (e.g., to store the value as an mp_limb_t array
for GMP).

We also print >64-bit integer types in hexadecimal for simplicity. This
is inconsistent with the existing behavior of printing in decimal, but
more readable. In the future, we might want to add heuristics to decide
when to print in decimal vs hexadecimal for all sizes.

Closes #311.

Signed-off-by: Omar Sandoval <[email protected]>
osandov · Aug 2, 2023 · 243f6fb · 243f6fb
1 parent 91b26e2
commit 243f6fb
Show file tree

Hide file tree

Showing 11 changed files with 925 additions and 362 deletions.
diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in
@@ -1138,24 +1138,37 @@ enum drgn_object_encoding {
 	 * Memory buffer.
 	 *
 	 * This is used for objects with a structure, union, class, or array
-	 * type. The value is a buffer of the contents of that object's memory
-	 * in the program.
+	 * type.
 	 */
 	DRGN_OBJECT_ENCODING_BUFFER,
 	/**
 	 * Signed integer.
 	 *
 	 * This is used for objects with a signed integer or signed enumerated
-	 * type.
+	 * type no larger than 64 bits.
 	 */
 	DRGN_OBJECT_ENCODING_SIGNED,
 	/**
 	 * Unsigned integer.
 	 *
 	 * This is used for objects with a unsigned integer, boolean, or pointer
-	 * type.
+	 * type no larger than 64 bits.
 	 */
 	DRGN_OBJECT_ENCODING_UNSIGNED,
+	/**
+	 * Big signed integer.
+	 *
+	 * This is used for objects with a signed integer or signed enumerated
+	 * type larger than 64 bits.
+	 */
+	DRGN_OBJECT_ENCODING_SIGNED_BIG,
+	/**
+	 * Big unsigned integer.
+	 *
+	 * This is used for objects with an unsigned integer, boolean, or
+	 * pointer type larger than 64 bits.
+	 */
+	DRGN_OBJECT_ENCODING_UNSIGNED_BIG,
 	/**
 	 * Floating-point value.
 	 *
@@ -1198,23 +1211,30 @@ drgn_object_encoding_is_complete(enum drgn_object_encoding encoding)
 /** Value of a @ref drgn_object. */
 union drgn_value {
 	/**
-	 * Pointer to an external buffer for a @ref
-	 * drgn_object_encoding::DRGN_OBJECT_ENCODING_BUFFER value.
+	 * Pointer to an external buffer for a @ref DRGN_OBJECT_ENCODING_BUFFER,
+	 * @ref DRGN_OBJECT_ENCODING_SIGNED_BIG, or @ref
+	 * DRGN_OBJECT_ENCODING_UNSIGNED_BIG value.
+	 *
+	 * For @ref DRGN_OBJECT_ENCODING_BUFFER, this contains the object's
+	 * representation in the memory of the program.
+	 *
+	 * For @ref DRGN_OBJECT_ENCODING_SIGNED_BIG and @ref
+	 * DRGN_OBJECT_ENCODING_UNSIGNED_BIG, the representation of the value is
+	 * an implementation detail which may change.
 	 */
 	char *bufp;
 	/**
-	 * Inline buffer for a @ref
-	 * drgn_object_encoding::DRGN_OBJECT_ENCODING_BUFFER value.
+	 * Inline buffer for a @ref DRGN_OBJECT_ENCODING_BUFFER value.
 	 *
 	 * Tiny buffers (see @ref drgn_value_is_inline()) are stored inline here
 	 * instead of in a separate allocation.
 	 */
 	char ibuf[8];
-	/** @ref drgn_object_encoding::DRGN_OBJECT_ENCODING_SIGNED value. */
+	/** @ref DRGN_OBJECT_ENCODING_SIGNED value. */
 	int64_t svalue;
-	/** @ref drgn_object_encoding::DRGN_OBJECT_ENCODING_UNSIGNED value. */
+	/** @ref DRGN_OBJECT_ENCODING_UNSIGNED value. */
 	uint64_t uvalue;
-	/** @ref drgn_object_encoding::DRGN_OBJECT_ENCODING_FLOAT value. */
+	/** @ref DRGN_OBJECT_ENCODING_FLOAT value. */
 	double fvalue;
 };
 
@@ -1277,11 +1297,11 @@ struct drgn_object {
 	/**
 	 * Whether this object is little-endian.
 	 *
-	 * Valid only for scalars (i.e., @ref
-	 * drgn_object_encoding::DRGN_OBJECT_ENCODING_SIGNED, @ref
-	 * drgn_object_encoding::DRGN_OBJECT_ENCODING_UNSIGNED, @ref
-	 * drgn_object_encoding::DRGN_OBJECT_ENCODING_FLOAT, or @ref
-	 * drgn_object_encoding::DRGN_OBJECT_ENCODING_INCOMPLETE_INTEGER).
+	 * Valid only for scalars (i.e., @ref DRGN_OBJECT_ENCODING_SIGNED, @ref
+	 * DRGN_OBJECT_ENCODING_UNSIGNED, @ref DRGN_OBJECT_ENCODING_SIGNED_BIG,
+	 * @ref DRGN_OBJECT_ENCODING_UNSIGNED_BIG, @ref
+	 * DRGN_OBJECT_ENCODING_FLOAT, or @ref
+	 * DRGN_OBJECT_ENCODING_INCOMPLETE_INTEGER).
 	 */
 	bool little_endian;
 	/**

diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c
@@ -690,30 +690,70 @@ c_format_int_object(const struct drgn_object *obj,
 		return NULL;
 	}
 
+	union drgn_value value_mem;
+	const union drgn_value *value;
+	err = drgn_object_read_value(obj, &value_mem, &value);
+	if (err)
+		return err;
 	switch (obj->encoding) {
-	case DRGN_OBJECT_ENCODING_SIGNED: {
-		int64_t svalue;
-
-		err = drgn_object_read_signed(obj, &svalue);
-		if (err)
-			return err;
-		if (!string_builder_appendf(sb, "%" PRId64, svalue))
-			return &drgn_enomem;
-		return NULL;
-	}
-	case DRGN_OBJECT_ENCODING_UNSIGNED: {
-		uint64_t uvalue;
-
-		err = drgn_object_read_unsigned(obj, &uvalue);
-		if (err)
-			return err;
-		if (!string_builder_appendf(sb, "%" PRIu64, uvalue))
-			return &drgn_enomem;
-		return NULL;
+	case DRGN_OBJECT_ENCODING_SIGNED:
+		if (!string_builder_appendf(sb, "%" PRId64, value->svalue)) {
+			err = &drgn_enomem;
+			goto out;
+		}
+		break;
+	case DRGN_OBJECT_ENCODING_UNSIGNED:
+		if (!string_builder_appendf(sb, "%" PRIu64, value->uvalue)) {
+			err = &drgn_enomem;
+			goto out;
+		}
+		break;
+	case DRGN_OBJECT_ENCODING_SIGNED_BIG:
+	case DRGN_OBJECT_ENCODING_UNSIGNED_BIG: {
+		if (!string_builder_append(sb, "0x")) {
+			err = &drgn_enomem;
+			goto out;
+		}
+		const uint8_t *buf = (uint8_t *)value->bufp;
+		size_t bytes = drgn_object_size(obj);
+		if (obj->little_endian) {
+			size_t i = bytes - 1;
+			while (i > 0 && buf[i] == 0)
+				i--;
+			if (!string_builder_appendf(sb, "%" PRIx8, buf[i])) {
+				err = &drgn_enomem;
+				goto out;
+			}
+			while (i-- > 0) {
+				if (!string_builder_appendf(sb, "%02" PRIx8, buf[i])) {
+					err = &drgn_enomem;
+					goto out;
+				}
+			}
+		} else {
+			size_t i = 0;
+			while (i < bytes - 1 && buf[i] == 0)
+				i++;
+			if (!string_builder_appendf(sb, "%" PRIx8, buf[i])) {
+				err = &drgn_enomem;
+				goto out;
+			}
+			while (++i < bytes) {
+				if (!string_builder_appendf(sb, "%02" PRIx8, buf[i])) {
+					err = &drgn_enomem;
+					goto out;
+				}
+			}
+		}
+		break;
 	}
 	default:
 		UNREACHABLE();
 	}
+	err = NULL;
+out:
+	drgn_object_deinit_value(obj, value);
+	return err;
 }
 
 static struct drgn_error *