Skip to content

Commit

Permalink
libdrgn: support value objects with >64-bit integer types
Browse files Browse the repository at this point in the history
The Linux kernel's struct task_struct on AArch64 contains an array of
__uint128_t:

  >>> task = find_task(prog, 1)
  >>> task.type_
  struct task_struct *
  >>> task.thread.type_
  struct thread_struct {
          struct cpu_context cpu_context;
          struct {
                  unsigned long tp_value;
                  unsigned long tp2_value;
                  struct user_fpsimd_state fpsimd_state;
          } uw;
          enum fp_type fp_type;
          unsigned int fpsimd_cpu;
          void *sve_state;
          void *sme_state;
          unsigned int vl[2];
          unsigned int vl_onexec[2];
          unsigned long fault_address;
          unsigned long fault_code;
          struct debug_info debug;
          struct ptrauth_keys_user keys_user;
          struct ptrauth_keys_kernel keys_kernel;
          u64 mte_ctrl;
          u64 sctlr_user;
          u64 svcr;
          u64 tpidr2_el0;
  }
  >>> task.thread.uw.fpsimd_state.type_
  struct user_fpsimd_state {
          __int128 unsigned vregs[32];
          __u32 fpsr;
          __u32 fpcr;
          __u32 __reserved[2];
  }

As a result, printing a task_struct fails:

  >>> task
  Traceback (most recent call last):
    File "<console>", line 1, in <module>
    File "/host/home/osandov/repos/drgn3/drgn/cli.py", line 140, in _displayhook
      text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns)
  NotImplementedError: integer values larger than 64 bits are not yet supported

PR #311 suggested treating >64-bit integers as byte arrays for now; I
tried an alternate hack of handling >64-bit integers only in the
pretty-printing code. Both of these had issues, though.

Instead, let's push >64-bit integer support a little further and allow
storing "big integer" value objects. We still don't support any
operations on them, so this still doesn't complete #170. We store the
raw bytes of the value for now, but we'll probably change this if we add
support for operations (e.g., to store the value as an mp_limb_t array
for GMP). We also print values of >64-bit integer types in hexadecimal
for simplicity. This is inconsistent with the existing behavior of
printing integers of up to 64 bits in decimal, but it is more readable.
In the future, we might want to add heuristics to decide when to print
in decimal vs. hexadecimal for all sizes.

Closes #311.

Signed-off-by: Omar Sandoval <[email protected]>
  • Loading branch information
osandov committed Aug 2, 2023
1 parent 91b26e2 commit 243f6fb
Show file tree
Hide file tree
Showing 11 changed files with 925 additions and 362 deletions.
52 changes: 36 additions & 16 deletions libdrgn/drgn.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -1138,24 +1138,37 @@ enum drgn_object_encoding {
* Memory buffer.
*
* This is used for objects with a structure, union, class, or array
* type. The value is a buffer of the contents of that object's memory
* in the program.
* type.
*/
DRGN_OBJECT_ENCODING_BUFFER,
/**
* Signed integer.
*
* This is used for objects with a signed integer or signed enumerated
* type.
* type no larger than 64 bits.
*/
DRGN_OBJECT_ENCODING_SIGNED,
/**
* Unsigned integer.
*
* This is used for objects with an unsigned integer, boolean, or pointer
* type.
* type no larger than 64 bits.
*/
DRGN_OBJECT_ENCODING_UNSIGNED,
/**
* Big signed integer.
*
* This is used for objects with a signed integer or signed enumerated
* type larger than 64 bits.
*/
DRGN_OBJECT_ENCODING_SIGNED_BIG,
/**
* Big unsigned integer.
*
* This is used for objects with an unsigned integer, boolean, or pointer
* type larger than 64 bits.
*/
DRGN_OBJECT_ENCODING_UNSIGNED_BIG,
/**
* Floating-point value.
*
Expand Down Expand Up @@ -1198,23 +1211,30 @@ drgn_object_encoding_is_complete(enum drgn_object_encoding encoding)
/** Value of a @ref drgn_object. */
union drgn_value {
/**
* Pointer to an external buffer for a @ref
* drgn_object_encoding::DRGN_OBJECT_ENCODING_BUFFER value.
* Pointer to an external buffer for a @ref DRGN_OBJECT_ENCODING_BUFFER,
* @ref DRGN_OBJECT_ENCODING_SIGNED_BIG, or @ref
* DRGN_OBJECT_ENCODING_UNSIGNED_BIG value.
*
* For @ref DRGN_OBJECT_ENCODING_BUFFER, this contains the object's
* representation in the memory of the program.
*
* For @ref DRGN_OBJECT_ENCODING_SIGNED_BIG and @ref
* DRGN_OBJECT_ENCODING_UNSIGNED_BIG, the representation of the value is
* an implementation detail which may change.
*/
char *bufp;
/**
* Inline buffer for a @ref
* drgn_object_encoding::DRGN_OBJECT_ENCODING_BUFFER value.
* Inline buffer for a @ref DRGN_OBJECT_ENCODING_BUFFER value.
*
* Tiny buffers (see @ref drgn_value_is_inline()) are stored inline here
* instead of in a separate allocation.
*/
char ibuf[8];
/** @ref drgn_object_encoding::DRGN_OBJECT_ENCODING_SIGNED value. */
/** @ref DRGN_OBJECT_ENCODING_SIGNED value. */
int64_t svalue;
/** @ref drgn_object_encoding::DRGN_OBJECT_ENCODING_UNSIGNED value. */
/** @ref DRGN_OBJECT_ENCODING_UNSIGNED value. */
uint64_t uvalue;
/** @ref drgn_object_encoding::DRGN_OBJECT_ENCODING_FLOAT value. */
/** @ref DRGN_OBJECT_ENCODING_FLOAT value. */
double fvalue;
};

Expand Down Expand Up @@ -1277,11 +1297,11 @@ struct drgn_object {
/**
* Whether this object is little-endian.
*
* Valid only for scalars (i.e., @ref
* drgn_object_encoding::DRGN_OBJECT_ENCODING_SIGNED, @ref
* drgn_object_encoding::DRGN_OBJECT_ENCODING_UNSIGNED, @ref
* drgn_object_encoding::DRGN_OBJECT_ENCODING_FLOAT, or @ref
* drgn_object_encoding::DRGN_OBJECT_ENCODING_INCOMPLETE_INTEGER).
* Valid only for scalars (i.e., @ref DRGN_OBJECT_ENCODING_SIGNED, @ref
* DRGN_OBJECT_ENCODING_UNSIGNED, @ref DRGN_OBJECT_ENCODING_SIGNED_BIG,
* @ref DRGN_OBJECT_ENCODING_UNSIGNED_BIG, @ref
* DRGN_OBJECT_ENCODING_FLOAT, or @ref
* DRGN_OBJECT_ENCODING_INCOMPLETE_INTEGER).
*/
bool little_endian;
/**
Expand Down
78 changes: 59 additions & 19 deletions libdrgn/language_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -690,30 +690,70 @@ c_format_int_object(const struct drgn_object *obj,
return NULL;
}

union drgn_value value_mem;
const union drgn_value *value;
err = drgn_object_read_value(obj, &value_mem, &value);
if (err)
return err;
switch (obj->encoding) {
case DRGN_OBJECT_ENCODING_SIGNED: {
int64_t svalue;

err = drgn_object_read_signed(obj, &svalue);
if (err)
return err;
if (!string_builder_appendf(sb, "%" PRId64, svalue))
return &drgn_enomem;
return NULL;
}
case DRGN_OBJECT_ENCODING_UNSIGNED: {
uint64_t uvalue;

err = drgn_object_read_unsigned(obj, &uvalue);
if (err)
return err;
if (!string_builder_appendf(sb, "%" PRIu64, uvalue))
return &drgn_enomem;
return NULL;
case DRGN_OBJECT_ENCODING_SIGNED:
if (!string_builder_appendf(sb, "%" PRId64, value->svalue)) {
err = &drgn_enomem;
goto out;
}
break;
case DRGN_OBJECT_ENCODING_UNSIGNED:
if (!string_builder_appendf(sb, "%" PRIu64, value->uvalue)) {
err = &drgn_enomem;
goto out;
}
break;
case DRGN_OBJECT_ENCODING_SIGNED_BIG:
case DRGN_OBJECT_ENCODING_UNSIGNED_BIG: {
if (!string_builder_append(sb, "0x")) {
err = &drgn_enomem;
goto out;
}
const uint8_t *buf = (uint8_t *)value->bufp;
size_t bytes = drgn_object_size(obj);
if (obj->little_endian) {
size_t i = bytes - 1;
while (i > 0 && buf[i] == 0)
i--;
if (!string_builder_appendf(sb, "%" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
while (i-- > 0) {
if (!string_builder_appendf(sb, "%02" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
}
} else {
size_t i = 0;
while (i < bytes - 1 && buf[i] == 0)
i++;
if (!string_builder_appendf(sb, "%" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
while (++i < bytes) {
if (!string_builder_appendf(sb, "%02" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
}
}
break;
}
default:
UNREACHABLE();
}
err = NULL;
out:
drgn_object_deinit_value(obj, value);
return err;
}

static struct drgn_error *
Expand Down
Loading

0 comments on commit 243f6fb

Please sign in to comment.