From 0b5fe9f24a382cafddef47f65ee93bdd767eeef7 Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Wed, 11 Dec 2019 00:00:05 +0100 Subject: [PATCH 01/14] Allocate enough memory for ranges in mipsdisasm --- mipsdisasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 067389d..663d25f 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -725,7 +725,7 @@ static void parse_arguments(int argc, char *argv[], arg_config *config) print_usage(); exit(1); } - config->ranges = malloc(argc / 2 * sizeof(*config->ranges)); + config->ranges = malloc(argc * sizeof(*config->ranges)); config->range_count = 0; for (int i = 1; i < argc; i++) { if (argv[i][0] == '-') { From de698e10bf972a5f2c5dc354599e1f0a3ecf9158 Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sat, 21 Dec 2019 13:18:10 +0100 Subject: [PATCH 02/14] Use printf("%s", string) instead of printf(string) --- mipsdisasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 663d25f..f818413 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -496,7 +496,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) unsigned int branch_target = (unsigned int)insn->operands[o].imm; label = labels_find(&block->locals, branch_target); if (label >= 0) { - fprintf(out, block->locals.labels[label].name); + fprintf(out, "%s", block->locals.labels[label].name); } else { fprintf(out, "0x%08X", branch_target); } From af378dbe620e447b6bfb3ca4da418279f24831ff Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 18:31:32 +0100 Subject: [PATCH 03/14] Add a vector type to mipsdisasm --- mipsdisasm.c | 156 +++++++++++++++++++++++++-------------------------- 1 file changed, 75 insertions(+), 81 deletions(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index f818413..0953c08 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -12,18 +12,34 @@ #define MIPSDISASM_VERSION "0.2+" // typedefs +#define vec(type) vec_ ## type +#define 
DEFINE_VEC(type) typedef struct { type *items; int alloc; int count; } vec(type); +#define vec_at(v, index) &((v)->items)[index] + +#define vec_alloc(v, initial_capacity) do { \ + (v).count = 0; \ + (v).alloc = (initial_capacity); \ + (v).items = malloc(sizeof(*(v).items) * (v).alloc); \ +} while (0) + +#define vec_free(v) do { \ + free(v.items); \ + v.items = NULL; \ +} while (0) + +#define vec_push(v) (((v).count >= (v).alloc ? \ + ((v).alloc *= 2, \ + (v).items = realloc((v).items, sizeof(*(v).items) * (v).alloc), \ + 1) : 1), \ + &(v).items[(v).count++]) + typedef struct { char name[60]; unsigned int vaddr; } asm_label; -typedef struct -{ - asm_label *labels; - int alloc; - int count; -} label_buf; +DEFINE_VEC(asm_label) typedef struct { @@ -47,7 +63,7 @@ typedef struct typedef struct _asm_block { - label_buf locals; + vec(asm_label) locals; disasm_data *instructions; int instruction_count; unsigned int offset; @@ -55,14 +71,13 @@ typedef struct _asm_block unsigned int vaddr; } asm_block; +DEFINE_VEC(asm_block) + // hidden disassembler state struct typedef struct _disasm_state { - label_buf globals; - - asm_block *blocks; - int block_alloc; - int block_count; + vec(asm_label) globals; + vec(asm_block) blocks; csh handle; @@ -70,21 +85,9 @@ typedef struct _disasm_state int merge_pseudo; } disasm_state; -// default label buffer allocate -static void labels_alloc(label_buf *buf) -{ - buf->count = 0; - buf->alloc = 128; - buf->labels = malloc(sizeof(*buf->labels) * buf->alloc); -} - -static void labels_add(label_buf *buf, const char *name, unsigned int vaddr) +static void labels_add(vec(asm_label) *vec, const char *name, unsigned int vaddr) { - if (buf->count >= buf->alloc) { - buf->alloc *= 2; - buf->labels = realloc(buf->labels, sizeof(*buf->labels) * buf->alloc); - } - asm_label *l = &buf->labels[buf->count]; + asm_label *l = vec_push(*vec); // if name is null, generate based on vaddr if (name == NULL) { sprintf(l->name, "L%08X", vaddr); @@ -92,7 +95,6 @@ static 
void labels_add(label_buf *buf, const char *name, unsigned int vaddr) strcpy(l->name, name); } l->vaddr = vaddr; - buf->count++; } static int label_cmp(const void *a, const void *b) @@ -109,18 +111,18 @@ static int label_cmp(const void *a, const void *b) } } -static void labels_sort(label_buf *buf) +static void labels_sort(vec(asm_label) *vec) { - qsort(buf->labels, buf->count, sizeof(buf->labels[0]), label_cmp); + qsort(vec->items, vec->count, sizeof(vec->items[0]), label_cmp); } -// labels: label buffer to search in +// vec: label vector to search in // vaddr: virtual address to find -// returns index in buf->labels if found, -1 otherwise -static int labels_find(const label_buf *buf, unsigned int vaddr) +// returns index in vec->items if found, -1 otherwise +static int labels_find(const vec(asm_label) *vec, unsigned int vaddr) { - for (int i = 0; i < buf->count; i++) { - if (buf->labels[i].vaddr == vaddr) { + for (int i = 0; i < vec->count; i++) { + if (vec->items[i].vaddr == vaddr) { return i; } } @@ -130,7 +132,7 @@ static int labels_find(const label_buf *buf, unsigned int vaddr) // try to find a matching LUI for a given register static void link_with_lui(disasm_state *state, int block_id, int offset, unsigned int reg, unsigned int mem_imm) { - asm_block *block = &state->blocks[block_id]; + asm_block *block = &state->blocks.items[block_id]; #define MAX_LOOKBACK 128 disasm_data *insn = block->instructions; // don't attempt to compute addresses for zero offset @@ -183,7 +185,7 @@ static void link_with_lui(disasm_state *state, int block_id, int offset, unsigne // disassemble a block of code and collect JALs and local labels static void disassemble_block(unsigned char *data, unsigned int length, unsigned int vaddr, disasm_state *state, int block_id) { - asm_block *block = &state->blocks[block_id]; + asm_block *block = &state->blocks.items[block_id]; // capstone structures require a lot of data, so only request a small block at a time and preserve the required data 
int remaining = length; @@ -347,11 +349,8 @@ static void disassemble_block(unsigned char *data, unsigned int length, unsigned disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo) { disasm_state *state = malloc(sizeof(*state)); - labels_alloc(&state->globals); - - state->block_count = 0; - state->block_alloc = 128; - state->blocks = malloc(sizeof(*state->blocks) * state->block_alloc); + vec_alloc(state->globals, 128); + vec_alloc(state->blocks, 128); state->syntax = syntax; state->merge_pseudo = merge_pseudo; @@ -370,16 +369,15 @@ disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo) void disasm_state_free(disasm_state *state) { if (state) { - for (int i = 0; i < state->block_count; i++) { - if (state->blocks[i].instructions) { - free(state->blocks[i].instructions); - state->blocks[i].instructions = NULL; + for (int i = 0; i < state->blocks.count; i++) { + asm_block *block = &state->blocks.items[i]; + if (block->instructions) { + free(block->instructions); + block->instructions = NULL; } } - if (state->blocks) { - free(state->blocks); - state->blocks = NULL; - } + vec_free(state->blocks); + vec_free(state->globals); cs_close(&state->handle); } } @@ -403,23 +401,18 @@ int disasm_label_lookup(const disasm_state *state, unsigned int vaddr, char *nam void mipsdisasm_pass1(unsigned char *data, unsigned int offset, unsigned int length, unsigned int vaddr, disasm_state *state) { - if (state->block_count >= state->block_alloc) { - state->block_alloc *= 2; - state->blocks = realloc(state->blocks, sizeof(*state->blocks) * state->block_alloc); - } - asm_block *block = &state->blocks[state->block_count]; - labels_alloc(&block->locals); + asm_block *block = vec_push(state->blocks); + vec_alloc(block->locals, 128); block->offset = offset; block->length = length; block->vaddr = vaddr; // collect all branch and jump targets - disassemble_block(&data[offset], length, vaddr, state, state->block_count); + disassemble_block(&data[offset], length, vaddr, 
state, state->blocks.count - 1); // sort global and local labels labels_sort(&state->globals); - labels_sort(&state->blocks[state->block_count].locals); - state->block_count++; + labels_sort(&block->locals); } void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) @@ -431,9 +424,9 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) int label; int indent = 0; // lookup block by offset - for (int i = 0; i < state->block_count; i++) { - if (state->blocks[i].offset == offset) { - block = &state->blocks[i]; + for (int i = 0; i < state->blocks.count; i++) { + if (state->blocks.items[i].offset == offset) { + block = &state->blocks.items[i]; break; } } @@ -443,10 +436,10 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) } vaddr = block->vaddr; // skip labels before this section - while ( (global_idx < state->globals.count) && (vaddr > state->globals.labels[global_idx].vaddr) ) { + while ( (global_idx < state->globals.count) && (vaddr > state->globals.items[global_idx].vaddr) ) { global_idx++; } - while ( (local_idx < block->locals.count) && (vaddr > block->locals.labels[local_idx].vaddr) ) { + while ( (local_idx < block->locals.count) && (vaddr > block->locals.items[local_idx].vaddr) ) { local_idx++; } for (int i = 0; i < block->instruction_count; i++) { @@ -456,13 +449,14 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) fprintf(out, "\n"); } // insert all global labels at this address - while ( (global_idx < state->globals.count) && (vaddr == state->globals.labels[global_idx].vaddr) ) { - fprintf(out, "%s:\n", state->globals.labels[global_idx].name); + while ( (global_idx < state->globals.count) && (vaddr == state->globals.items[global_idx].vaddr) ) { + const char *name = state->globals.items[global_idx].name; + fprintf(out, "%s:\n", name); global_idx++; } // insert all local labels at this address - while ( (local_idx < block->locals.count) && (vaddr == 
block->locals.labels[local_idx].vaddr) ) { - fprintf(out, "%s:\n", block->locals.labels[local_idx].name); + while ( (local_idx < block->locals.count) && (vaddr == block->locals.items[local_idx].vaddr) ) { + fprintf(out, "%s:\n", block->locals.items[local_idx].name); local_idx++; } fprintf(out, "/* %06X %08X %02X%02X%02X%02X */ ", offset, vaddr, insn->bytes[0], insn->bytes[1], insn->bytes[2], insn->bytes[3]); @@ -478,7 +472,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) unsigned int jal_target = (unsigned int)insn->operands[0].imm; label = labels_find(&state->globals, jal_target); if (label >= 0) { - fprintf(out, "%s\n", state->globals.labels[label].name); + fprintf(out, "%s\n", state->globals.items[label].name); } else { fprintf(out, "0x%08X\n", jal_target); } @@ -496,7 +490,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) unsigned int branch_target = (unsigned int)insn->operands[o].imm; label = labels_find(&block->locals, branch_target); if (label >= 0) { - fprintf(out, "%s", block->locals.labels[label].name); + fprintf(out, "%s", block->locals.items[label].name); } else { fprintf(out, "0x%08X", branch_target); } @@ -558,7 +552,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) case MIPS_INS_ADDIU: fprintf(out, "%-5s $%s, %%hi(%s) # %s\n", insn->mnemonic, cs_reg_name(state->handle, insn->operands[0].reg), - state->globals.labels[label].name, insn->op_str); + state->globals.items[label].name, insn->op_str); break; case MIPS_INS_ORI: fprintf(out, "%-5s $%s, (0x%08X >> 16) # %s %s\n", insn->mnemonic, @@ -568,7 +562,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) default: // LW/SW/etc. 
fprintf(out, "%-5s $%s, %%hi(%s) # %s\n", insn->mnemonic, cs_reg_name(state->handle, insn->operands[0].reg), - state->globals.labels[label].name, insn->op_str); + state->globals.items[label].name, insn->op_str); break; } break; @@ -577,7 +571,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) case MIPS_INS_ADDIU: fprintf(out, "%-5s $%s, %s // %s %s\n", "la.u", cs_reg_name(state->handle, insn->operands[0].reg), - state->globals.labels[label].name, + state->globals.items[label].name, insn->mnemonic, insn->op_str); break; case MIPS_INS_ORI: @@ -588,7 +582,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) default: // LW/SW/etc. fprintf(out, "%-5s $%s, hi(%s) // %s\n", insn->mnemonic, cs_reg_name(state->handle, insn->operands[0].reg), - state->globals.labels[label].name, insn->op_str); + state->globals.items[label].name, insn->op_str); break; } break; @@ -599,13 +593,13 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) case ASM_GAS: fprintf(out, "%-5s $%s, %%lo(%s) # %s %s\n", insn->mnemonic, cs_reg_name(state->handle, insn->operands[0].reg), - state->globals.labels[label].name, + state->globals.items[label].name, insn->mnemonic, insn->op_str); break; case ASM_ARMIPS: fprintf(out, "%-5s $%s, %s // %s %s\n", "la.l", cs_reg_name(state->handle, insn->operands[0].reg), - state->globals.labels[label].name, + state->globals.items[label].name, insn->mnemonic, insn->op_str); break; } @@ -629,7 +623,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) fprintf(out, "%-5s $%s, %slo(%s)($%s)\n", insn->mnemonic, cs_reg_name(state->handle, insn->operands[0].reg), state->syntax == ASM_GAS ? 
"%" : "", - state->globals.labels[label].name, + state->globals.items[label].name, cs_reg_name(state->handle, insn->operands[1].reg)); } } else { @@ -853,17 +847,17 @@ int main(int argc, char *argv[]) // output global labels not in asm sections if (args.syntax == ASM_ARMIPS) { for (int i = 0; i < state->globals.count; i++) { - unsigned int vaddr = state->globals.labels[i].vaddr; + unsigned int vaddr = state->globals.items[i].vaddr; int global_in_asm = 0; - for (int j = 0; j < state->block_count; j++) { - unsigned int block_vaddr = state->blocks[j].vaddr; - if (vaddr >= block_vaddr && vaddr < block_vaddr + state->blocks[j].length) { + for (int j = 0; j < state->blocks.count; j++) { + unsigned int block_vaddr = state->blocks.items[j].vaddr; + if (vaddr >= block_vaddr && vaddr < block_vaddr + state->blocks.items[j].length) { global_in_asm = 1; break; } } if (!global_in_asm) { - fprintf(out, ".definelabel %s, 0x%08X\n", state->globals.labels[i].name, vaddr); + fprintf(out, ".definelabel %s, 0x%08X\n", state->globals.items[i].name, vaddr); } } } From 87326719a3d054713898b935acfd954fafeaea36 Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 18:37:15 +0100 Subject: [PATCH 04/14] Add an option for emitting glabel for global labels --- mipsdisasm.c | 22 +++++++++++++++++----- mipsdisasm.h | 3 ++- n64split.c | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 0953c08..df0cf2e 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -83,6 +83,7 @@ typedef struct _disasm_state asm_syntax syntax; int merge_pseudo; + int emit_glabel; } disasm_state; static void labels_add(vec(asm_label) *vec, const char *name, unsigned int vaddr) @@ -346,7 +347,7 @@ static void disassemble_block(unsigned char *data, unsigned int length, unsigned } } -disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo) +disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo, int emit_glabel) { disasm_state *state = 
malloc(sizeof(*state)); vec_alloc(state->globals, 128); @@ -354,6 +355,7 @@ disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo) state->syntax = syntax; state->merge_pseudo = merge_pseudo; + state->emit_glabel = emit_glabel; // open capstone disassembler if (cs_open(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN, &state->handle) != CS_ERR_OK) { @@ -451,7 +453,11 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) // insert all global labels at this address while ( (global_idx < state->globals.count) && (vaddr == state->globals.items[global_idx].vaddr) ) { const char *name = state->globals.items[global_idx].name; - fprintf(out, "%s:\n", name); + if (state->emit_glabel) { + fprintf(out, "glabel %s\n", name); + } else { + fprintf(out, "%s:\n", name); + } global_idx++; } // insert all local labels at this address @@ -661,6 +667,7 @@ typedef struct char *input_file; char *output_file; int merge_pseudo; + int emit_glabel; asm_syntax syntax; } arg_config; @@ -672,16 +679,18 @@ static arg_config default_args = NULL, // input_file NULL, // output_file 0, // merge_pseudo + 0, // emit_glabel ASM_GAS, // GNU as }; static void print_usage(void) { - ERROR("Usage: mipsdisasm [-o OUTPUT] [-p] [-s ASSEMBLER] [-v] ROM [RANGES]\n" + ERROR("Usage: mipsdisasm [-g] [-o OUTPUT] [-p] [-s ASSEMBLER] [-v] ROM [RANGES]\n" "\n" "mipsdisasm v" MIPSDISASM_VERSION ": MIPS disassembler\n" "\n" "Optional arguments:\n" + " -g emit \"glabel name\" for global labels\n" " -o OUTPUT output filename (default: stdout)\n" " -p emit pseudoinstructions for related instructions\n" " -s SYNTAX assembler syntax to use [gas, armips] (default: gas)\n" @@ -724,6 +733,9 @@ static void parse_arguments(int argc, char *argv[], arg_config *config) for (int i = 1; i < argc; i++) { if (argv[i][0] == '-') { switch (argv[i][1]) { + case 'g': + config->emit_glabel = 1; + break; case 'o': if (++i >= argc) { print_usage(); @@ -834,14 +846,14 @@ int main(int argc, char *argv[]) 
break; } - state = disasm_state_init(args.syntax, args.merge_pseudo); + state = disasm_state_init(args.syntax, args.merge_pseudo, args.emit_glabel); // run first pass disassembler on each section for (int i = 0; i < args.range_count; i++) { range *r = &args.ranges[i]; INFO("Disassembling range 0x%X-0x%X at 0x%08X\n", r->start, r->start + r->length, r->vaddr); - (void)mipsdisasm_pass1(data, r->start, r->length, r->vaddr, state); + mipsdisasm_pass1(data, r->start, r->length, r->vaddr, state); } // output global labels not in asm sections diff --git a/mipsdisasm.h b/mipsdisasm.h index 9afb174..605d02b 100644 --- a/mipsdisasm.h +++ b/mipsdisasm.h @@ -13,8 +13,9 @@ typedef enum // allocate and initialize disassembler state to be passed into disassembler routines // syntax: assembler syntax to use // merge_pseudo: if true, attempt to link pseudo instructions +// emit_glabel: if true, emit "glabel name" instead of "name:" for global labels // returns disassembler state -disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo); +disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo, int emit_glabel); // free disassembler state allocated during pass1 // state: disassembler state returned from disasm_state_alloc() or mipsdisasm_pass1() diff --git a/n64split.c b/n64split.c index f569d03..de0c1dd 100644 --- a/n64split.c +++ b/n64split.c @@ -2355,7 +2355,7 @@ int main(int argc, char *argv[]) } // add config labels to disasm state labels - state = disasm_state_init(ASM_GAS, 1); + state = disasm_state_init(ASM_GAS, 1, 1); for (i = 0; i < config.label_count; i++) { disasm_label_add(state, config.labels[i].name, config.labels[i].ram_addr); } From b8ab8ad1419085ae573b81b44d7406e4455356fd Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 18:40:00 +0100 Subject: [PATCH 05/14] Fix disasm_label_lookup return value --- mipsdisasm.c | 10 +++++----- mipsdisasm.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mipsdisasm.c 
b/mipsdisasm.c index df0cf2e..f74e9fe 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -391,14 +391,14 @@ void disasm_label_add(disasm_state *state, const char *name, unsigned int vaddr) int disasm_label_lookup(const disasm_state *state, unsigned int vaddr, char *name) { - int found = 0; int id = labels_find(&state->globals, vaddr); if (id >= 0) { - strcpy(name, state->globals.labels[id].name); - found = 1; + strcpy(name, state->globals.items[id].name); + return 1; + } else { + sprintf(name, "0x%08X", vaddr); + return 0; } - sprintf(name, "0x%08X", vaddr); - return found; } void mipsdisasm_pass1(unsigned char *data, unsigned int offset, unsigned int length, unsigned int vaddr, disasm_state *state) diff --git a/mipsdisasm.h b/mipsdisasm.h index 605d02b..0ecccb7 100644 --- a/mipsdisasm.h +++ b/mipsdisasm.h @@ -30,7 +30,7 @@ void disasm_label_add(disasm_state *state, const char *name, unsigned int vaddr) // lookup a global label from the disassembler state // state: disassembler state returned from disasm_state_alloc() or mipsdisasm_pass1() // vaddr: virtual address of label -// name: string to write label to +// name: string to write label to (or vaddr if label was not found) // returns 1 if found, 0 otherwise int disasm_label_lookup(const disasm_state *state, unsigned int vaddr, char *name); From 6ea694641beb50e702b87c2dbfa4fb5a4af08fbc Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sat, 21 Dec 2019 00:34:55 +0100 Subject: [PATCH 06/14] Parse missing ROM start/end correctly --- mipsdisasm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mipsdisasm.c b/mipsdisasm.c index f74e9fe..b037497 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -717,6 +717,9 @@ void range_parse(range *r, const char *arg) } else if (plus) { r->length = strtoul(plus+1, NULL, 0); } + } else { + r->start = 0; + r->length = 0; } } From 2521adc496c387e507790038cd2239f6bb9658fa Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 18:42:17 +0100 Subject: [PATCH 07/14] Split 
out mipsdisasm header/footer printing into functions --- mipsdisasm.c | 78 +++++++++++++++++++++++++++------------------------- mipsdisasm.h | 4 +++ 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index b037497..86d9849 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -651,7 +651,42 @@ const char *disasm_get_version(void) return version; } +void print_asm_header(FILE *out, const char *output_file, asm_syntax syntax) { + switch (syntax) { + case ASM_GAS: + fprintf(out, ".set noat # allow manual use of $at\n"); + fprintf(out, ".set noreorder # don't insert nops after branches\n\n"); + break; + case ASM_ARMIPS: + { + char output_binary[FILENAME_MAX]; + if (output_file == NULL) { + strcpy(output_binary, "test.bin"); + } else { + const char *base = basename(output_file); + generate_filename(base, output_binary, "bin"); + } + fprintf(out, ".n64\n"); + fprintf(out, ".create \"%s\", 0x%08X\n\n", output_binary, 0); + break; + } + default: + break; + } +} + +void print_asm_footer(FILE *out, asm_syntax syntax) { + switch (syntax) { + case ASM_ARMIPS: + fprintf(out, "\n.close\n"); + break; + default: + break; + } +} + #ifdef MIPSDISASM_STANDALONE + typedef struct { unsigned int start; @@ -663,7 +698,6 @@ typedef struct { range *ranges; int range_count; - unsigned int vaddr; char *input_file; char *output_file; int merge_pseudo; @@ -675,7 +709,6 @@ static arg_config default_args = { NULL, // ranges 0, // range_count - 0x0, // vaddr NULL, // input_file NULL, // output_file 0, // merge_pseudo @@ -816,6 +849,8 @@ int main(int argc, char *argv[]) } } + state = disasm_state_init(args.syntax, args.merge_pseudo, args.emit_glabel); + // if no ranges specified or if only vaddr specified, add one of entire input file if (args.range_count < 1 || (args.range_count == 1 && args.ranges[0].length == 0)) { if (args.range_count < 1) { @@ -826,31 +861,6 @@ int main(int argc, char *argv[]) args.range_count = 1; } - // assembler header output - switch 
(args.syntax) { - case ASM_GAS: - fprintf(out, ".set noat # allow manual use of $at\n"); - fprintf(out, ".set noreorder # don't insert nops after branches\n\n"); - break; - case ASM_ARMIPS: - { - char output_binary[FILENAME_MAX]; - if (args.output_file == NULL) { - strcpy(output_binary, "test.bin"); - } else { - const char *base = basename(args.output_file); - generate_filename(base, output_binary, "bin"); - } - fprintf(out, ".n64\n"); - fprintf(out, ".create \"%s\", 0x%08X\n\n", output_binary, 0); - break; - } - default: - break; - } - - state = disasm_state_init(args.syntax, args.merge_pseudo, args.emit_glabel); - // run first pass disassembler on each section for (int i = 0; i < args.range_count; i++) { range *r = &args.ranges[i]; @@ -859,6 +869,8 @@ int main(int argc, char *argv[]) mipsdisasm_pass1(data, r->start, r->length, r->vaddr, state); } + print_asm_header(out, args.output_file, args.syntax); + // output global labels not in asm sections if (args.syntax == ASM_ARMIPS) { for (int i = 0; i < state->globals.count; i++) { @@ -889,17 +901,9 @@ int main(int argc, char *argv[]) mipsdisasm_pass2(out, state, r->start); } - disasm_state_free(state); - - // assembler footer output - switch (args.syntax) { - case ASM_ARMIPS: - fprintf(out, "\n.close\n"); - break; - default: - break; - } + print_asm_footer(out, args.syntax); + disasm_state_free(state); free(data); return EXIT_SUCCESS; diff --git a/mipsdisasm.h b/mipsdisasm.h index 0ecccb7..e9428d5 100644 --- a/mipsdisasm.h +++ b/mipsdisasm.h @@ -52,4 +52,8 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset); // get version string of raw disassembler const char *disasm_get_version(void); +// semi-internal functions for printing beginning/end of assembly output +void print_asm_header(FILE *out, const char *output_file, asm_syntax syntax); +void print_asm_footer(FILE *out, asm_syntax syntax); + #endif // MIPSDISASM_H_ From 21a1c335918d8a158ee52c53c939525a8a5cae71 Mon Sep 17 00:00:00 2001 From: 
Simon Lindholm Date: Sun, 15 Dec 2019 18:44:55 +0100 Subject: [PATCH 08/14] Improve gitignore --- .gitignore | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.gitignore b/.gitignore index feb1637..606a030 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,18 @@ # Executables *.exe +*.out +/sm64compress +/n64cksum +/mipsdisasm +/sm64extend +/f3d +/f3d2obj +/sm64geo +/n64graphics +/mio0 +/n64split +/sm64walk + +# Makefile dependencies +*.d From 45bdfee8b255833bc9e1ad52d6bd60e06c79e9c7 Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 20:14:52 +0100 Subject: [PATCH 09/14] API for telling mipsdisasm about relocations --- mipsdisasm.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++---- mipsdisasm.h | 9 ++++++ 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 86d9849..4b89ed9 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -41,6 +41,15 @@ typedef struct DEFINE_VEC(asm_label) +typedef struct +{ + unsigned int offset; + char name[60]; + int addend; +} asm_reloc; + +DEFINE_VEC(asm_reloc) + typedef struct { // copied from cs_insn structure @@ -77,6 +86,7 @@ DEFINE_VEC(asm_block) typedef struct _disasm_state { vec(asm_label) globals; + vec(asm_reloc) relocs; vec(asm_block) blocks; csh handle; @@ -112,11 +122,23 @@ static int label_cmp(const void *a, const void *b) } } +static int reloc_cmp(const void *a, const void *b) +{ + const asm_reloc *ara = a; + const asm_reloc *arb = b; + return (ara->offset > arb->offset) - (arb->offset > ara->offset); +} + static void labels_sort(vec(asm_label) *vec) { qsort(vec->items, vec->count, sizeof(vec->items[0]), label_cmp); } +static void relocs_sort(vec(asm_reloc) *vec) +{ + qsort(vec->items, vec->count, sizeof(vec->items[0]), reloc_cmp); +} + // vec: label vector to search in // vaddr: virtual address to find // returns index in vec->items if found, -1 otherwise @@ -351,6 +373,7 @@ disasm_state *disasm_state_init(asm_syntax syntax, int 
merge_pseudo, int emit_gl { disasm_state *state = malloc(sizeof(*state)); vec_alloc(state->globals, 128); + vec_alloc(state->relocs, 128); vec_alloc(state->blocks, 128); state->syntax = syntax; @@ -379,6 +402,7 @@ void disasm_state_free(disasm_state *state) } } vec_free(state->blocks); + vec_free(state->relocs); vec_free(state->globals); cs_close(&state->handle); } @@ -401,6 +425,14 @@ int disasm_label_lookup(const disasm_state *state, unsigned int vaddr, char *nam } } +void disasm_reloc_add(disasm_state *state, unsigned int offset, const char *name, int addend) +{ + asm_reloc *r = vec_push(state->relocs); + r->offset = offset; + strcpy(r->name, name); + r->addend = addend; +} + void mipsdisasm_pass1(unsigned char *data, unsigned int offset, unsigned int length, unsigned int vaddr, disasm_state *state) { asm_block *block = vec_push(state->blocks); @@ -412,9 +444,10 @@ void mipsdisasm_pass1(unsigned char *data, unsigned int offset, unsigned int len // collect all branch and jump targets disassemble_block(&data[offset], length, vaddr, state, state->blocks.count - 1); - // sort global and local labels + // sort global and local labels and relocations labels_sort(&state->globals); labels_sort(&block->locals); + relocs_sort(&state->relocs); } void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) @@ -423,6 +456,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) unsigned int vaddr; int local_idx = 0; int global_idx = 0; + int reloc_idx = 0; int label; int indent = 0; // lookup block by offset @@ -437,13 +471,16 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) exit(1); } vaddr = block->vaddr; - // skip labels before this section + // skip labels/relocations before this section while ( (global_idx < state->globals.count) && (vaddr > state->globals.items[global_idx].vaddr) ) { global_idx++; } while ( (local_idx < block->locals.count) && (vaddr > block->locals.items[local_idx].vaddr) ) { 
local_idx++; } + while ( (reloc_idx < state->relocs.count) && (offset > state->relocs.items[reloc_idx].offset) ) { + reloc_idx++; + } for (int i = 0; i < block->instruction_count; i++) { disasm_data *insn = &block->instructions[i]; // newline between functions @@ -471,7 +508,46 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) indent = 0; fputc(' ', out); } - if (insn->is_jump) { + if ( (reloc_idx < state->relocs.count) && (offset == state->relocs.items[reloc_idx].offset) ) { + asm_reloc *reloc = &state->relocs.items[reloc_idx]; + reloc_idx++; + fprintf(out, "%-5s ", insn->mnemonic); + for (int o = 0; o < insn->op_count; o++) { + cs_mips_op *op = &insn->operands[o]; + if (o > 0) { + fprintf(out, ", "); + } + switch (op->type) { + case MIPS_OP_REG: + fprintf(out, "$%s", cs_reg_name(state->handle, op->reg)); + break; + case MIPS_OP_IMM: + case MIPS_OP_MEM: + { + char strAddend[32] = {0}; + if (reloc->addend > 0) { + sprintf(strAddend, " + 0x%X", reloc->addend); + } else if (reloc->addend < 0) { + sprintf(strAddend, " - 0x%X", -(unsigned)reloc->addend); + } + if (insn->id == MIPS_INS_JAL || insn->id == MIPS_INS_J) { + fprintf(out, "%s%s", reloc->name, strAddend); + } else if (insn->id == MIPS_INS_LUI) { + fprintf(out, "%%hi(%s%s)", reloc->name, strAddend); + } else { + fprintf(out, "%%lo(%s%s)", reloc->name, strAddend); + } + if (op->type == MIPS_OP_MEM) { + fprintf(out, "($%s)", cs_reg_name(state->handle, op->mem.base)); + } + break; + } + default: + break; + } + } + fprintf(out, "\n"); + } else if (insn->is_jump) { indent = 1; fprintf(out, "%-5s ", insn->mnemonic); if (insn->id == MIPS_INS_JAL || insn->id == MIPS_INS_BAL || insn->id == MIPS_INS_J) { @@ -484,16 +560,17 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset) } } else { for (int o = 0; o < insn->op_count; o++) { + cs_mips_op *op = &insn->operands[o]; if (o > 0) { fprintf(out, ", "); } - switch (insn->operands[o].type) { + switch (op->type) { case 
MIPS_OP_REG: - fprintf(out, "$%s", cs_reg_name(state->handle, insn->operands[o].reg)); + fprintf(out, "$%s", cs_reg_name(state->handle, op->reg)); break; case MIPS_OP_IMM: { - unsigned int branch_target = (unsigned int)insn->operands[o].imm; + unsigned int branch_target = (unsigned int)op->imm; label = labels_find(&block->locals, branch_target); if (label >= 0) { fprintf(out, "%s", block->locals.items[label].name); diff --git a/mipsdisasm.h b/mipsdisasm.h index e9428d5..ea21cbb 100644 --- a/mipsdisasm.h +++ b/mipsdisasm.h @@ -34,6 +34,15 @@ void disasm_label_add(disasm_state *state, const char *name, unsigned int vaddr) // returns 1 if found, 0 otherwise int disasm_label_lookup(const disasm_state *state, unsigned int vaddr, char *name); +// Add a .text section relocation to the disassembler state, overriding merge_pseudo heuristics. +// Whether the relocation is R_MIPS_LO16 (%lo), R_MIPS_HI16 (%hi) or R_MIPS_26 is implicit based on instruction mnemonic. +// +// state: disassembler state returned from disasm_state_alloc() or mipsdisasm_pass1() +// offset: buffer offset to apply the relocation to +// name: symbol name +// addend: constant to add to the symbol address (typically 0) +void disasm_reloc_add(disasm_state *state, unsigned int offset, const char *name, int addend); + // first pass of disassembler - collects procedures called and sorts them // data: buffer containing raw MIPS assembly // offset: buffer offset to start at From 6564dbbfe75c79dcdcd72febd3a03bf170a04f8b Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 23:03:52 +0100 Subject: [PATCH 10/14] Fix typos in mipsdisasm usage --- mipsdisasm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 4b89ed9..23ce1c7 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -795,7 +795,7 @@ static arg_config default_args = static void print_usage(void) { - ERROR("Usage: mipsdisasm [-g] [-o OUTPUT] [-p] [-s ASSEMBLER] [-v] ROM [RANGES]\n" + 
ERROR("Usage: mipsdisasm [-g] [-o OUTPUT] [-p] [-s SYNTAX] [-v] ROM [RANGES]\n" "\n" "mipsdisasm v" MIPSDISASM_VERSION ": MIPS disassembler\n" "\n" @@ -809,7 +809,7 @@ static void print_usage(void) "Arguments:\n" " FILE input binary file to disassemble\n" " [RANGES] optional list of ranges (default: entire input file)\n" - " format: :[-] or :[+]\n" + " format: [:-] or [:+]\n" " example: 0x80246000:0x1000-0x0E6258\n"); exit(EXIT_FAILURE); } From 447025fdcb278bfe377609e80e2f9845cc350c06 Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sun, 15 Dec 2019 23:05:48 +0100 Subject: [PATCH 11/14] Add comment about CS_OPT_SKIPDATA --- mipsdisasm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 23ce1c7..5071126 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -381,11 +381,14 @@ disasm_state *disasm_state_init(asm_syntax syntax, int merge_pseudo, int emit_gl state->emit_glabel = emit_glabel; // open capstone disassembler - if (cs_open(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN, &state->handle) != CS_ERR_OK) { + if (cs_open(CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN, &state->handle) != CS_ERR_OK) { ERROR("Error initializing disassembler\n"); exit(EXIT_FAILURE); } cs_option(state->handle, CS_OPT_DETAIL, CS_OPT_ON); + + // This is kinda sketchy; the capstone documentation says that cs_insn->detail + // is undefined when CS_OPT_SKIPDATA is set. But it's useful. 
cs_option(state->handle, CS_OPT_SKIPDATA, CS_OPT_ON); return state; From dc3a684608d811df28d05cf1d27facf8e8831dff Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sat, 21 Dec 2019 13:03:31 +0100 Subject: [PATCH 12/14] Fix unimportant mipsdisasm memory leak --- mipsdisasm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index 5071126..f18f5fb 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -399,10 +399,9 @@ void disasm_state_free(disasm_state *state) if (state) { for (int i = 0; i < state->blocks.count; i++) { asm_block *block = &state->blocks.items[i]; - if (block->instructions) { - free(block->instructions); - block->instructions = NULL; - } + free(block->instructions); + vec_free(block->locals); + block->instructions = NULL; } vec_free(state->blocks); vec_free(state->relocs); From c90a65551b92ee01197eafd01ff73e5f94a3dd1a Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Fri, 20 Dec 2019 19:32:56 +0100 Subject: [PATCH 13/14] Version bump for mipsdisasm --- mipsdisasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mipsdisasm.c b/mipsdisasm.c index f18f5fb..667f33f 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -9,7 +9,7 @@ #include "mipsdisasm.h" #include "utils.h" -#define MIPSDISASM_VERSION "0.2+" +#define MIPSDISASM_VERSION "0.3" // typedefs #define vec(type) vec_ ## type From 60b62450bdcb4fb7dbd8e9d3b9c8dcdb920831a9 Mon Sep 17 00:00:00 2001 From: Simon Lindholm Date: Sat, 21 Dec 2019 13:05:15 +0100 Subject: [PATCH 14/14] Add elfdump tool for disassembling ELF binaries --- .gitignore | 1 + Makefile | 11 +- README.md | 1 + elf.h | 61 ++++++++++ elfdump.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++++++ mipsdisasm.c | 2 +- mipsdisasm.h | 2 +- 7 files changed, 404 insertions(+), 3 deletions(-) create mode 100644 elf.h create mode 100644 elfdump.c diff --git a/.gitignore b/.gitignore index 606a030..b00a0a1 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ /mio0 
/n64split /sm64walk +/elfdump # Makefile dependencies *.d diff --git a/Makefile b/Makefile index 1e96976..4a1ad51 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ GRAPHICS_TARGET := n64graphics MIO0_TARGET := mio0 SPLIT_TARGET := n64split WALK_TARGET := sm64walk +ELFDUMP_TARGET := elfdump LIB_SRC_FILES := libmio0.c \ libsm64.c \ @@ -25,6 +26,10 @@ COMPRESS_SRC_FILES := sm64compress.c DISASM_SRC_FILES := mipsdisasm.c \ utils.c +ELFDUMP_SRC_FILES := mipsdisasm.c \ + utils.c \ + elfdump.c + EXTEND_SRC_FILES := sm64extend.c F3D_SRC_FILES := f3d.c \ @@ -92,7 +97,7 @@ default: all all: $(EXTEND_TARGET) $(COMPRESS_TARGET) $(MIO0_TARGET) $(CKSUM_TARGET) \ $(SPLIT_TARGET) $(F3D_TARGET) $(F3D2OBJ_TARGET) $(GRAPHICS_TARGET) \ - $(DISASM_TARGET) $(GEO_TARGET) $(WALK_TARGET) + $(DISASM_TARGET) $(GEO_TARGET) $(WALK_TARGET) $(ELFDUMP_TARGET) $(OBJ_DIR)/%.o: %.c @[ -d $(OBJ_DIR) ] || mkdir -p $(OBJ_DIR) @@ -135,6 +140,9 @@ $(SPLIT_TARGET): $(SPLIT_OBJ_FILES) $(WALK_TARGET): sm64walk.c $(SM64_LIB) $(CC) $(CFLAGS) -o $@ $^ +$(ELFDUMP_TARGET): $(ELFDUMP_SRC_FILES) + $(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@ -lcapstone + rawmips: rawmips.c utils.c $(CC) $(CFLAGS) -o $@ $^ -lcapstone @@ -151,6 +159,7 @@ clean: rm -f $(GRAPHICS_TARGET) $(GRAPHICS_TARGET).exe rm -f $(SPLIT_TARGET) $(SPLIT_TARGET).exe rm -f $(WALK_TARGET) $(WALK_TARGET).exe + rm -f $(ELFDUMP_TARGET) $(ELFDUMP_TARGET).exe -@[ -d $(OBJ_DIR) ] && rmdir --ignore-fail-on-non-empty $(OBJ_DIR) .PHONY: all clean default diff --git a/README.md b/README.md index 2977e0c..234d26b 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ There are many other smaller tools included to help with SM64 hacking. They are - n64cksum: standalone N64 checksum generator. 
can either do in place or output to a new file - n64graphics: converts graphics data from PNG files into RGBA or IA N64 graphics data - mipsdisasm: standalone recursive MIPS disassembler + - elfdump: disassembler for MIPS ELF object files - sm64geo: standalone SM64 geometry layout decoder ## License diff --git a/elf.h b/elf.h new file mode 100644 index 0000000..9105c4d --- /dev/null +++ b/elf.h @@ -0,0 +1,61 @@ +#include + +#define EI_DATA 5 +#define EI_NIDENT 16 +#define SHT_SYMTAB 2 +#define SHT_REL 9 +#define STN_UNDEF 0 + +#define ELF32_R_SYM(info) ((info) >> 8) +#define ELF32_R_TYPE(info) ((info) & 0xff) + +#define R_MIPS_26 4 +#define R_MIPS_HI16 5 +#define R_MIPS_LO16 6 + +typedef uint32_t Elf32_Addr; +typedef uint32_t Elf32_Off; + +typedef struct { + uint8_t e_ident[EI_NIDENT]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; + Elf32_Off e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf32_Ehdr; + +typedef struct { + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; +} Elf32_Shdr; + +typedef struct { + uint32_t st_name; + Elf32_Addr st_value; + uint32_t st_size; + uint8_t st_info; + uint8_t st_other; + uint16_t st_shndx; +} Elf32_Sym; + +typedef struct { + Elf32_Addr r_offset; + uint32_t r_info; +} Elf32_Rel; diff --git a/elfdump.c b/elfdump.c new file mode 100644 index 0000000..334902a --- /dev/null +++ b/elfdump.c @@ -0,0 +1,329 @@ +#include +#include +#include +#include +#include "mipsdisasm.h" +#include "utils.h" +#include "elf.h" + +#define ELFDUMP_VERSION "0.1" + +#define u32be(x) (uint32_t)(((x & 0xff) << 24) + ((x & 0xff00) << 8) + ((x & 0xff0000) >> 8) + ((uint32_t)(x) >> 24)) +#define u16be(x) (uint16_t)(((x & 0xff) 
<< 8) + ((x & 0xff00) >> 8)) + +typedef struct +{ + unsigned int start; + unsigned int length; + unsigned int vaddr; +} range; + +typedef struct +{ + int has_vaddr; + unsigned int vaddr; + char *input_file; + char *output_file; + int merge_pseudo; + int emit_glabel; + asm_syntax syntax; +} arg_config; + +static arg_config default_args = +{ + 0, // has_vaddr + 0x0, // vaddr + NULL, // input_file + NULL, // output_file + 0, // merge_pseudo + 1, // emit_glabel + ASM_GAS, // GNU as +}; + +static void print_usage(void) +{ + ERROR("Usage: elfdump [-g] [-o OUTPUT] [-p] [-s SYNTAX] [-v] OBJFILE [VADDR]\n" + "\n" + "elfdump v" ELFDUMP_VERSION ": MIPS ELF object file disassembler\n" + "\n" + "Optional arguments:\n" + " -g emit \"glabel name\" for global labels\n" + " -o OUTPUT output filename (default: stdout)\n" + " -p emit pseudoinstructions for related instructions\n" + " (not useful for objfiles, but possibly ELF binaries)\n" + " -s SYNTAX assembler syntax to use [gas, armips] (default: gas)\n" + " -v verbose progress output\n" + "\n" + "Arguments:\n" + " OBJFILE ELF object file to disassemble .text section for\n" + " [VADDR] virtual address of the first instruction\n"); + exit(EXIT_FAILURE); +} + +// parse command line arguments +static void parse_arguments(int argc, char *argv[], arg_config *config) +{ + int has_file = 0; + if (argc < 2) { + print_usage(); + exit(1); + } + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 'g': + config->emit_glabel = 1; + break; + case 'o': + if (++i >= argc) { + print_usage(); + } + config->output_file = argv[i]; + break; + case 'p': + config->merge_pseudo = 1; + break; + case 's': + { + if (++i >= argc) { + print_usage(); + } + if ((0 == strcasecmp("gas", argv[i])) || + (0 == strcasecmp("gnu", argv[i]))) { + config->syntax = ASM_GAS; + } else if (0 == strcasecmp("armips", argv[i])) { + config->syntax = ASM_ARMIPS; + } else { + print_usage(); + } + break; + } + case 'v': + g_verbosity = 1; + 
break; + default: + print_usage(); + break; + } + } else { + if (!has_file) { + config->input_file = argv[i]; + has_file = 1; + } else if (!config->has_vaddr) { + config->vaddr = strtoul(argv[i], NULL, 0); + config->has_vaddr = 1; + } else { + print_usage(); + } + } + } + if (!has_file) { + print_usage(); + } +} + +static void add_reloc(disasm_state *state, unsigned int offset, const char *name, int addend, unsigned int vaddr) +{ + char label_name[256]; + if (!strcmp(name, ".text")) { + vaddr += addend; + addend = 0; + if (!disasm_label_lookup(state, vaddr, label_name)) { + sprintf(label_name, "static_%08X", vaddr); + disasm_label_add(state, label_name, vaddr); + } + name = label_name; + } + + disasm_reloc_add(state, offset, name, addend); +} + +static range parse_elf(disasm_state *state, unsigned char *data, long file_len, arg_config *args) +{ + Elf32_Ehdr *ehdr; + Elf32_Shdr *shdr, *str_shdr, *sym_shdr, *sym_strtab; + int text_section_index = -1; + int symtab_section_index = -1; + uint32_t text_offset = 0; + uint32_t vaddr_adj = 0; + range out_range; + + if (file_len < 4 || data[0] != 0x7f || data[1] != 'E' || data[2] != 'L' || data[3] != 'F') { + ERROR("Not an ELF file.\n"); + exit(EXIT_FAILURE); + } + + ehdr = (Elf32_Ehdr *) data; + if (ehdr->e_ident[EI_DATA] != 2 || u16be(ehdr->e_machine) != 8) { + ERROR("Not big-endian MIPS.\n"); + exit(EXIT_FAILURE); + } + + if (u16be(ehdr->e_shstrndx) == 0) { + // (We could look at program headers instead in this case.) 
+        ERROR("Missing section headers; stripped binaries are not yet supported.\n");
+        exit(EXIT_FAILURE);
+    }
+
+#define SECTION(index) ((Elf32_Shdr *)(data + u32be(ehdr->e_shoff) + (index) * u16be(ehdr->e_shentsize)))
+#define STR(strtab, offset) ((const char *)(data + u32be((strtab)->sh_offset) + (offset)))
+
+    str_shdr = SECTION(u16be(ehdr->e_shstrndx)); // section-name string table
+    for (int i = 0; i < u16be(ehdr->e_shnum); i++) {
+        shdr = SECTION(i);
+        const char *name = STR(str_shdr, u32be(shdr->sh_name));
+        if (memcmp(name, ".text", 5) == 0) {
+            text_offset = u32be(shdr->sh_offset);
+            if (args->has_vaddr)
+                out_range.vaddr = args->vaddr; // VADDR given on the command line overrides sh_addr
+            else
+                out_range.vaddr = u32be(shdr->sh_addr); // default: address recorded in the section header
+            vaddr_adj = out_range.vaddr - u32be(shdr->sh_addr); // shift added to .text symbol values below
+            out_range.length = u32be(shdr->sh_size);
+            out_range.start = text_offset;
+            text_section_index = i;
+        }
+        if (u32be(shdr->sh_type) == SHT_SYMTAB) {
+            symtab_section_index = i;
+        }
+    }
+
+    if (text_section_index == -1) {
+        ERROR("Missing .text section.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (symtab_section_index == -1) {
+        ERROR("Missing symtab section.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // add symbols
+    sym_shdr = SECTION(symtab_section_index);
+    sym_strtab = SECTION(u32be(sym_shdr->sh_link)); // sh_link of the symtab selects its string table
+
+    assert(u32be(sym_shdr->sh_entsize) == sizeof(Elf32_Sym));
+    for (unsigned int i = 0; i < u32be(sym_shdr->sh_size); i += sizeof(Elf32_Sym)) {
+        Elf32_Sym *sym = (Elf32_Sym *)(data + u32be(sym_shdr->sh_offset) + i);
+        const char *name = STR(sym_strtab, u32be(sym->st_name));
+        uint32_t addr = u32be(sym->st_value);
+        if (u16be(sym->st_shndx) != text_section_index || name[0] == '.') { // only named .text symbols become labels
+            continue;
+        }
+        addr += vaddr_adj;
+        disasm_label_add(state, name, addr);
+    }
+
+    // add relocations
+    for (int i = 0; i < u16be(ehdr->e_shnum); i++) {
+        Elf32_Rel *prevHi = NULL; // pending R_MIPS_HI16 waiting for its R_MIPS_LO16
+        shdr = SECTION(i);
+        if (u32be(shdr->sh_type) != SHT_REL || u32be(shdr->sh_info) != (unsigned int) text_section_index)
+            continue;
+
+        assert(u32be(shdr->sh_link) == (unsigned int)
symtab_section_index); + assert(u32be(shdr->sh_entsize) == sizeof(Elf32_Rel)); + for (unsigned int i = 0; i < u32be(shdr->sh_size); i += sizeof(Elf32_Rel)) { + Elf32_Rel *rel = (Elf32_Rel *)(data + u32be(shdr->sh_offset) + i); + uint32_t offset = text_offset + u32be(rel->r_offset); + uint32_t symIndex = ELF32_R_SYM(u32be(rel->r_info)); + uint32_t rtype = ELF32_R_TYPE(u32be(rel->r_info)); + const char *symName = "0"; + if (symIndex != STN_UNDEF) { + Elf32_Sym *sym = (Elf32_Sym *)(data + u32be(sym_shdr->sh_offset) + symIndex * sizeof(Elf32_Sym)); + symName = STR(sym_strtab, u32be(sym->st_name)); + } + + if (rtype == R_MIPS_HI16) { + if (prevHi != NULL) { + ERROR("Consecutive R_MIPS_HI16.\n"); + exit(EXIT_FAILURE); + } + prevHi = rel; + continue; + } + if (rtype == R_MIPS_LO16) { + int32_t addend = (int16_t)((data[offset + 2] << 8) + data[offset + 3]); + if (prevHi != NULL) { + uint32_t offset2 = text_offset + u32be(prevHi->r_offset); + addend += (uint32_t)((data[offset2 + 2] << 8) + data[offset2 + 3]) << 16; + add_reloc(state, offset2, symName, addend, out_range.vaddr); + } + prevHi = NULL; + add_reloc(state, offset, symName, addend, out_range.vaddr); + } + else if (rtype == R_MIPS_26) { + int32_t addend = (u32be(*(uint32_t*)(data + offset)) & ((1 << 26) - 1)) << 2; + if (addend >= (1 << 27)) { + addend -= 1 << 28; + } + add_reloc(state, offset, symName, addend, out_range.vaddr); + } + else { + ERROR("Bad relocation type %d.\n", rtype); + exit(EXIT_FAILURE); + } + } + if (prevHi != NULL) { + ERROR("R_MIPS_HI16 without matching R_MIPS_LO16.\n"); + exit(EXIT_FAILURE); + } + } + + return out_range; +} +#undef SECTION +#undef STR + +int main(int argc, char *argv[]) +{ + arg_config args; + long file_len; + disasm_state *state; + unsigned char *data; + FILE *out; + range r; + + // load defaults and parse arguments + out = stdout; + args = default_args; + parse_arguments(argc, argv, &args); + + // read input file + INFO("Reading input file '%s'\n", args.input_file); + 
file_len = read_file(args.input_file, &data); + if (file_len <= 0) { + ERROR("Error reading input file '%s'\n", args.input_file); + return EXIT_FAILURE; + } + + // if specified, open output file + if (args.output_file != NULL) { + INFO("Opening output file '%s'\n", args.output_file); + out = fopen(args.output_file, "w"); + if (out == NULL) { + ERROR("Error opening output file '%s'\n", args.output_file); + return EXIT_FAILURE; + } + } + + state = disasm_state_init(args.syntax, args.merge_pseudo, args.emit_glabel); + + r = parse_elf(state, data, file_len, &args); + + // run first pass disassembler + INFO("Disassembling range 0x%X-0x%X at 0x%08X\n", r.start, r.start + r.length, r.vaddr); + mipsdisasm_pass1(data, r.start, r.length, r.vaddr, state); + + // second pass, generate output + print_asm_header(out, args.output_file, args.syntax); + if (args.syntax == ASM_ARMIPS) { + fprintf(out, ".headersize 0x%08X\n\n", r.vaddr); + } + mipsdisasm_pass2(out, state, r.start); + print_asm_footer(out, args.syntax); + + disasm_state_free(state); + free(data); + return EXIT_SUCCESS; +} diff --git a/mipsdisasm.c b/mipsdisasm.c index 667f33f..1a2ec58 100644 --- a/mipsdisasm.c +++ b/mipsdisasm.c @@ -816,7 +816,7 @@ static void print_usage(void) exit(EXIT_FAILURE); } -void range_parse(range *r, const char *arg) +static void range_parse(range *r, const char *arg) { char *colon = strchr(arg, ':'); r->vaddr = strtoul(arg, NULL, 0); diff --git a/mipsdisasm.h b/mipsdisasm.h index ea21cbb..7040c51 100644 --- a/mipsdisasm.h +++ b/mipsdisasm.h @@ -61,7 +61,7 @@ void mipsdisasm_pass2(FILE *out, disasm_state *state, unsigned int offset); // get version string of raw disassembler const char *disasm_get_version(void); -// semi-internal functions for printing beginning/end of assembly output +// internal functions for use in mipsdisasm and elfdump void print_asm_header(FILE *out, const char *output_file, asm_syntax syntax); void print_asm_footer(FILE *out, asm_syntax syntax);