From bf6227e9295110ab97c8cf3237914acd6cfa09b0 Mon Sep 17 00:00:00 2001 From: Dennis Goodlett Date: Fri, 27 Dec 2024 16:02:32 -0500 Subject: [PATCH] Add last pseudo instructions to pickle ##arch --- libr/arch/p/pickle/dis_helper.inc | 167 +++++++++++++++++++++++ libr/arch/p/pickle/plugin.c | 177 ++----------------------- libr/arch/p/pickle/pseudo.c | 211 ++++++++++++++++++++++-------- 3 files changed, 334 insertions(+), 221 deletions(-) create mode 100644 libr/arch/p/pickle/dis_helper.inc diff --git a/libr/arch/p/pickle/dis_helper.inc b/libr/arch/p/pickle/dis_helper.inc new file mode 100644 index 0000000000000..ef5a3b4fd786c --- /dev/null +++ b/libr/arch/p/pickle/dis_helper.inc @@ -0,0 +1,167 @@ +enum opcode { /* pickle opcode bytes written as character literals; OP_FAILURE is the lookup-failure sentinel */ + OP_MARK = '(', + OP_STOP = '.', + OP_POP = '0', + OP_POP_MARK = '1', + OP_DUP = '2', + OP_FLOAT = 'F', + OP_INT = 'I', + OP_BININT = 'J', + OP_BININT1 = 'K', + OP_LONG = 'L', + OP_BININT2 = 'M', + OP_NONE = 'N', + OP_PERSID = 'P', + OP_BINPERSID = 'Q', + OP_REDUCE = 'R', + OP_STRING = 'S', + OP_BINSTRING = 'T', + OP_SHORT_BINSTRING = 'U', + OP_UNICODE = 'V', + OP_BINUNICODE = 'X', + OP_APPEND = 'a', + OP_BUILD = 'b', + OP_GLOBAL = 'c', + OP_DICT = 'd', + OP_EMPTY_DICT = '}', + OP_APPENDS = 'e', + OP_GET = 'g', + OP_BINGET = 'h', + OP_INST = 'i', + OP_LONG_BINGET = 'j', + OP_LIST = 'l', + OP_EMPTY_LIST = ']', + OP_OBJ = 'o', + OP_PUT = 'p', + OP_BINPUT = 'q', + OP_LONG_BINPUT = 'r', + OP_SETITEM = 's', + OP_TUPLE = 't', + OP_EMPTY_TUPLE = ')', + OP_SETITEMS = 'u', + OP_BINFLOAT = 'G', + + // Protocol 2. 
+ OP_PROTO = '\x80', + OP_NEWOBJ = '\x81', + OP_EXT1 = '\x82', + OP_EXT2 = '\x83', + OP_EXT4 = '\x84', + OP_TUPLE1 = '\x85', + OP_TUPLE2 = '\x86', + OP_TUPLE3 = '\x87', + OP_NEWTRUE = '\x88', + OP_NEWFALSE = '\x89', + OP_LONG1 = '\x8a', + OP_LONG4 = '\x8b', + + // Protocol 3 (Python 3.x) + OP_BINBYTES = 'B', + OP_SHORT_BINBYTES = 'C', + + // Protocol 4 + OP_SHORT_BINUNICODE = '\x8c', + OP_BINUNICODE8 = '\x8d', + OP_BINBYTES8 = '\x8e', + OP_EMPTY_SET = '\x8f', + OP_ADDITEMS = '\x90', + OP_FROZENSET = '\x91', + OP_NEWOBJ_EX = '\x92', + OP_STACK_GLOBAL = '\x93', + OP_MEMOIZE = '\x94', + OP_FRAME = '\x95', + + // Protocol 5 + OP_BYTEARRAY8 = '\x96', + OP_NEXT_BUFFER = '\x97', + OP_READONLY_BUFFER = '\x98', + + // not a real pickle opcode, indicates failure in parsing + OP_FAILURE = '\xff' +}; + +struct opmap { /* one mnemonic string paired with its opcode byte */ + const char *const name; + char op; +}; + +static const struct opmap op_name_map[] = { + { "mark", '(' }, + { "stop", '.' }, + { "pop", '0' }, + { "pop_mark", '1' }, + { "dup", '2' }, + { "float", 'F' }, + { "int", 'I' }, + { "binint", 'J' }, + { "binint1", 'K' }, + { "long", 'L' }, + { "binint2", 'M' }, + { "none", 'N' }, + { "persid", 'P' }, + { "binpersid", 'Q' }, + { "reduce", 'R' }, + { "string", 'S' }, + { "binstring", 'T' }, + { "short_binstring", 'U' }, + { "unicode", 'V' }, + { "binunicode", 'X' }, + { "append", 'a' }, + { "build", 'b' }, + { "global", 'c' }, + { "dict", 'd' }, + { "empty_dict", '}' }, + { "appends", 'e' }, + { "get", 'g' }, + { "binget", 'h' }, + { "inst", 'i' }, + { "long_binget", 'j' }, + { "list", 'l' }, + { "empty_list", ']' }, + { "obj", 'o' }, + { "put", 'p' }, + { "binput", 'q' }, + { "long_binput", 'r' }, + { "setitem", 's' }, + { "tuple", 't' }, + { "empty_tuple", ')' }, + { "setitems", 'u' }, + { "binfloat", 'G' }, + { "proto", '\x80' }, + { "newobj", '\x81' }, + { "ext1", '\x82' }, + { "ext2", '\x83' }, + { "ext4", '\x84' }, + { "tuple1", '\x85' }, + { "tuple2", '\x86' }, + { "tuple3", '\x87' }, + { "newtrue", '\x88' }, + { 
"newfalse", '\x89' }, + { "long1", '\x8a' }, + { "long4", '\x8b' }, + { "binbytes", 'B' }, + { "short_binbytes", 'C' }, + { "short_binunicode", '\x8c' }, + { "binunicode8", '\x8d' }, + { "binbytes8", '\x8e' }, + { "empty_set", '\x8f' }, + { "additems", '\x90' }, + { "frozenset", '\x91' }, + { "newobj_ex", '\x92' }, + { "stack_global", '\x93' }, + { "memoize", '\x94' }, + { "frame", '\x95' }, + { "bytearray8", '\x96' }, + { "next_buffer", '\x97' }, + { "readonly_buffer", '\x98' } +}; + +static inline int name_to_op(const char *opstr) { /* returns the op of the first op_name_map entry whose name matches opstr under r_str_casecmp, else OP_FAILURE */ + size_t i; + for (i = 0; i < R_ARRAY_SIZE (op_name_map); i++) { + if (!r_str_casecmp (opstr, op_name_map[i].name)) { + return op_name_map[i].op; + } + } + return OP_FAILURE; +} diff --git a/libr/arch/p/pickle/plugin.c b/libr/arch/p/pickle/plugin.c index 4acf23d512ad6..9cb28a68acfde 100644 --- a/libr/arch/p/pickle/plugin.c +++ b/libr/arch/p/pickle/plugin.c @@ -1,164 +1,10 @@ /* radare2 - LGPL - Copyright 2022-2024 - bemodtwz */ #include +#include "dis_helper.inc" #define MAXSTRLEN 128 -struct opmap { - const char * const name; - const char op; -}; - -enum opcode { - OP_MARK = '(', - OP_STOP = '.', - OP_POP = '0', - OP_POP_MARK = '1', - OP_DUP = '2', - OP_FLOAT = 'F', - OP_INT = 'I', - OP_BININT = 'J', - OP_BININT1 = 'K', - OP_LONG = 'L', - OP_BININT2 = 'M', - OP_NONE = 'N', - OP_PERSID = 'P', - OP_BINPERSID = 'Q', - OP_REDUCE = 'R', - OP_STRING = 'S', - OP_BINSTRING = 'T', - OP_SHORT_BINSTRING = 'U', - OP_UNICODE = 'V', - OP_BINUNICODE = 'X', - OP_APPEND = 'a', - OP_BUILD = 'b', - OP_GLOBAL = 'c', - OP_DICT = 'd', - OP_EMPTY_DICT = '}', - OP_APPENDS = 'e', - OP_GET = 'g', - OP_BINGET = 'h', - OP_INST = 'i', - OP_LONG_BINGET = 'j', - OP_LIST = 'l', - OP_EMPTY_LIST = ']', - OP_OBJ = 'o', - OP_PUT = 'p', - OP_BINPUT = 'q', - OP_LONG_BINPUT = 'r', - OP_SETITEM = 's', - OP_TUPLE = 't', - OP_EMPTY_TUPLE = ')', - OP_SETITEMS = 'u', - OP_BINFLOAT = 'G', - - // Protocol 2. 
- OP_PROTO = '\x80', - OP_NEWOBJ = '\x81', - OP_EXT1 = '\x82', - OP_EXT2 = '\x83', - OP_EXT4 = '\x84', - OP_TUPLE1 = '\x85', - OP_TUPLE2 = '\x86', - OP_TUPLE3 = '\x87', - OP_NEWTRUE = '\x88', - OP_NEWFALSE = '\x89', - OP_LONG1 = '\x8a', - OP_LONG4 = '\x8b', - - // Protocol 3 (Python 3.x) - OP_BINBYTES = 'B', - OP_SHORT_BINBYTES = 'C', - - // Protocol 4 - OP_SHORT_BINUNICODE = '\x8c', - OP_BINUNICODE8 = '\x8d', - OP_BINBYTES8 = '\x8e', - OP_EMPTY_SET = '\x8f', - OP_ADDITEMS = '\x90', - OP_FROZENSET = '\x91', - OP_NEWOBJ_EX = '\x92', - OP_STACK_GLOBAL = '\x93', - OP_MEMOIZE = '\x94', - OP_FRAME = '\x95', - - // Protocol 5 - OP_BYTEARRAY8 = '\x96', - OP_NEXT_BUFFER = '\x97', - OP_READONLY_BUFFER = '\x98' -}; - -static const struct opmap op_name_map[] = { - { "mark", '(' }, - { "stop", '.' }, - { "pop", '0' }, - { "pop_mark", '1' }, - { "dup", '2' }, - { "float", 'F' }, - { "int", 'I' }, - { "binint", 'J' }, - { "binint1", 'K' }, - { "long", 'L' }, - { "binint2", 'M' }, - { "none", 'N' }, - { "persid", 'P' }, - { "binpersid", 'Q' }, - { "reduce", 'R' }, - { "string", 'S' }, - { "binstring", 'T' }, - { "short_binstring", 'U' }, - { "unicode", 'V' }, - { "binunicode", 'X' }, - { "append", 'a' }, - { "build", 'b' }, - { "global", 'c' }, - { "dict", 'd' }, - { "empty_dict", '}' }, - { "appends", 'e' }, - { "get", 'g' }, - { "binget", 'h' }, - { "inst", 'i' }, - { "long_binget", 'j' }, - { "list", 'l' }, - { "empty_list", ']' }, - { "obj", 'o' }, - { "put", 'p' }, - { "binput", 'q' }, - { "long_binput", 'r' }, - { "setitem", 's' }, - { "tuple", 't' }, - { "empty_tuple", ')' }, - { "setitems", 'u' }, - { "binfloat", 'G' }, - { "proto", '\x80' }, - { "newobj", '\x81' }, - { "ext1", '\x82' }, - { "ext2", '\x83' }, - { "ext4", '\x84' }, - { "tuple1", '\x85' }, - { "tuple2", '\x86' }, - { "tuple3", '\x87' }, - { "newtrue", '\x88' }, - { "newfalse", '\x89' }, - { "long1", '\x8a' }, - { "long4", '\x8b' }, - { "binbytes", 'B' }, - { "short_binbytes", 'C' }, - { "short_binunicode", 
'\x8c' }, - { "binunicode8", '\x8d' }, - { "binbytes8", '\x8e' }, - { "empty_set", '\x8f' }, - { "additems", '\x90' }, - { "frozenset", '\x91' }, - { "newobj_ex", '\x92' }, - { "stack_global", '\x93' }, - { "memoize", '\x94' }, - { "frame", '\x95' }, - { "bytearray8", '\x96' }, - { "next_buffer", '\x97' }, - { "readonly_buffer", '\x98' } -}; - static inline bool valid_offset(RArch *a, ut64 addr) { RBin *bin = R_UNWRAP2 (a, binb.bin); if (bin) { @@ -745,21 +591,12 @@ static inline int assemble_n_str(char *str, ut32 cnt, ut8 *outbuf, int outsz, bo return len + 1; } -static inline bool write_op(char *opstr, ut8 *outbuf) { - size_t i; - for (i = 0; i < R_ARRAY_SIZE (op_name_map); i++) { - if (!r_str_casecmp (opstr, op_name_map[i].name)) { - *outbuf = (ut8)op_name_map[i].op; - return true; - } - } - return false; -} - static bool pickle_encode(RArchSession *s, RAnalOp *op, RArchEncodeMask mask) { const char *str = op->mnemonic; // some ops can be huge, but they should always be smaller then the mnemonics int outsz = strlen (str); + + // _outbuf is kept for freeing; outbuf is the write cursor that gets advanced ut8 *_outbuf = malloc (outsz); if (!_outbuf) { return false; @@ -783,8 +620,12 @@ static bool pickle_encode(RArchSession *s, RAnalOp *op, RArchEncodeMask mask) { arg = ""; } - if (write_op (opstr, outbuf)) { - char ob = (char)*outbuf; + int ob = name_to_op (opstr); /* OP_FAILURE when opstr matches no mnemonic */ + if (ob == OP_FAILURE) { + R_LOG_ERROR ("Unknown pickle verb: %s", opstr); + wlen = -1; + } else { + *outbuf = (ut8)ob; wlen++; outbuf++; outsz--; diff --git a/libr/arch/p/pickle/pseudo.c b/libr/arch/p/pickle/pseudo.c index 174d5050a7efa..982c6f3526284 100644 --- a/libr/arch/p/pickle/pseudo.c +++ b/libr/arch/p/pickle/pseudo.c @@ -2,66 +2,171 @@ #include #include +#include "dis_helper.inc" -static inline char *parse_no_args(const char *data) { - struct pickle_inst { - const char *name; - const char *ret; - }; - - static const struct pickle_inst insts[] = { - { "mark", "metastack.append(stack); stack = []" }, - { "stop", 
"return stack[-1]" }, - { "pop", "stack.pop()" }, - { "pop_mark", "stack = metastack.pop()" }, - { "dup", "stack.append(stack[-1])" }, - { "none", "stack.append(None)" }, - { "binpersid", "stack.append(persistent_load(stack.pop()))" }, - { "reduce", "stack.append(stack.pop()(stack.pop()))" }, - { "append", "stack[-1].append(stack.pop())" }, - { "build", "state = stack.pop(); set_obj_attrs(obj=stack[-1], attrs=state)" }, // this one is complicated... - { "dict", "items = stack; stack = metastack.pop(); stack.append({i[0], i[1] for i in zip(*([iter(items)]*2))})" }, - { "empty_dict", "stack.append({})" }, - { "appends", "for item in stack: metastack[-1].append(item); stack = metastack.pop()" }, - { "list", "item = stack; stack = metastack.pop(); stack.append(item)" }, - { "empty_list", "stack.append([])" }, - { "obj", "args = stack.pop(); cls = stack.pop(); stack.append(cls.__new__(cls, *args))" }, - { "setitem", "key = stack.pop(); value = stack.pop(); stack[-1][key] = value" }, - { "tuple", "items = stack; stack = metastack.pop(); stack.append(tuple(items))" }, - { "empty_tuple", "stack.append(())" }, - { "setitems", "items = stack; stack = metastack.pop(); stack[-1].update(zip(*([iter(items)]*2)))" }, - { "newobj", "args = stack.pop(); cls = stack.pop(); cls.__new__(cls, *args)" }, - { "tuple1", "stack[-1] = (stack[-1],)" }, - { "tuple2", "stack[-2:] = [(stack[-2], stack[-1])]" }, - { "tuple3", "stack[-3:] = [(stack[-3], stack[-2], stack[-1])]" }, - { "newtrue", "stack.push(True)" }, - { "newfalse", "stack.push(False)" }, - { "empty_set", "stack.push(set())" }, - { "additems", "items = stack; stack = metastack.pop(); for item in items: stack[-1].add(item)" }, - { "frozenset", "metastack[-1].append(frozenset(stack)); stack = metastack.pop()" }, - { "newobj_ex", "kwargs = stack.pop; args = stack.pop(); cls = stack.pop(); stack.append(cls.__new__(cls, *args, **kwargs))" }, - { "stack_global", "name = stack.pop(); module = stack.pop(); find_class(name, module)" }, - { 
"memoize", "memo[len(memo)] = stack[-1]" }, - { "next_buffer", "stack.append(next(out_of_band_buffers))" }, - { "readonly_buffer", "with memoryview(stack[-1]) as m: stack[-1] = m.toreadonly()" }, - }; +static inline char *parse_no_args(int op) { /* pseudo code for an opcode written without an operand: a strdup'd string, or NULL when op has no case here */ + switch (op) { + case OP_FAILURE: + return NULL; // opcode not found in table + case OP_MARK: + return strdup ("metastack.append(stack); stack = []"); + case OP_STOP: + return strdup ("return stack[-1]"); + case OP_POP: + return strdup ("stack.pop()"); + case OP_POP_MARK: + return strdup ("stack = metastack.pop()"); + case OP_DUP: + return strdup ("stack.append(stack[-1])"); + case OP_NONE: + return strdup ("stack.append(None)"); + case OP_BINPERSID: + return strdup ("stack.append(persistent_load(stack.pop()))"); + case OP_REDUCE: /* args tuple is on top, the callable below it */ + return strdup ("args = stack.pop(); func = stack.pop(); stack.append(func(*args))"); + case OP_APPEND: /* pop the value first so stack[-1] names the list */ + return strdup ("value = stack.pop(); stack[-1].append(value)"); + case OP_BUILD: + return strdup ("state = stack.pop(); set_obj_attrs(obj=stack[-1], attrs=state)"); // this one is complicated... 
+ case OP_DICT: + return strdup ("items = stack; stack = metastack.pop(); stack.append(dict(zip(*([iter(items)]*2))))"); + case OP_EMPTY_DICT: + return strdup ("stack.append({})"); + case OP_APPENDS: /* items above the mark extend the list left on top of the restored stack */ + return strdup ("items = stack; stack = metastack.pop(); stack[-1].extend(items)"); + case OP_LIST: + return strdup ("item = stack; stack = metastack.pop(); stack.append(item)"); + case OP_EMPTY_LIST: + return strdup ("stack.append([])"); + case OP_OBJ: /* mark-delimited: class first, then its arguments */ + return strdup ("args = stack; stack = metastack.pop(); cls = args.pop(0); stack.append(_instantiate(cls, args))"); + case OP_SETITEM: /* value is on top, key below it */ + return strdup ("value = stack.pop(); key = stack.pop(); stack[-1][key] = value"); + case OP_TUPLE: + return strdup ("items = stack; stack = metastack.pop(); stack.append(tuple(items))"); + case OP_EMPTY_TUPLE: + return strdup ("stack.append(())"); + case OP_SETITEMS: + return strdup ("items = stack; stack = metastack.pop(); stack[-1].update(zip(*([iter(items)]*2)))"); + case OP_NEWOBJ: + return strdup ("args = stack.pop(); cls = stack.pop(); stack.append(cls.__new__(cls, *args))"); + case OP_TUPLE1: + return strdup ("stack[-1:] = [tuple(stack[-1:])]"); + case OP_TUPLE2: + return strdup ("stack[-2:] = [tuple(stack[-2:])]"); + case OP_TUPLE3: + return strdup ("stack[-3:] = [tuple(stack[-3:])]"); + case OP_NEWTRUE: + return strdup ("stack.append(True)"); + case OP_NEWFALSE: + return strdup ("stack.append(False)"); + case OP_EMPTY_SET: + return strdup ("stack.append(set())"); + case OP_ADDITEMS: + return strdup ("items = stack; stack = metastack.pop(); for item in items: stack[-1].add(item)"); + case OP_FROZENSET: + return strdup ("metastack[-1].append(frozenset(stack)); stack = metastack.pop()"); + case OP_NEWOBJ_EX: + return strdup ("kwargs = stack.pop(); args = stack.pop(); cls = stack.pop(); stack.append(cls.__new__(cls, *args, **kwargs))"); + case OP_STACK_GLOBAL: + return strdup ("name = stack.pop(); module = stack.pop(); stack.append(find_class(module, name))"); + case OP_MEMOIZE: + return strdup 
("memo[len(memo)] = stack[-1]"); + case OP_NEXT_BUFFER: + return strdup ("stack.append(next(out_of_band_buffers))"); + case OP_READONLY_BUFFER: + return strdup ("with memoryview(stack[-1]) as m: stack[-1] = m.toreadonly()"); + } + return NULL; +} - size_t i; - for (i = 0; i < R_ARRAY_SIZE (insts); i++) { - if (!strcmp (data, insts[i].name)) { - return strdup (insts[i].ret); - } + +static inline char *parse_with_args(int op, char *args) { /* pseudo code for an opcode followed by operand text args: a newly allocated string, or NULL when op has no case here */ + switch (op) { + case OP_FAILURE: + return NULL; // opcode not found in table + case OP_FRAME: + return strdup ("pass"); + case OP_EXT1: + case OP_EXT2: + case OP_EXT4: + return r_str_newf ("stack.append(get_extension(%s))", args); + case OP_BININT: + case OP_BININT2: + case OP_BININT1: + case OP_LONG4: + case OP_LONG1: + case OP_STRING: + case OP_FLOAT: + return r_str_newf ("stack.append(%s)", args); + case OP_BINSTRING: + case OP_SHORT_BINSTRING: + return r_str_newf ("stack.append(b%s)", args); + case OP_BINGET: + case OP_LONG_BINGET: + return r_str_newf ("stack.append(memo[%s])", args); + case OP_PROTO: + return r_str_newf ("proto = %s", args); + case OP_BINPUT: + case OP_LONG_BINPUT: /* memo store, same as binput with a wider index */ + return r_str_newf ("memo[%s] = stack[-1]", args); + case OP_SHORT_BINBYTES: + case OP_BINBYTES8: + case OP_BINBYTES: + return r_str_newf ("stack.append(b%s)", args); + case OP_BINUNICODE8: + case OP_BINUNICODE: + case OP_SHORT_BINUNICODE: + case OP_UNICODE: + return r_str_newf ("stack.append(str(%s, 'utf-8', 'surrogatepass'))", args); + case OP_BYTEARRAY8: + return r_str_newf ("stack.append(bytearray(b%s))", args); + case OP_INST: + return r_str_newf ("args = stack; stack = metastack.pop(); stack.append(_instantiate(find_class(*%s.split()), args))", args); + case OP_GLOBAL: + return r_str_newf ("stack.append(find_class(*%s.split()))", args); + case OP_BINFLOAT: + return r_str_newf ("stack.append(float(%s))", args); + case OP_INT: + case OP_LONG: + return r_str_newf ("stack.append(int(%s))", args); + case OP_PERSID: + return r_str_newf 
("stack.append(persistent_load(%s))", args); + case OP_GET: + return r_str_newf ("stack.append(memo[int(%s)])", args); + case OP_PUT: + return r_str_newf ("memo[int(%s)] = stack[-1]", args); } + r_warn_if_reached (); return NULL; } -static char *parse(RAsmPluginSession *aps, const char *data) { +static char *pickle_parse(RAsmPluginSession *aps, const char *data) { /* splits data at its first space into mnemonic and operand text and returns the matching pseudo string, or NULL */ R_RETURN_VAL_IF_FAIL (R_STR_ISNOTEMPTY (data), NULL); - const char *args = strchr (data, ' '); - if (!args) { - return parse_no_args (data); + const char *carg = strchr (data, ' '); + if (!carg) { + return parse_no_args (name_to_op (data)); } - return NULL; + + char *opstr = strdup (data); // get a non-const str to manipulate + if (!opstr) { + return NULL; + } + + char *args = &opstr[carg - data]; + if (args && *args == ' ') { + *args = '\0'; + do { + args++; + } while (*args == ' '); + } + + char *ret = NULL; + if (args) { + ret = parse_with_args (name_to_op (opstr), args); + } + + free (opstr); + return ret; } RAsmPlugin r_asm_plugin_pickle = { @@ -69,7 +174,7 @@ RAsmPlugin r_asm_plugin_pickle = { .name = "pickle", .desc = "Pickle pseudo syntax", }, - .parse = parse, + .parse = pickle_parse, }; #ifndef R2_PLUGIN_INCORE