forked from carlosgprado/MILF
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmilf.py
1608 lines (1128 loc) · 55.9 KB
/
milf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#
# milf.py
#
# Some useful methods in vulnerability discovery.
# Let the snake make your life a bit easier
#
# Implemented using the idaapi and idautils modules
import re
from idaapi import *
from idautils import *
from idc import *
########################################
# NetworkX (optional) support
# for advanced graphs
try:
    import networkx as nx
    # The plotting package is spelled "matplotlib". The previous spelling
    # ("malplotlib") can never be imported, so the except branch always ran
    # and NetworkX support was disabled on every load.
    import matplotlib.pyplot as plt
    NetworkX = True
except Exception:
    # Best effort: the graph libraries are optional, so any failure while
    # importing them only switches the feature off.
    print("[debug] Deactivating support for NetworkX library")
    NetworkX = False
###################################################################################################
class IDAnalyzer():
def __init__(self, debug = False, nx_support = True):
    '''
    Store the configuration flags and build the import table.
    @type debug: bool
    @param debug: (optional) when set, several methods print "[debug]" traces
    @type nx_support: bool
    @param nx_support: (optional) flag checked by advanced_connect_graph()
    '''
    self.nx_support = nx_support
    self.debug = debug
    # Filled by _enum_all_imports() through the _imp_cb callback
    self.import_dict = {}
    self._enum_all_imports()
def banner(self):
    ''' Print the two banner lines. '''
    for banner_line in ("IDAnalyzer.\n", "it's python, biatch\n"):
        print(banner_line)
def mark_dangerous(self):
    '''
    Paint every call site of a fixed list of dangerous functions.
    Each name is resolved with LocByName() and every code reference to the
    resulting address is colored at item level.
    No arguments.
    @return: True
    '''
    RED = 0x2020c0
    risky_names = ("strcpy", "strncpy", "memmove", "memcpy", "sprintf", "lstrcpyW", "lstrcpyA", "memset")
    for risky in risky_names:
        target_ea = LocByName(risky)
        if self.debug:
            print("Function %s at %08x" % (risky, target_ea))
        # Walk the code cross-references pointing at the function
        for call_site in CodeRefsTo(target_ea, True):
            if self.debug:
                print("\tcalled from %s (%08x)" % (GetFunctionName(call_site), call_site))
            SetColor(call_site, CIC_ITEM, RED)
    return True
def mark_switches(self, graph, color = 0x20c020):
    '''
    Convenience wrapper: color every address reported by enum_switches().
    @type graph: dictionary
    @param graph: Complex data structure. See connect_graph()
    @type color: hex
    @param color: (optional) color applied to each reported address
    @return: True
    '''
    # enum_switches() returns a dictionary keyed by instruction address
    for jump_ea in self.enum_switches(graph):
        SetColor(jump_ea, CIC_ITEM, color)
    return True
def _enum_all_imports(self):
    '''
    Populate self.import_dict with the imports of every imported module.
    The actual filling is done by the _imp_cb callback, which is handed to
    enum_import_names() once per module.
    @rtype: dictionary
    @return: self.import_dict, { name : idata_ea }
    '''
    module_count = get_import_module_qty()
    if self.debug:
        print("[debug] Found %d imported modules" % module_count)
    for index in range(module_count):
        if get_import_module_name(index):
            enum_import_names(index, self._imp_cb)
        else:
            # Modules without a name are reported and skipped
            print("[x] Could not get import module name for #%d" % index)
    return self.import_dict
def _imp_cb(self, ea, name, ord):
'''
Used by _enum_all_imports.
Callback function used by idaapi.enum_import_names()
@return: True '''
if not name:
self.import_dict[ord] = ea
else:
self.import_dict[name] = ea
return True
def _find_import_name(self, iaddr):
'''
Translates addresses to import names through a dictionary lookup.
@type iaddr: address
@param iaddr: Address of import
@return: name (if successful) or same argument (on failure) '''
for k in self.import_dict.keys():
if self.import_dict[k] == iaddr:
name = k
if name:
return name
else:
return iaddr
def graph_down(self, ea, graph = None, path = None):
    '''
    Creates a downgraph of xrefs FROM this function.
    It calls itself recursively for every callee not visited yet.
    @type ea: address
    @param ea: address of ROOT NODE
    @type graph: dictionary
    @param graph: (optional) graph being built; used by the recursion.
                  A fresh dictionary is created when omitted.
    @type path: set
    @param path: (optional) addresses already visited; used by the recursion.
                 A fresh set is created when omitted.
    @rtype: dictionary
    @return: Dictionary of function ea's and child *addresses* { ea : [c1_ea, c2_ea, ...] }
    @note: a callee is listed as a child only the first time it is reached;
           targets already in "path" are skipped.
    '''
    # The containers used to be mutable default arguments, which are created
    # once and shared by every call: a second top-level call would start
    # with the nodes visited by the first one.
    if graph is None:
        graph = dict()
    if path is None:
        path = set()

    graph[ea] = list()
    path.add(ea)

    # Only the call instructions of the function are of interest
    call_sites = [x for x in FuncItems(ea) if is_call_insn(x)]
    for call_ea in call_sites:
        for xref in XrefsFrom(call_ea, XREF_FAR):
            if not xref.iscode:
                continue
            if xref.to not in path:  # Eliminates recursions
                graph[ea].append(xref.to)
                self.graph_down(xref.to, graph, path)

    return graph
def graph_up(self, ea, graph = None, path = None):
    '''
    Creates an upgraph of xrefs TO this function.
    It calls itself recursively for every caller not visited yet.
    @type ea: address
    @param ea: address of ROOT NODE (bottom)
    @type graph: dictionary
    @param graph: (optional) graph being built; used by the recursion.
                  A fresh dictionary is created when omitted.
    @type path: set
    @param path: (optional) addresses already visited; used by the recursion.
                 A fresh set is created when omitted.
    @rtype: dictionary
    @return: Dictionary of function ea's and parent addresses { ea : [p1_ea, p2_ea, ...] }
    @note: a caller is listed as a parent only the first time it is reached;
           callers already in "path" are skipped.
    '''
    # The containers used to be mutable default arguments, which are created
    # once and shared by every call: a second top-level call would start
    # with the nodes visited by the first one.
    if graph is None:
        graph = dict()
    if path is None:
        path = set()

    graph[ea] = list()
    path.add(ea)

    for xref in XrefsTo(ea, XREF_FAR):
        if not xref.iscode:
            continue
        func = get_func(xref.frm)  # idaapi.func_t or None
        if not func:
            # The reference does not come from inside a function
            continue
        caller_addr = func.startEA
        if caller_addr not in path:  # Eliminates recursions
            graph[ea].append(caller_addr)
            self.graph_up(caller_addr, graph, path)

    return graph
def _colorize_graph(self, node_list, color = 0x2020c0):
'''
Internal method. See show_path() for an example wrapper.
It paints a *list* of functions with some color.
@type graph: List
@param graph: List of nodes_ea
@type color: hex
@param color: (optional) color to paint the functions '''
for x in node_list:
SetColor(x, CIC_FUNC, color)
return True
def reset_colorize_graph(self, c_graph):
'''
Convenience method.
Set color back to white for selected graph.
@type graph: List
@param graph: List of nodes
@note: Call with "all" string to reset the whole module. '''
WHITE = 0xffffff
if c_graph == 'all':
for function in Functions():
SetColor(function, CIC_FUNC, WHITE)
else:
self._colorize_graph(c_graph, WHITE)
return True
def _translate_ea_name(self, ea):
    '''
    Translates an ea to a function/import name.
    The function name is tried first; addresses inside the '.idata'
    segment are then looked up in the import table.
    @type ea: address
    @param ea: address to lookup
    @return: function/import name (on success) or same argument (on failure)
    '''
    func_name = GetFunctionName(ea)
    if func_name:
        return func_name
    if SegName(ea) != '.idata':
        return ea
    # The address is inside the imports section
    import_name = self._find_import_name(ea)
    if import_name:
        return import_name
    return ea
def translate_graph(self, graph):
'''
Takes a graph, { node: [child1, child2, ...], ...}
and lookup as many function names as possible.
@type graph: dictionary
@param graph: dictionary of function ea's and "child" nodes { ea : [c1_ea, c2_ea, ...] }
@rtype: dictionary
@return: same dictionary but names instead of ea's (where possible) '''
translated_graph = dict()
# This loop translates the dict keys (nodes)
for node in graph.keys():
translated_key = self._translate_ea_name(node)
translated_graph[translated_key] = list()
# This loop translates the dict values (children)
for child in graph[node]: # traverses a list
translated_graph[translated_key].append(self._translate_ea_name(child))
return translated_graph
def connect_graph(self, origin, destination):
    '''
    Intersects the downgraph of "origin" with the upgraph of "destination".
    Only nodes present in both graphs are kept.
    @type origin: string
    @param origin: Function NAME
    @type destination: string
    @param destination: Function NAME
    @rtype: dictionary
    @return: Complex data {
                node_ea : {
                    'node': node_ea,
                    'children': [child1_ea, child2_ea...],
                    'parents': [parent1_ea, parent2_ea]
                    },
                ...}
    '''
    # Fresh containers are passed explicitly so that the result never
    # depends on nodes remembered from a previous traversal.
    gdown = self.graph_down(LocByName(origin), dict(), set())
    gup = self.graph_up(LocByName(destination), dict(), set())

    gconnect = dict()
    for node_ea in gdown:
        if node_ea in gup:
            gconnect[node_ea] = {
                'node': node_ea,
                'children': gdown[node_ea],
                'parents': gup[node_ea],
            }

    return gconnect
def connect_graph_import(self, origin, destination):
'''
Wrapper to connect_graph(). This allows origin to be an import.
Ex. Graph between "recv" and "WriteFile"
@type origin: string
@param origin: Function NAME
@type destination: string
@param destination: Function NAME
@note: This returns several "connect graphs", one for every function
calling the "origin" import. Indexed by address.
See connect_graph() for graph type definition.
@rtype: dictionary
@return: Complex data {imp_caller1_ea : connect_graph1, ...} '''
graph_dict = dict()
import_callers_dict = self._find_import_callers(origin)
for imp_caller_addr in import_callers_dict.keys():
# imp_caller_addr is the address within the function, where
# the actual call instruction is located, not the ea (beginning)
imp_caller_name = GetFunctionName(imp_caller_addr)
imp_caller_ea = LocByName(imp_caller_name)
graph_dict[imp_caller_ea] = self.connect_graph(imp_caller_name, destination)
return graph_dict
def advanced_connect_graph(self, origin, destination):
'''
Using networkx library.
http://networkx.lanl.gov
@todo: As with ConnectGraph!OnRefresh, improve the clumsy algorithm :) '''
if self.nx_support:
gdown = self.graph_down(LocByName(origin))
gup = self.graph_up(LocByName(destination))
nx_gconnect = nx.DiGraph()
gconnect = self.connect_graph(origin, destination)
for x in self.gconnect.itervalues():
node_ea = x['node']
nx_gconnect.add_node(node_ea)
for c in x['children']:
try:
nx_gconnect.add_node(c)
nx_gconnect.add_edge(node_ea, c)
except:
continue
for p in x['parents']:
try:
nx_gconnect.add_node(p)
nx_gconnect.add_edge(p, node_ea)
except:
continue
nx.draw(nx_gconnect)
plt.show()
return True
else:
print "[debug] Sorry, support for networkx is *disabled*"
return False
def show_path(self, origin, destination, color = 0x2020c0):
    '''
    Colorizes a path.
    Originally thought to be useful to visualize "connect graphs".
    @type origin: string
    @param origin: Function NAME
    @type destination: string
    @param destination: Function NAME
    @type color: hex
    @param color: (optional) color handed to _colorize_graph()
    @rtype: dictionary
    @return: Complex struct. See connect_graph()
    '''
    conn_graph = self.connect_graph(origin, destination)
    # _colorize_graph() wants a plain list of node addresses
    node_addresses = []
    for entry in conn_graph.values():
        node_addresses.append(entry['node'])
    self._colorize_graph(node_addresses, color)
    return conn_graph
def enum_switches(self, graph):
    '''
    Enumerate jump instructions that have a 'cmp' close before them,
    for every function in the graph.
    Shamelessly copied from Aaron Portnoy :)
    @type graph: graph
    @param graph: Complex structure. See connect_graph()
    @rtype: dictionary
    @return: dictionary { address : [cmp_mnem, disasm] }
    '''
    jump_mnemonics = ['jmp', 'jz', 'jnz', 'jg', 'jl', 'ja', 'jb']
    lookback = 5  # how many previous heads are inspected for a 'cmp'
    switch_dict = {}

    # Flatten the graph data structure into a list of node addresses
    node_addresses = []
    for entry in graph.values():
        node_addresses.append(entry['node'])

    for func_start in node_addresses:
        # Functions without a defined end (probably library calls) are skipped
        if FindFuncEnd(func_start) == 0xFFFFFFFF:
            continue
        for insn_ea in FuncItems(func_start):
            if GetMnem(insn_ea) not in jump_mnemonics:
                continue
            # Step backwards looking for the comparison
            candidate = PrevHead(insn_ea, 0)
            for _ in range(lookback):
                if GetMnem(candidate) == 'cmp':
                    switch_dict[insn_ea] = [GetDisasm(candidate), GetDisasm(insn_ea)]
                    break
                candidate = PrevHead(candidate, 0)

    return switch_dict
def imm_compares(self, graph):
    '''
    Collect every compare against an immediate value in a graph.
    It's useful when analyzing proprietary formats.
    @type graph: graph
    @param graph: Complex data structure. See connect_graph()
    @rtype: dictionary
    @return: dictionary of { addr : [op1, op2], ... }
    '''
    IMMEDIATE = 5  # operand type tested on the second operand
    imm_cmp = {}

    # Flatten the graph data structure into a list of node addresses
    node_addresses = []
    for entry in graph.values():
        node_addresses.append(entry['node'])

    for func_start in node_addresses:
        # Functions without a defined end (probably library calls) are skipped
        if FindFuncEnd(func_start) == 0xFFFFFFFF:
            continue
        for insn_ea in FuncItems(func_start):
            if 'cmp' not in GetDisasm(insn_ea):
                continue
            if GetOpType(insn_ea, 1) != IMMEDIATE:
                continue
            if self.debug:
                print("[debug] imm cmp at 0x%08x: %s" % (insn_ea, GetDisasm(insn_ea)))
            imm_cmp[insn_ea] = [GetOpnd(insn_ea, 0), GetOpnd(insn_ea, 1)]

    return imm_cmp
def mark_imm_compares(self, color = 0x2020c0):
    '''
    Color every compare against an immediate value in the function
    under the cursor (ScreenEA()).
    Very useful when debugging parsers, for example.
    @type color: hex
    @param color: color for the mark
    @return: True
    '''
    IMMEDIATE = 5  # operand type tested on the second operand
    for insn_ea in FuncItems(ScreenEA()):
        if "cmp" not in GetDisasm(insn_ea):
            continue
        if GetOpType(insn_ea, 1) != IMMEDIATE:
            continue
        if self.debug:
            print("[debug] imm cmp at 0x%08x: %s" % (insn_ea, GetDisasm(insn_ea)))
        SetColor(insn_ea, CIC_ITEM, color)
    return True
def function_bb_connect(self, bb_src_ea, bb_dst_ea, color = 0x2020c0):
    '''
    Graphically connect (color) basic blocks within a function.
    Colors the blocks that are both below "bb_src_ea" and above "bb_dst_ea".
    It could save your life! :)
    @type bb_src_ea: address
    @param bb_src_ea: start address of the source basic block
    @type bb_dst_ea: address
    @param bb_dst_ea: start address of the destination basic block
    @type color: hex
    @param color: (optional) color for the connecting blocks
    '''
    self.color = color

    f = get_func(bb_src_ea)  # func_t object, or None outside any function
    if not f:
        print("[debug] No function found at %08x" % bb_src_ea)
        return

    # Fresh sets are passed explicitly so that blocks collected by a
    # previous invocation can never leak into this result.
    set_down = self._aux_calc_down_set(f, [bb_src_ea], set())
    set_up = self._aux_calc_up_set(f, [bb_dst_ea], set())

    ConnectedPaths = set_down.intersection(set_up)
    if ConnectedPaths:
        for PathBlock in ConnectedPaths:
            SetColor(PathBlock, CIC_ITEM, self.color)
    else:
        print("[debug] No path connecting those two basic blocks :(")
def _aux_calc_down_set(self, f, CurrentBlockLayer, DownGraphBlockSet = set([])):
'''
Analogous to graph_down().
To set the "root" block, call with CurrentBlockLayer = [bb_src_ea]
@rtype: set
@return: set containing upgraph blocks '''
self.FuncFlowChart = FlowChart(f)
self.CurrentBlockLayer = CurrentBlockLayer
self.NextBlockLayer = list()
# Iterate through all basic blocks and get the egress connections.
for bb in self.CurrentBlockLayer: # bb: address
block = self._aux_lookup_ea_bb(f, bb)
for enode in block.succs(): # enode: basic block type
if enode.startEA not in DownGraphBlockSet: # Eliminates recursions
self.NextBlockLayer.append(enode.startEA)
DownGraphBlockSet.add(enode.startEA)
self._aux_calc_down_set(f, self.NextBlockLayer, DownGraphBlockSet)
return DownGraphBlockSet
def _aux_lookup_ea_bb(self, f, ea):
    '''
    Finds the basic block of a function that starts at a given address.
    The arguments and the flow chart are also stored on the instance.
    @type f: func_t object
    @param f: represents the function of interest
    @type ea: address
    @param ea: address of the basic block
    @rtype: Basic Block Object
    @return: the matching basic block object, or False if none starts at "ea"
    '''
    self.f, self.ea = f, ea
    self.FlowChart = chart = FlowChart(f)
    for block in chart:
        if block.startEA == ea:
            return block
    return False
def _aux_calc_up_set(self, f, CurrentBlockLayer, UpGraphBlockSet = set([])):
'''
Auxiliary function. I couldn't make Basic Block preds() work,
so I need to calculate the upgraph myself.
Note: preds(), I kill you! :) '''
self.FuncFlowChart = FlowChart(f)
self.CurrentBlockLayer = CurrentBlockLayer
self.NextBlockLayer = list()
for block in self.FuncFlowChart: # full lookup (it could be enhanced)
for bsuccs in block.succs(): # .succs() returns a generator
if bsuccs.startEA in CurrentBlockLayer: # it's a parent
if block.startEA not in UpGraphBlockSet:
self.NextBlockLayer.append(block.startEA)
UpGraphBlockSet.add(block.startEA)
self._aux_calc_up_set(f, self.NextBlockLayer, UpGraphBlockSet)
return UpGraphBlockSet
def function_graph(self, ea):
    '''
    Maps every basic block of a function to the blocks it branches to.
    @type ea: address
    @param ea: address anywhere within the analyzed function.
    @rtype: dictionary
    @return: dictionary { block_ea: [branch1_ea, branch2_ea], ... }
    '''
    bb_dict = {}
    for block in FlowChart(get_func(ea)):
        bb_dict[block.startEA] = [succ.startEA for succ in block.succs()]
    return bb_dict
def locate_function_call(self, func_name, callee):
    '''
    Convenience function. It locates a particular function call *within a function*.
    The match is textual: an instruction is reported when its disassembly
    contains both "call" and the callee name.
    @type func_name: string
    @param func_name: NAME of the function containing the call
    @type callee: string
    @param callee: NAME of the function being called
    @rtype: List
    @return: List of addresses ("call callee" instructions)
    @todo: IIRC this needs to be improved/fixed
    '''
    call_addr_list = list()
    func_ea = LocByName(func_name)  # returns startEA

    # If there's a thunk, it won't be called directly from the function.
    # A callee with exactly one reference, coming from a thunk function,
    # is replaced by the name of that thunk.
    callee_ea = LocByName(callee)
    xrl = list(XrefsTo(callee_ea, True))
    if len(xrl) == 1:
        xrf = get_func(xrl[0].frm)
        # get_func() yields nothing when the reference is not inside a
        # function; that cannot be a thunk, so the callee is left as is.
        if xrf and (xrf.flags & idaapi.FUNC_THUNK) != 0:
            callee = GetFunctionName(xrl[0].frm)

    for instr in FuncItems(func_ea):
        disasm = GetDisasm(instr)
        if "call" in disasm and callee in disasm:
            call_addr_list.append(instr)
            if self.debug:
                print("[debug] Found %s at %08x" % (disasm, instr))

    return call_addr_list
def dangerous_size_param(self, color = 0xFF8000, mark = False):
    '''
    Some functions copy buffers of size specified by a size_t parameter.
    If this isn't a constant, there's a chance that it can be manipulated
    leading to a buffer overflow.
    Example: void *memset( void *dest, int c, size_t count );

    Heuristic: inside every function calling one of the imports, the third
    most recent "push" seen before the call is taken as the size argument
    and reported when its operand type is below 5 (5 is the type this file
    treats as an immediate value).
    @type color: hex
    @param color: (optional) color used for the reported calls
    @type mark: bool
    @param mark: (optional) when set, the reported calls are colored too
    @return: True
    '''
    regexp = ".*memset|.*memcpy|.*memmove|.*strncpy|.*strcpy.*|.*sncpy"
    candidate_dict = self._find_import_callers(regexp)

    for candidate_ea, imp_ea_list in candidate_dict.items():
        func_caller = GetFunctionName(candidate_ea)
        # For every candidate function, look for the calls
        # to dangerous functions within it
        for danger_ea in imp_ea_list:
            imp_callee = Name(danger_ea)
            # List of addresses within the function ("call dangerous_func")
            addr_list = self.locate_function_call(func_caller, imp_callee)
            if not addr_list:
                continue

            print("------ Analysing %s ------" % func_caller)
            func_start = LocByName(func_caller)
            # if the function end isn't defined (probably a library call) then skip it
            if FindFuncEnd(func_start) == 0xFFFFFFFF:
                continue

            tmp_push_list = list()  # addresses of the push instructions seen so far
            for instr in FuncItems(func_start):
                if "push" in GetDisasm(instr):
                    tmp_push_list.append(instr)
                elif instr in addr_list and len(tmp_push_list) >= 3:  # sanity check :)
                    push_size_addr = tmp_push_list[-3]
                    if GetOpType(push_size_addr, 0) < 5:  # This can be improved
                        print("[debug] %08x - %s" % (instr, GetDisasm(push_size_addr)))
                        if mark:
                            # Honor the "color" argument (a fixed value
                            # used to be applied regardless of it)
                            SetColor(instr, CIC_ITEM, color)

    return True
def locate_file_io(self, interactive = False):
'''
Convenience function
Finds interesting IO related *imports* and the functions calling them.
Call with interactive = True to display a custom viewer ;)
@rtype: Dictionary (of lists)
@return: Dictionary containing the functions calling the imported functions,
{fn_ea: [file_io1_ea, file_io2_ea, ...], ...} '''
# The meat and potatoes is the regexp
regexp = ".*readf.*|.*write.*|.*openf.*|f.*print.*"
callerDict = self._find_import_callers(regexp)
if interactive:
file_io_cview = SuspiciousFuncsViewer()
if file_io_cview.Create("File IO", callerDict):
file_io_cview.Show()
else:
print "[debug] Failed to create custom view: File IO"
return callerDict
def locate_net_io(self, interactive = False):
'''
Convenience function
Finds interesting network related *imports* and the functions calling them.
Call with interactive = True to display a custom viewer ;)
@rtype: Dictionary (of lists)
@return: Dictionary containing the functions calling the imported functions,
{fn_ea: [net_io1_ea, net_io2_ea, ...], ...} '''
# The meat and potatoes is the regexp
regexp = "recv|recvfrom|wsa.*"
callerDict = self._find_import_callers(regexp)
if interactive:
net_io_cview = SuspiciousFuncsViewer()
if net_io_cview.Create("Net IO", callerDict):
net_io_cview.Show()
else:
print "[debug] Failed to create custom view: Net IO"
return callerDict
def locate_allocs(self, interactive = False):
'''
Convenience function
Finds interesting allocation related *imports* and the functions calling them.
Call with interactive = True to display a custom viewer ;)
@rtype: Dictionary (of lists)
@return: Dictionary containing the functions calling the imported functions,
{fn_ea: [alloc1_ea, alloc2_ea, ...], ...} '''
# The meat and potatoes is the regexp
regexp = ".*alloc.*|.*free.*"
callerDict = self._find_import_callers(regexp)
if interactive:
allocs_cview = SuspiciousFuncsViewer()
if allocs_cview.Create("Allocs", callerDict):
allocs_cview.Show()
else:
print "[debug] Failed to create custom view: Allocs"
return callerDict
def locate_most_referenced(self, number = 10, interactive = False):
    '''
    Rank the functions by the number of references to them.
    Identifying these is an important first step.
    @type number: int
    @param number: (optional) how many functions to report
    @type interactive: bool
    @param interactive: (optional) when set, the result is also shown in a custom viewer
    @rtype: dictionary
    @return: dictionary { func_ea : number_of_references } with the top entries
    '''
    self.number = number
    self.interactive = interactive

    # Count the references to every function
    referenceDict = {}
    for funcAddr in Functions():
        referenceDict[funcAddr] = len(list(XrefsTo(funcAddr, True)))

    # Log to IDA's output window and to a custom viewer <3
    print("Top %d most referenced functions" % self.number)

    # Highest count first; ties are broken by address, highest first
    ranked = sorted(referenceDict.items(), key = lambda pair: (pair[1], pair[0]), reverse = True)

    topReferencesDict = {}
    for rank, (func_ea, refnumber) in enumerate(ranked, 1):
        if rank > self.number:
            break
        print("%s : %s" % (GetFunctionName(func_ea), refnumber))
        topReferencesDict[func_ea] = refnumber

    # Create the custom viewer
    if self.interactive:
        viewer = SuspiciousFuncsViewer()
        if not viewer.Create("Top referenced", topReferencesDict):
            print("[debug] Failed to create custom view: Top referenced")
        else:
            viewer.Show()

    return topReferencesDict
def _find_import_callers(self, regexp):
'''
Finds interesting imported functions and the nodes that call them.
Very handy in locating user inputs.
@attention: There are imports called through a thunk and directly.
@rtype: Dictionary (of lists)
@return: Dictionary containing *the address of the functions*
calling the imports,
{fn_call_ea: [idata1_ea, idata2_ea, ...], ...}
@todo: IIRC this needs some review '''
importCallers = dict()
importPattern = re.compile(regexp, re.IGNORECASE)
for imp_name, idata_ea in self.import_dict.iteritems():
# This dict has the *IAT names* (i.e. __imp_ReadFile, within the .idata section)
if importPattern.match(imp_name):
for import_caller in XrefsTo(idata_ea, True):
import_caller_addr = import_caller.frm
import_caller_fn = get_func(import_caller_addr)
if import_caller_fn:
# Check if caller is a THUNK
if (import_caller_fn.flags & idaapi.FUNC_THUNK) != 0:
# It IS a thunk
for thunk_caller in XrefsTo(import_caller_addr, True):
thunk_caller_fn = get_func(thunk_caller.frm)
import_caller_ea = thunk_caller_fn.startEA
if importCallers.has_key(import_caller_ea):
# Remove nasty duplicates
if idata_ea in importCallers[import_caller_ea]:
continue
else:
importCallers[import_caller_ea].append(idata_ea)
else:
importCallers[import_caller_ea] = [idata_ea]
else:
# It is NOT a thunk, no need for recursion
import_caller_ea = import_caller_fn.startEA
if importCallers.has_key(import_caller_ea):
# Remove nasty duplicates
if idata_ea in importCallers[import_caller_ea]:
continue
else:
importCallers[import_caller_ea].append(idata_ea)
else:
importCallers[import_caller_ea] = [idata_ea]
else:
#import_caller_fn is None
pass
return importCallers
def export_functions_to_file(self, extended = False):
    '''
    Export all the function address intervals to a file. This will be used by a tracer.
    One line is written per function:
        AAAAAAAA-BBBBBBBB      {ea_start, ea_end}, hexadecimal without '0x'
    The extended option appends ",N" with the number of arguments, computed
    from the function frame assuming 4 bytes per stack argument. In
    extended mode, functions without a frame or without a return address
    member are left out of the file.
    @type extended: bool
    @param extended: (optional) log the number of arguments as well
    '''
    self.extended = extended

    filename = AskFile(1, "*.*", "File to export functions to?")
    if not filename:
        # NOTE(review): assumes AskFile() gives an empty value when the
        # dialog is cancelled -- confirm against the IDAPython docs.
        print("[debug] No file selected, nothing exported")
        return

    print("Exporting function addresses to %s\n" % filename)

    idx = 0
    with open(filename, "w") as out_file:
        for function_start in Functions():
            function_end = GetFunctionAttr(function_start, FUNCATTR_END)
            # "%x" never emits the '0x' prefix nor the trailing 'L' that
            # hex() adds to long integers, which made lines unparsable.
            addr_interval_string = "%x-%x" % (function_start, function_end)

            if self.extended:
                # The frame belongs to the function (the output file object
                # used to be passed here by mistake)
                frame = GetFrame(function_start)
                if frame is None:
                    continue
                ret = GetMemberOffset(frame, " r")
                if ret == -1:
                    continue
                firstArg = ret + 4
                # Every arg on the stack is 4 bytes long
                NumberOfArguments = (GetStrucSize(frame) - firstArg) // 4
                addr_interval_string += ",%d" % NumberOfArguments

            out_file.write(addr_interval_string + '\n')
            idx += 1

    print("%d functions written to disk" % idx)
def import_functions_from_file(self):
    '''
    Import function start addresses from a file, color those functions
    and list them in a custom viewer.
    Each line is expected to start with a hexadecimal address followed by
    '-' (the format written by export_functions_to_file).
    Rudimentary differential debugging, yay!
    '''
    filename = AskFile(0, "*.*", "File to import functions from?")
    if not filename:
        # NOTE(review): assumes AskFile() gives an empty value when the
        # dialog is cancelled -- confirm against the IDAPython docs.
        print("[debug] No file selected, nothing imported")
        return

    print("Importing function start addresses from %s\n" % filename)

    with open(filename, 'r') as in_file:
        function_addresses = in_file.readlines()

    idx = 0
    imported_fn_dict = dict()
    for fa in function_addresses:
        # Only the start address (left of the '-') is used. A trailing 'L'
        # is dropped so files written from long integers still load.
        start_token = fa.split('-')[0].strip().rstrip('L')
        if not start_token:
            continue  # blank line
        try:
            f_addr = int(start_token, 16)
        except ValueError:
            print("[debug] Skipping malformed line: %s" % fa.strip())
            continue
        imported_fn_dict[f_addr] = GetFunctionName(f_addr)
        SetColor(f_addr, CIC_FUNC, 0x188632)
        idx += 1

    print("[debug] %d functions imported from file" % idx)

    # A custom viewer doesn't hurt :)
    imported_fn_cview = SuspiciousFuncsViewer()
    if imported_fn_cview.Create("Specific Functions", imported_fn_dict, onhint_active = False):
        imported_fn_cview.Show()
    else:
        print("[debug] Failed to create custom view: Specific Functions")
def import_basic_blocks_from_file(self):
'''
Import hit basic blocks from a detailed PIN Trace.
A choser allows to somehow re-trace execution within
the functions we are interested in. '''
filename = AskFile(0, "*.*", "File to import basic blocks from?")
print "Importing basic block addresses from %s\n" % filename
token = '$'
idx = 0
f = open(filename, 'r')
lines = f.readlines()
f.close()