Package Libs :: Module librecognition
[hide private]
[frames] | no frames]

Source Code for Module Libs.librecognition

  1  """
 
  2  (c) Immunity, Inc. 2004-2007
 
  3  
 
  4  
 
  5  U{Immunity Inc.<http://www.immunityinc.com>}
 
  6  
 
  7  
 
  8  Library for function recognizing
 
  9  
 
 10  """ 
 11  __VERSION__ = '1.2' 
 12  
 
 13  
 
 14  from libanalyze import * 
 15  from libdatatype import * 
 16  from libstackanalyze import * 
 17  import binascii 
 18  import struct 
 19  import hashlib 
 20  import re 
 21  import string 
 22  import debugger 
 23  import csv 
 24  import os 
 25  
 
26 -class MultiCSVIterator:
27 - def __init__(self, dictionaries):
28 if not isinstance(dictionaries, list): 29 dictionaries = [ dictionaries ] 30 31 self.iterators = [] 32 self.fds = [] 33 self.idx = 0 34 for d in dictionaries: 35 try: 36 fd = open(d, "rb") 37 except: 38 fd = open(d, "w+b") 39 self.iterators.append(csv.reader(fd)) 40 self.fds.append(fd)
41 - def __iter__(self):
42 for i in range(0, self.idx+1): 43 self.fds[i].seek(0) 44 self.idx = 0 45 return self
46
47 - def __del__(self):
48 while self.iterators: 49 self.iterators.pop() 50 for fd in self.fds: 51 fd.close() 52 del self.fds
53
54 - def next(self):
55 try: 56 data = self.iterators[self.idx].next() 57 except StopIteration: 58 if len(self.iterators) > self.idx+1: 59 self.idx += 1 60 return self.next() 61 else: 62 raise StopIteration 63 #append the filename to each line 64 data.append(self.fds[self.idx].name) 65 return data
66
67 -class FunctionRecognition:
68 - def __init__(self, imm, dictionaryfiles=None):
69 """ 70 This class try to recognize a function using different methods 71 (address/signature/heuristic). 72 73 @type imm: Debbuger OBJECT 74 @param imm: Debbuger instance 75 76 @type dictionaryfiles: STRING|LIST 77 @param dictionaryfiles: Name, or list of names, of .dat files inside the Data folder, where're stored the function 78 patterns. Use an empty string to use all .dat files in Data folder. 79 """ 80 self.imm = imm 81 self.heuristicReferencesCache = {} 82 self.heuristicCache = {} 83 self.resolvCache = {} 84 85 if not dictionaryfiles: 86 dictionaryfiles = [] 87 for file in os.listdir("Data"): 88 if file[-4:] == ".dat": 89 dictionaryfiles.append(os.path.join("Data", file)) 90 self.dictionaries = MultiCSVIterator(dictionaryfiles)
91
92 - def resolvFunctionByAddress(self, address, heuristic=90):
93 """ 94 Look up into our dictionaries to find a function match. 95 96 @type address: DWORD 97 @param address: Address of the function to search 98 99 @type heuristic: INTEGER 100 @param heuristic: heuristic threasold to consider a real function match 101 102 @rtype: STRING 103 @return: a STRING with the function's real name or the given address if there's no match 104 """ 105 106 #cache the answers 107 if self.resolvCache.has_key(address): 108 return self.resolvCache[address] 109 110 #try the exact hash method 111 exact = self.makeFunctionHashExact(address) 112 for data in self.dictionaries: 113 if exact == data[4]: 114 self.resolvCache[address] = data[0] 115 break 116 117 #try the heuristic method 118 if not self.resolvCache.has_key(address): 119 ref = self.selectBasicBlock(address) 120 posThreshold = 0 121 posName = "" 122 for data in self.dictionaries: 123 #cut down the possibilities, because the performance, reproducing the BB selection and comparing the result 124 #XXX: it's not a perfect way, thinking of supporting version changes 125 if ref == data[1]: 126 perc = self.checkHeuristic(address, data[2], data[3]) 127 #self.imm.log("similar to function %s in %d%%" % (data[0], perc)) 128 if perc >= heuristic and perc > posThreshold: 129 posThreshold = perc 130 posName = data[0] 131 if posName: 132 self.resolvCache[address] = posName 133 134 #cache the negative answer 135 if not self.resolvCache.has_key(address): 136 self.resolvCache[address] = "%08X" % address 137 138 return self.resolvCache[address]
139
140 - def checkHeuristic(self, address, reference, refFirstCall=[]):
141 """ 142 Check a given address with a precomputed hash of a function. 143 Return a percentage of match (you can use a threasold to consider a real match) 144 145 @type address: DWORD 146 @param address: Address of the function to compare 147 148 @type reference: STRING 149 @param reference: base64 representation of the compressed information about the function 150 151 @type refFirstCall: STRING 152 @param refFirstCall: the same, but following the function pointed by the first call in the first BB. 153 (OPTIONAL) 154 155 @rtype: INTEGER 156 @return: heuristic threasold to consider a real function match 157 """ 158 159 #self.imm.log("checking heuristically: %08X" % address) 160 161 #do the hard work just one time 162 if self.heuristicCache.has_key(address): 163 cfg = self.heuristicCache[address] 164 else: 165 cfg = self.makeFunctionHashHeuristic(address) 166 self.heuristicCache[address] = cfg 167 168 #check reference against our cache 169 sha1 = hashlib.sha1(reference+refFirstCall).digest() 170 if self.heuristicReferencesCache.has_key(sha1): 171 refcfg = self.heuristicReferencesCache[sha1] 172 else: 173 #This's the reference hash to compare with (uncompress just once and cache the results) 174 #Decode each BB-hash 175 refcfg = [] 176 refcfg.append([]) 177 refcfg.append([]) 178 data = binascii.a2b_base64(reference) 179 for o in range(0,len(data),12): 180 (start, left, right) = struct.unpack("LLL",data[o:o+12]) 181 refcfg[0].append([ start, left, right ]) 182 if refFirstCall: 183 data = binascii.a2b_base64(refFirstCall) 184 for o in range(0,len(data),12): 185 (start, left, right) = struct.unpack("LLL",data[o:o+12]) 186 refcfg[1].append([ start, left, right ]) 187 self.heuristicReferencesCache[sha1] = refcfg 188 189 perc1 = self.compareHeuristic(cfg[0][:], refcfg[0][:]) 190 if cfg[1] or refcfg[1]: 191 perc2 = self.compareHeuristic(cfg[1][:], refcfg[1][:]) 192 #use the average 193 perc = (perc1 + perc2) / 2 194 else: 195 perc = perc1 196 197 return perc
198
199 - def compareHeuristic(self, cfg, refcfg):
200 #for tmp in cfg: 201 #self.imm.log("check start: %08X - left: %08X - right: %08X" % (tmp[0],tmp[1],tmp[2])) 202 203 #for tmp in refcfg: 204 #self.imm.log("ref start: %08X - left: %08X - right: %08X" % (tmp[0],tmp[1],tmp[2])) 205 206 diff = eq = 0 207 checked = [] 208 #Compare each BB-hash 209 for info in cfg: 210 bbeq = value = 0 211 for rinfo in refcfg: 212 tmp = 0 213 if info[0] == rinfo[0]: tmp += 1 214 if info[1] == rinfo[1]: tmp += 1 215 if info[2] == rinfo[2]: tmp += 1 216 if tmp > bbeq: 217 bbeq = tmp 218 value = rinfo 219 if bbeq == 3: break 220 try: 221 idx=refcfg.index(value) 222 refcfg.pop(idx) 223 except ValueError: 224 pass 225 #self.imm.log("value %s not found in refcfg" % value) 226 eq += bbeq 227 diff += 3 - bbeq 228 229 #crossed check 230 for rinfo in refcfg: 231 bbeq = value = 0 232 for info in cfg: 233 tmp = 0 234 if info[0] == rinfo[0]: tmp += 1 235 if info[1] == rinfo[1]: tmp += 1 236 if info[2] == rinfo[2]: tmp += 1 237 if tmp > bbeq: 238 bbeq = tmp 239 value = rinfo 240 if bbeq == 3: break 241 try: 242 idx=cfg.index(value) 243 cfg.pop(idx) 244 except ValueError: 245 pass 246 #self.imm.log("value %s not found in cfg" % value) 247 eq += bbeq 248 diff += 3 - bbeq 249 250 #self.imm.log("eq=%d, diff=%d" % (eq,diff)) 251 return eq * 100 / (eq + diff)
252
253 - def makeFunctionHashHeuristic(self, address, compressed = False, followCalls = True):
254 """ 255 Consider: 256 - Control Flow Graph 257 - generalized instructions that: 258 access memory/write memory/use registers/use constant/call/jmp/jmc 259 and all his combinations. 260 - special case of functions with just 1 BB and a couple of calls (follow the first call) 261 262 @type address: DWORD 263 @param address: address of the function to hash 264 265 @type compressed: Boolean 266 @param compressed: return a compressed base64 representation or the raw data 267 268 @type followCalls: Boolean 269 @param followCalls: follow the first call in a single basic block function 270 271 @rtype: LIST 272 @return: the first element is described below and the second is the result of this same function but over the first 273 call of a single basic block function (if applies), each element is like this: 274 a base64 representation of the compressed version of each bb hash: 275 [4 bytes BB(i) start][4 bytes BB(i) 1st edge][4 bytes BB(i) 2nd edge] 276 0 <= i < BB count 277 or the same but like a LIST with raw data. 278 """ 279 280 f = self.imm.getFunction(address) 281 bbs = f.getBasicBlocks() 282 bbmap = {} 283 cfg = {} 284 285 #Make a control flow graph 286 for bb in bbs: 287 cfg[bb.getStart()] = bb.getEdges() 288 289 #Make a hash of each BB 290 for bb in bbs: 291 bbhash_data = [] 292 for op in bb.getInstructions(self.imm): 293 #take into account just information about the opcode 294 instr = [] 295 instr.append(op.getMemType()) 296 instr.append(op.indexed) 297 instr.append(op.getCmdType()) 298 instr.append(op.optype[0]) 299 instr.append(op.optype[1]) 300 instr.append(op.optype[2]) 301 instr.append(op.getSize()) 302 bbhash_data.append(self.hash_a_list(instr)) 303 bbhash = self.hash_a_list(bbhash_data) 304 bbmap[bb.getStart()] = bbhash 305 306 #Replace BB addresses with hashes 307 rcfg = [] 308 for start,edges in cfg.iteritems(): 309 rstart = 0 310 redges = [0, 0] 311 rstart = bbmap[start] 312 if bbmap.has_key(edges[0]): 313 redges[0] = bbmap[edges[0]] 314 if bbmap.has_key(edges[1]): 315 redges[1] = bbmap[edges[1]] 316 rcfg.append([ rstart,redges[0],redges[1] ]) 317 318 #special case for functions with just one basic block and one or more calls 319 firstcall = [] 320 if followCalls and len(bbs) == 1 and len(bbs[0].getCalls()) > 0: 321 #we follow the first call and do the same work there, but avoiding recursion 322 #XXX: why the first? 323 op = self.imm.disasm(bbs[0].getCalls()[0]) 324 if op.getJmpConst(): 325 firstcall = self.makeFunctionHashHeuristic(op.getJmpConst(), compressed, followCalls=False)[0] 326 #self.imm.log("following first call to: %08X" % op.getJmpConst()) 327 del op 328 329 del bbs 330 del f 331 rcfg.sort() 332 333 if compressed: 334 #make the final hash 335 fhash = "" 336 for data in rcfg: 337 #[4 bytes BB(i) start][4 bytes BB(i) 1st edge][4 bytes BB(i) 2nd edge] 338 fhash += struct.pack("LLL", data[0], data[1], data[2]) 339 return [ binascii.b2a_base64(fhash)[:-1], firstcall ] 340 else: 341 return [ rcfg, firstcall ]
342
343 - def hash_a_list(self,data):
344 """ 345 Take a list and return a binary representation of his CRC32. 346 347 @type data: LIST 348 @param data: a list of elements to make the hash 349 350 @rtype: UNSIGNED LONG 351 @return: a hash of the given values 352 """ 353 354 ret = 0 355 for elem in data: 356 ret = binascii.crc32(str(elem), ret) 357 return struct.unpack("L", struct.pack("l",ret))[0]
358
359 - def searchFunctionByHeuristic(self, csvline, heuristic = 90, module = None):
360 """ 361 Search memory to find a function that fullfit the options. 362 363 @type csvline: STRING 364 @param csvline: A line of a Data CSV file. This's a simple support for copy 'n paste from a CSV file. 365 366 @type heuristic: INTEGER 367 @param heuristic: heuristic threasold to consider a real function match 368 369 @type module: STRING 370 @param module: name of a module to restrict the search 371 372 @rtype: LIST 373 @return: a list of tuples with possible function's addresses and the heauristic match percentage 374 """ 375 376 line = csv.reader([csvline]).next() 377 if len(line) < 9: line[7] = "" #support for older entries 378 return self._searchFunctionByHeuristic(line[1], line[2], line[3], line[4], heuristic, module, string.split(line[7],"|"))
379
380 - def _searchFunctionByHeuristic(self, search, functionhash=None, firstcallhash=None, exact=None, heuristic = 90, module = None, firstbb = None):
381 """ 382 Search memory to find a function that fullfit the options. 383 384 @type search: STRING 385 @param search: searchCommand string to make the first selection 386 387 @type functionhash: STRING 388 @param functionhash: the primary function hash (use makeFunctionHash to generate this value) 389 390 @type firstcallhash: STRING 391 @param firstcallhash: the hash of the first call on single BB functions (use makeFunctionHash to generate this value) 392 393 @type exact: STRING 394 @param exact: an exact function hash, this's a binary byte-per-byte hash (use makeFunctionHash to generate this value) 395 396 @type heuristic: INTEGER 397 @param heuristic: heuristic threasold to consider a real function match 398 399 @type module: STRING 400 @param module: name of a module to restrict the search 401 402 @type firstbb: STRING 403 @param firstbb: generalized assembler of the first BB (to search function begin) 404 405 @rtype: LIST 406 @return: a list of tuples with possible function's addresses and the heauristic match percentage 407 """ 408 409 #if the first argument is a LIST, decode it to each real argument of the function, following the order in the CSV file. 410 #this give us a simple support for copy 'n paste from the CSV file. 411 if isinstance(search, list): 412 search.reverse() 413 tmp = search[:] 414 if tmp: search = tmp.pop() 415 if tmp: functionhash = tmp.pop() 416 if tmp: firstcallhash = tmp.pop() 417 if tmp: exact = tmp.pop() 418 if tmp: version = tmp.pop() 419 if tmp: file = tmp.pop() 420 if tmp: firstbb = tmp.pop() 421 422 #this arguments are mandatory 423 if not search or not functionhash: 424 return None 425 426 if not firstcallhash: 427 firstcallhash = "" 428 429 heu_addy = None 430 heu_perc = 0 431 poss_functions = [] 432 poss_return = [] 433 search = string.replace(search, "\\n","\n") 434 if search: 435 if module: 436 #XXX: access directly isn't the best way to do this 437 for key,mod in debugger.get_all_modules().iteritems(): 438 if module.lower() in key.lower(): 439 poss_functions += self.imm.searchCommandsOnModule(mod[0], search) 440 else: 441 poss_functions = self.imm.searchCommands(search) 442 if poss_functions: 443 for poss in poss_functions: 444 #self.imm.log("possible funct: %08X" % poss[0]) 445 addy = self.imm.getFunctionBegin(poss[0]) 446 if not addy: 447 #check entrypoint routine 448 for mod in self.imm.getAllModules().values(): 449 if mod.getMainentry(): 450 #self.imm.log("mainentry: %08X" % mod.getMainentry()) 451 f = StackFunction(self.imm, mod.getMainentry()) 452 if f.isInsideFunction(poss[0]): 453 addy = mod.getMainentry() 454 break 455 if not addy and firstbb: 456 #self.imm.log("Trying with the new firstbb") 457 addy = self.findBasicBlockHeuristically(poss[0], firstbb) 458 if not addy and firstbb: 459 tmp = self.findFirstBB(poss[0]) 460 if tmp: 461 #self.imm.log("Trying with the new firstbb 2nd try:%X"%tmp,tmp) 462 addy = self.findBasicBlockHeuristically(tmp, firstbb) 463 if not addy: 464 addy = poss[0] 465 #self.imm.log("possible start: %08X" % addy) 466 467 #Make a comparision using an Exact Hash 468 if exact: 469 test = self.makeFunctionHashExact(addy) 470 if exact == test and not firstcallhash: 471 #self.imm.log("EXACT match") 472 #when we find an exact match, we don't need to search anymore 473 return [ (addy, 100) ] 474 475 perc = self.checkHeuristic(addy, functionhash, firstcallhash) 476 #self.imm.log("function %08X similar in %d%%" % (addy, perc)) 477 if perc >= heuristic: 478 poss_return.append( (addy,perc) ) 479 #self.imm.log("HEURISTIC match") 480 return poss_return
481
482 - def searchFunctionByName(self, name, heuristic = 90, module = None, version = None):
483 """ 484 Look up into our dictionaries to find a function match. 485 486 @type name: STRING 487 @param name: Name of the function to search 488 489 @type module: STRING 490 @param module: name of a module to restrict the search 491 492 @type version: STRING 493 @param version: restrict the search to the given version 494 495 @type heuristic: INTEGER 496 @param heuristic: heuristic threasold to consider a real function match 497 498 @rtype: LIST 499 @return: a list of tuples with possible function's addresses and the heauristic match percentage 500 """ 501 #the name is case insensitive 502 name = name.lower() 503 504 #Heuristic search 505 poss_return = [] 506 for data in self.dictionaries: 507 if name == data[0].lower(): 508 #support version matching 509 if version and version.lower() != data[6].lower(): 510 continue 511 512 #self.imm.log("trying with: %s, version: %s" % ( data[0], data[5])) 513 if len(data) < 9: data[7] = "" #support for older entries 514 poss_return += self._searchFunctionByHeuristic(data[1], data[2], data[3], data[4], heuristic, module, string.split(data[7],"|")) 515 return poss_return
516
517 - def makeFunctionHashExact(self, address):
518 """ 519 Return a SHA-1 hash of the function, taking the raw bytes as data. 520 521 @type address: DWORD 522 @param address: address of the function to hash 523 524 @rtype: STRING 525 @return: SHA-1 hash of the function 526 """ 527 528 f = self.imm.getFunction(address) 529 bbs = f.getBasicBlocks() 530 bucket = "" 531 data = {} 532 533 for bb in bbs: 534 data[bb.getStart()] = self.imm.readMemory(bb.getStart(), bb.getSize()) 535 536 keys = data.keys() 537 keys.sort() 538 539 for key in keys: 540 bucket += data[key] 541 542 hash = hashlib.sha1(bucket).hexdigest() 543 del bucket 544 del bbs 545 del f 546 return hash
547
548 - def makeFunctionHash(self, address, compressed = False):
549 """ 550 Return a list with the best BB to use for a search and the heuristic hash 551 of the function. This two components are the function hash. 552 553 @type address: DWORD 554 @param address: address of the function to hash 555 556 @type compressed: Boolean 557 @param compressed: return a compressed base64 representation or the raw data 558 559 @rtype: LIST 560 @return: 1st element is the generalized instructions to use with searchCommand 561 2nd element is the heuristic function hash (makeFunctionHashHeuristic) 562 3rd element is an exact hash of the function (makeFunctionHashExact) 563 4th element is a LIST of generalized instructions of the first BB (to find the function begin) 564 """ 565 566 ret = [] 567 ret.append(self.selectBasicBlock(address)) 568 ret.append(self.makeFunctionHashHeuristic(address, compressed)) 569 ret.append(self.makeFunctionHashExact(address)) 570 ret.append(self.generalizeFunction(address)[1][1]) 571 return ret
572
573 - def selectBasicBlock(self, address):
574 bbs = self.generalizeFunction(address) 575 576 #make some punctuation to get the BB with major diversity and 577 #quantity of instructions 578 hpoints = bb = 0 579 for id, instrs in bbs[1].iteritems(): 580 map = {} 581 sum = 0 582 for instr in instrs: 583 sum += 1 584 base = instr.split(" ") 585 if "REP" in base[0]: 586 base = base[0] + " " + base[1] 587 else: 588 base = base[0] 589 map[base] = True 590 if sum > 7: break 591 592 #it's four times more important diversity than quantity 593 #We can use 8 instructions to search, so priorize diversity 594 points = sum + len(map)*4 595 if points > hpoints: 596 #self.imm.log("new hpoint (%d, last %d): %s" % (points,hpoints,instrs[0:8])) 597 #self.imm.log("sum: %d diver: %d" % (sum, len(map))) 598 hpoints = points 599 bb = id 600 ret = "" 601 if bb: 602 ret = string.join(bbs[1][bb][0:8],"\\n") 603 del bbs 604 return ret
605
606 - def generalizeFunction(self, address):
607 """ 608 Take an address an return a generalized version of the function, dismissing 609 address and register dependant information. 610 611 @type address: DWORD 612 @param address: address to the function begin 613 614 @rtype: LIST 615 @return: the 1st value is a DICTIONARY of a Control Flow Graph of the 616 BB conexions (each BB have an arbitrary ID) 617 the 2nd value is a DICTIONARY using this arbitrary BB ID as the key 618 and a LIST of searchCommand suitable, generalized instructions. 619 """ 620 bbcount = 1 621 bbmap = {} 622 cfg = {} 623 bbinfo = {} 624 625 f = self.imm.getFunction(address) 626 bbs = f.getBasicBlocks() 627 628 #Make a control flow graph 629 for bb in bbs: 630 if not bbmap.has_key(bb.getStart()): 631 bbmap[bb.getStart()] = bbcount 632 bbcount += 1 633 if not bbmap.has_key(bb.getEdges()[0]): 634 bbmap[bb.getEdges()[0]] = bbcount 635 bbcount += 1 636 if not bbmap.has_key(bb.getEdges()[1]): 637 bbmap[bb.getEdges()[1]] = bbcount 638 bbcount += 1 639 640 cfg[bbmap[bb.getStart()]] = [ bbmap[bb.getEdges()[0]], bbmap[bb.getEdges()[1]] ] 641 642 regex = [] 643 for op in bb.getInstructions(self.imm): 644 asm = self.generalizeInstruction(op) 645 regex.append(asm) 646 #self.imm.log("%s --> %s" % (op.getDisasm(), asm)) 647 bbinfo[bbmap[bb.getStart()]] = regex 648 649 del bbs 650 del f 651 del regex 652 return [ cfg, bbinfo ]
653
654 - def generalizeInstruction(self, inp):
655 """ 656 Generalize an instruction given an address or an opCode instance 657 658 @type inp: DWORD|OpCode OBJECT 659 @param inp: address to generalize or opcode to generalize 660 661 @rtype: STRING 662 @return: a generalized assembler instruction 663 """ 664 if not isinstance(inp, opCode): 665 op = self.imm.disasm(inp) 666 else: op = inp 667 668 asm = op.getDisasm() 669 670 #replace the constants inside the opcode to the word CONST 671 if op.isConditionalJmp(): 672 asm = "JCC CONST" 673 if op.getImmConst() or op.operand[0][0] == DEC_CONST: 674 #self.imm.log("const part:%X"%op.getImmConst()) 675 r = re.compile("(?<=[ ,\[])[a-z0-9_\.\@\-]*%X" % op.getImmConst(), re.I) 676 asm = r.sub('CONST', asm) 677 if op.getImmConst() > 0xFFFFBFFF: 678 #self.imm.log("neg part!. %X: %X"%(op.getImmConst(),op.getImmConst()-0x100000000)) 679 r = re.compile("(?<=[ ,\[])[a-z0-9_\.\@\-]*\%X" % (op.getImmConst()-0x100000000), re.I) 680 asm = r.sub('CONST', asm) 681 if op.getAddrConst(): 682 if not op.indexed: 683 asm = asm.split("[")[0]+"[CONST]"+asm.split("]")[1] 684 else: 685 tmp = "%+X" % struct.unpack("l", struct.pack("L", op.getAddrConst())) 686 asm = asm.replace(tmp,"+CONST") 687 if op.getJmpConst(): 688 r = re.compile("(?<=[ ,\[])[a-z0-9_\.\-\@]*%X" % op.getJmpConst(), re.I) 689 asm = r.sub('CONST', asm) 690 691 #<JMP &msvcrt._initterm> --> CONST 692 asm = re.sub(r'(?i)<[a-z\.&_0-9\@\-]+>', "CONST", asm) 693 694 #CALL schannel._SetWrapNoEncrypt@12 --> CONST 695 asm = re.sub(r'(?i)[a-z\.&_0-9\@\-]+\.[a-z\.&_0-9\@\-]+',"CONST", asm) 696 697 #generalize registers 698 if not op.getAddrConst() or not op.indexed: 699 asm = re.sub(r'(?i)(?<![A-Z])E([ABCD]X|[SD]I)(?![A-Z])', 'R32', asm) 700 else: 701 #this's a workaround until we fix wildcard searching 702 asm = re.sub(r'(?i)(?<![A-Z\+\-\[])E([ABCD]X|[SD]I)(?![A-Z])', 'R32', asm) 703 asm = re.sub(r'(?i)(?<![A-Z])([ABCD]X|[SD]I)(?![A-Z])', 'R16', asm) 704 asm = re.sub(r'(?i)(?<![A-Z])[ABCD][HL](?![A-Z])', 'R8', asm) 705 706 #XXX: we can decide to forget some opcodes using ANY n 707 #XXX: we can support replacing registers with RA and RB 708 709 return asm
710
711 - def findBasicBlockHeuristically(self, address, firstbb, maxsteps=20):
712 """ 713 Try to match a generalized BB with an address range (moving backward). 714 715 @type address: DWORD 716 @param address: address used to match with the generalized BB 717 718 @type firstbb: LIST 719 @param firstbb: a list of generalized assembler instructions 720 721 @type maxsteps: INTEGER 722 @param maxsteps: max amount of steps to go backward looking for a BB 723 724 @rtype: DWORD|None 725 @return: starting address of the BB that match with the generalized version or None if we don't find it 726 """ 727 #self.imm.log("whole firstbb: %s" % firstbb) 728 index = address 729 instr = 0 730 while instr < maxsteps: 731 num = 0 732 notmatch = False 733 #compare the whole BB 734 for cmp in firstbb: 735 gen = self.generalizeInstruction(self.imm.disasmForward(index, num)) 736 if gen != cmp: 737 notmatch = True 738 #self.imm.log("%s != %s. idx=%08X - num=%d" % (gen,cmp,index,num)) 739 break 740 num += 1 741 742 if notmatch: 743 index = self.imm.disasmBackward(index, 1).getAddress() 744 instr += 1 745 else: 746 #self.imm.log("BB found using heuristic", index) 747 return index 748 749 return None
750
751 - def findFirstBB(self, address, recursive=False):
752 """ 753 The main idea is traverse a function backward following Xrefs until we reach a point where there's no more Xrefs other than CALLs 754 755 @type address: DWORD 756 @param address: address used find the first BB 757 758 @rtype: DWORD|None 759 @return: Address of the first BB of the function or None if we don't find it 760 """ 761 762 poss = [] 763 764 xref = self.imm.getXrefFrom(address) 765 for info in xref: 766 if info[1] != 3: 767 #not a CALL xref 768 poss.append(info[0]) 769 770 if not xref and not recursive: 771 return None 772 if not poss: 773 return address 774 775 for addy in poss: 776 tmp = self.findFirstBB(addy, True) 777 if tmp: 778 return addy 779 780 return None
781