#! /usr/bin/python # Fast serach automaton # # This script is under the GNU GPL license # # Philippe Biondi import os def make_first_tree(list): automaton={"":{}} for k,p in list.items(): c=automaton[""] for i in range(0,len(p)): if not c.has_key(p[i]): automaton[p[:i+1]]={} c[p[i]]=p[:i+1] c=automaton[c[p[i]]] c[0]=[k] return automaton def make_second_tree(patterns): tree={"":{1:""}} for p in patterns: c=tree[""] for i in range(len(p)-1,-1,-1): if not c.has_key(p[i]): tree[p[i:]]={0:p[i+1:]} c[p[i]]=p[i:] c=tree[c[p[i]]] if p: c[1]=p return tree def make_automaton(list): automaton=make_first_tree(list); tree=make_second_tree(automaton.keys()); for k in automaton.keys(): c=tree[k] while c.has_key(0): l=c[0] c=tree[l] if c.has_key(1): m=automaton[l].copy() m.update(automaton[k]) automaton[k]=m if automaton[k].has_key(0) and automaton[l].has_key(0): automaton[k][0].extend(automaton[l][0]) return automaton def run_automaton(automaton,string,state=""): for c in string: # print "%s, [%s] " % (c, state),automaton[state] if automaton[state].has_key(0): print "Terminal state %s" % automaton[state][0] if automaton[state].has_key(c): state=automaton[state][c] else: state="" return state def do_filter(automaton): """act as a filter""" state="" while 1: txt=os.read(0,8192) if not len(txt): break state=run_automaton(automaton, txt, state) def give_python_automaton(automaton): """output the python automaton object""" print automaton def make_intel_asm(automaton): """output the intel asm code of the automaton""" keys=automaton.keys() names=[] print ";;;;;;;;;;;;;;;;;;;;;;;;;;;" print ";; Fast search automaton" print ";; Philippe Biondi " print ";;" print for v in automaton.values(): if v.has_key(0): nam=v[0] for nm in nam: if names.count(nm) == 0: names.append(nm); print "fs_desc%i db '%s',0" % (len(names),names[-1]) print ";; fsearch" print "fsearch proc addr:dword, len:dword" print " mov ecx, len" print " mov esi,addr" print " mov eax,%i" % (keys.index("")+1) print " dec esi" print "; Main loop" print "fs_mainloop:" print " dec ecx" print " jnz fs_notfinished" print " xor eax,eax" print " ret" print "fs_notfinished:" print " inc esi" for i in range(0,len(keys)): print " dec eax" print " jz fs_state%i" % (i+1) print "; Never reached !!" print " mov eax,-2" print " ret" print "; States" for i in range(0,len(keys)): v=automaton[keys[i]] print ";;;;;;;; State %i" % (i+1) print "fs_state%i:" % (i+1) if v.has_key(0): print "; %s" % v[0] print "lea eax,fs_desc%i" % (names.index(v[0][0])+1) print "ret" else: j=0 for tr in v.keys(): print " cmp byte ptr [esi], '%s'" % tr print " jne fs_st%i_%i" % (i+1,j) print " mov eax,%i" % (keys.index(v[tr])+1) print " jmp fs_mainloop" print "fs_st%i_%i:" % (i+1,j) j=j+1 print " mov eax,%i" % (keys.index("")+1) print " jmp fs_mainloop" print print "fsearch endp" def make_c(automaton): """output the c code of the automaton""" keys=automaton.keys() keys.sort() names=[] print "/*" print " * Fast search automaton" print " * Philippe Biondi " print " */" print print "typedef int (*fsearch_cb)(int pos, char *descr, char *cb_data);" print "typedef int (*fsearch_fun)(int startstate, fsearch_cb cb, char * cb_data, char *buf, int buflen);" print print "int fsearch(int startstate, fsearch_cb cb, char * cb_data, char *buf, int buflen)" print "{" print " register int state=startstate;" print " int ofs=0;" print " char *p=buf;" print " int cb_ret;" print print " while (ofs < buflen) {" print " switch (state) {" for i in range(0,len(keys)): v=automaton[keys[i]] print " case %i:" % (i) if v.has_key(0): for hit in v[0]: print ' if ((cb_ret=cb(ofs,"%s",cb_data)) < 0) return cb_ret;' % hit print ' switch (*p) {' for tr in v.keys(): if tr != 0: print " case '%s':" % tr print " state=%i;" % keys.index(v[tr]) print " goto fs_endloop;" print " default:" print " state=%i;" % keys.index("") print " }" print " break;" print " default:" print ' printf("Error !? State=%i\\n",state);' print " }" print "fs_endloop:" print " ofs++;" print " p++;" print " }" print " return 0;" print "}" def make_c2(automaton): """output the c code of the automaton with hashed state table""" keys=automaton.keys() names=[] print "/*" print " * Fast search automaton" print " * Philippe Biondi " print " */" print print "static int state=%i;" % keys.index("") for i in range(0,len(keys)): v=automaton[keys[i]] print "static inline void state%i(char c)" % i print "{" if v.has_key(0): for hit in v[0]: print ' printf("got %s\\n");' % hit print ' switch (c) {' for tr in v.keys(): if tr != 0: print " case '%s':" % tr print " state=%i;" % keys.index(v[tr]) print " return;" print " default:" print " state=%i;" % keys.index("") print " }" print "}" print "static void (*hash[])(char c) = {" for i in range(0,len(keys)): print " state%i," % i print "};" print "char * fsearch(char *buf, int buflen)" print "{" print " int ofs=0;" print " char *p=buf;" print print " while (ofs < buflen) {" print " hash[state](*p);" print " p++;ofs++;" print " }" print "}" if (__name__=="__main__"): import getopt,sys commands={"c2":make_c2 , "c":make_c , "asm":make_intel_asm, "python":give_python_automaton, "filter":do_filter } def usage(): print "Usage: %s -f listfile -c command\n" \ " -f listfile: give the database ( { \"name\":\"sig\",...} )\n" \ " command is :" % sys.argv[0] for k in commands.keys(): print "%8s : %s" % (k,commands[k].__doc__) try: opts=getopt.getopt(sys.argv[1:],"hf:c:") command=None sig={} for opt, parm in opts[0]: if opt=="-h": usage() sys.exit(0) elif opt=="-f": f=open(parm) for l in f.readlines(): k,v=l.split() sig[k]=v f.close() elif opt=="-c": command=parm if not command in commands.keys(): raise getopt.GetoptError("unknown command (%s)" % command,None) if command == None: raise getopt.GetoptError("no command specified",None) except getopt.GetoptError,(errmsg,e): print "*** Error:", errmsg print usage() sys.exit(1) commands[command](make_automaton(sig))