python md5用于判重
# -*- coding: gbk -*-
#!/usr/bin/env python
"""Persistent duplicate detection keyed on MD5 digests.

Every string seen so far is remembered as its raw 16-byte MD5 digest, both in
an in-memory set and in a flat binary file made of back-to-back digests, so
the "already seen" state survives across runs.
"""
import hashlib
import os


class CAvoidSame:
    """Remember which strings have been seen, backed by a digest file.

    Attributes:
        md5fname: Path of the binary store file (concatenated raw digests).
        md5set: Set of the raw digests currently known.
    """

    # Size in bytes of one raw MD5 digest, i.e. of one record in the store.
    DIGEST_SIZE = 16

    def __init__(self, md5fname):
        """Load the digests stored in *md5fname*, creating the file if absent.

        Args:
            md5fname: Path of the digest store file.
        """
        self.md5fname = md5fname
        self.md5set = set()

        if not os.path.exists(self.md5fname):
            # Start with an empty store so later appends have a file to open.
            open(self.md5fname, "wb").close()
            return

        size = self.DIGEST_SIZE
        # Binary mode is required: digests are arbitrary bytes, and text mode
        # would mangle newline / EOF bytes on some platforms.
        with open(self.md5fname, "r+b") as store:
            for record in iter(lambda: store.read(size), b""):
                missing = size - len(record)
                if missing:
                    # A short trailing record means an earlier write was cut
                    # off.  Pad it with ASCII "0" up to a full record so that
                    # digests appended later stay aligned on record
                    # boundaries.  Seek explicitly before switching from
                    # reading to writing on the same handle.
                    padding = b"0" * missing
                    store.seek(0, os.SEEK_END)
                    store.write(padding)
                    self.md5set.add(record + padding)
                    break
                self.md5set.add(record)

    def IsInAddIfNot(self, aStr):
        """Report whether *aStr* was seen before, recording it if it was not.

        Args:
            aStr: The string to check.  Text is encoded as UTF-8 before
                hashing; byte strings are hashed as given.

        Returns:
            True if the digest of *aStr* was already known, otherwise False
            (in which case the digest has been appended to the store file and
            added to ``md5set``).
        """
        if isinstance(aStr, type(u"")):
            # hashlib only accepts bytes; UTF-8 matches the digest of plain
            # ASCII input, so existing stores remain valid.
            aStr = aStr.encode("utf-8")
        digest = hashlib.md5(aStr).digest()

        if digest in self.md5set:
            return True

        with open(self.md5fname, "ab") as store:
            store.write(digest)
        self.md5set.add(digest)
        return False


if __name__ == "__main__":
    avoidSame = CAvoidSame("test.md5")
    # Repeated samples demonstrate the True ("already seen") answers.
    for sample in ("aaa", "bbb", "ccc", "aaa", "bbb", "ddd"):
        print(avoidSame.IsInAddIfNot(sample))