这个功能笔者使用 Python 实现。执行脚本后,根据提示手动输入目录的路径;脚本会计算该目录及其所有子目录中每个文件的 MD5 值,再比对这些 MD5 值,最后打印出 MD5 值相同(即内容相同)的文件路径。
完整的 Python 脚本如下:
# -*- coding: utf-8 -*-
"""Find files with identical content under a directory tree.

The script asks for a directory, records ``<path> <md5>`` for every file in
that directory and its subdirectories in a scratch log file, then reads the
log back and prints the paths that share an MD5 digest.

The code runs unchanged on Python 2 and Python 3: every ``print`` call takes
exactly one argument, so it means the same thing as a statement and as a
function call.
"""
import hashlib
import os

# Default scratch file holding one "<path> <md5>" record per line.
LOG_PATH = r'H:\my_python_py.log'

# Length of the hexadecimal MD5 digest that ends every log record.
_MD5_HEX_LEN = 32

# Files are hashed in pieces of this size so large files are never loaded whole.
_CHUNK_SIZE = 1024 * 1024

try:
    _read_input = raw_input  # noqa: F821 - Python 2 name
except NameError:
    _read_input = input


def calmd5(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode, read in fixed-size chunks and closed
    before returning. Errors raised by ``open`` or ``read`` propagate.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # iter(callable, sentinel) stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: stream.read(_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _parse_log(log_path):
    """Return the ``(path, md5)`` pairs stored in *log_path*, in file order."""
    entries = []
    with open(log_path) as log:
        for raw in log:
            line = raw.rstrip('\r\n')
            # A record is at least one path character, one space and the digest.
            if len(line) <= _MD5_HEX_LEN + 1:
                continue
            if line[-(_MD5_HEX_LEN + 1)] != ' ':
                continue
            entries.append((line[:-(_MD5_HEX_LEN + 1)], line[-_MD5_HEX_LEN:]))
    return entries


def _unique_keep_last(items):
    """Return *items* without duplicates, each kept at its last position."""
    seen = set()
    kept = []
    for item in reversed(items):
        if item not in seen:
            seen.add(item)
            kept.append(item)
    kept.reverse()
    return kept


def uniq_line(log_path=LOG_PATH):
    """Print the groups of files in the log that share an MD5 digest.

    Args:
        log_path: Log written by :func:`filepath`; defaults to ``LOG_PATH``.

    Output, in order: every record whose digest was already seen earlier in
    the log, the list of those repeated digests, the same list without
    duplicates, and finally each repeated digest followed by the paths of
    all files that have it.

    Paths are grouped by the digest parsed from the end of each record, so a
    file whose name merely contains a digest is not reported as a duplicate.
    """
    paths_by_md5 = {}
    repeat_line = []
    for path, md5 in _parse_log(log_path):
        known_paths = paths_by_md5.setdefault(md5, [])
        if known_paths:
            # Second or later file with this content.
            print(path + ' ' + md5)
            repeat_line.append(md5)
        known_paths.append(path)

    print(' ')
    print('###the repeat lines before uniqed is:\n%s' % (repeat_line,))
    unique_md5 = _unique_keep_last(repeat_line)
    print(' ')
    print('###after processed is:\n%s' % (unique_md5,))
    print('\n\n\n')
    print('------------Equal files are listed below---------------')
    for md5 in unique_md5:
        print('---MD5:  %s' % md5)
        for path in paths_by_md5[md5]:
            print(path)


def filepath(dir, log_path=LOG_PATH):
    """Hash every file under *dir* and append the results to the log.

    Args:
        dir: Root directory; it is walked recursively with ``os.walk``.
        log_path: File that receives one ``<path> <md5>`` line per file, opened
            in append mode; defaults to ``LOG_PATH``.

    Each record is also printed. The log is closed before returning, so the
    records are on disk when :func:`uniq_line` reads them.
    """
    log_key = os.path.normcase(os.path.abspath(log_path))
    with open(log_path, 'a') as log:
        for folder, _subdirs, names in os.walk(dir):
            for name in names:
                fullpath = os.path.join(folder, name)
                # Never hash the log itself while it is being written.
                if os.path.normcase(os.path.abspath(fullpath)) == log_key:
                    continue
                remd5 = calmd5(fullpath)
                print(fullpath + ' ' + remd5)
                log.write('%s %s\n' % (fullpath, remd5))


def main():
    """Prompt for a directory, report its duplicate files, remove the log."""
    target = _read_input("Please input the Dir(etc:G:\\shareux\\tmp):")
    try:
        filepath(target)
        uniq_line()
    finally:
        # Remove the log even after a failure: filepath() appends, so a
        # leftover log would leak stale records into the next run.
        if os.path.exists(LOG_PATH):
            os.remove(LOG_PATH)
    print('Successfully Exited')


if __name__ == '__main__':
    main()
执行效果截图如下:
找出目录中相同的文件
时间: 2024-10-09 17:50:26