一、打印最常用的 10 条 Linux 命令
#!/usr/bin/python
# coding=utf-8
# Print the 10 most frequently used commands found in ~/.bash_history.
# Runs unchanged on Python 2.7 and Python 3.
import io
import os
from collections import Counter


def top_commands(history_path, n=10):
    """Return the ``n`` most common command names in a shell history file.

    The command name is the first whitespace-separated word of each line;
    blank lines are ignored.

    Args:
        history_path: Path of the history file to read.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(command, count)`` tuples, most frequent first.

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    counts = Counter()
    # io.open accepts encoding/errors on both Python 2 and 3. Using
    # errors='replace' keeps one stray non-UTF-8 byte in the history from
    # aborting the whole count.
    with io.open(history_path, encoding='utf-8', errors='replace') as history:
        for line in history:
            words = line.split()
            if words:
                counts[words[0]] += 1
    return counts.most_common(n)


def main():
    """Print the top 10 commands from the current user's bash history."""
    history_path = os.path.expanduser('~/.bash_history')
    try:
        # A parenthesised single argument prints identically on Python 2
        # (print statement) and Python 3 (print function).
        print(top_commands(history_path))
    except (IOError, OSError) as err:
        raise SystemExit('cannot read {0}: {1}'.format(history_path, err))


if __name__ == '__main__':
    main()
效果如下:
二、找到目录下重复的文件
#!/usr/bin/python
# coding=utf-8
# Report files under a directory whose contents are identical, judged by
# comparing MD5 digests. Usage: script.py DIRECTORY
from __future__ import print_function

import fnmatch
import hashlib
import os
import sys

# Number of bytes read per chunk while hashing, so large files are never
# loaded into memory in one piece.
CHUNK_SIZE = 8192


def is_file_match(filename, patterns):
    """Return True if ``filename`` matches any of the fnmatch ``patterns``."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)


def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield paths of files under ``root`` whose names match ``patterns``.

    Args:
        root: Directory at which the recursive walk starts.
        patterns: Iterable of fnmatch-style patterns applied to the bare
            file name; a file is yielded if it matches any of them.
        exclude_dirs: Directory names that must not be descended into.

    Yields:
        The path of each matching file, joined onto the directory in which
        it was found.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # os.walk (top-down) only honours pruning done in place on the
        # list it handed out, hence the slice assignment.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)


def get_chunk(filename):
    """Yield the content of ``filename`` as byte chunks of up to CHUNK_SIZE."""
    # Binary mode is required: hashlib only accepts bytes, and text mode
    # would decode / translate newlines and so alter what gets hashed.
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
            yield chunk


def get_file_checksum(filename):
    """Return the hexadecimal MD5 digest of the content of ``filename``."""
    digest = hashlib.md5()
    for chunk in get_chunk(filename):
        digest.update(chunk)
    return digest.hexdigest()


def main():
    """Print every file under ``sys.argv[1]`` that duplicates an earlier one.

    Raises:
        SystemExit: if the argument is missing or is not a directory.
    """
    # A missing argument is treated as the empty path, which then fails the
    # directory check below; sys.argv itself is left untouched.
    directory = sys.argv[1] if len(sys.argv) > 1 else ''
    if not os.path.isdir(directory):
        raise SystemExit("{0} is not a directory".format(directory))

    record = {}  # checksum -> first path seen with that content
    for item in find_specific_files(directory):
        try:
            checksum = get_file_checksum(item)
        except (IOError, OSError) as err:
            # One unreadable file (permissions, broken symlink, ...) must
            # not abort the whole scan; report it and move on.
            print('skip unreadable file: {0} ({1})'.format(item, err),
                  file=sys.stderr)
            continue
        if checksum in record:
            print('find duplicate files: {0} vs {1}'.format(
                record[checksum], item))
        else:
            record[checksum] = item


if __name__ == '__main__':
    main()
效果如下:
原文地址:http://blog.51cto.com/11555417/2117594
时间: 2024-11-06 17:18:16