"""Parse one web-server access-log line into a dict of typed fields.

The script extracts the leading fields of a sample log line with a regular
expression, then converts selected fields (timestamp, request line, status,
body size) from strings to richer Python values and prints the result.
"""
import datetime
import re

# Matches the leading part of a log line: client IP, two literal "-" fields,
# bracketed timestamp, quoted request line, status, body size, a literal "-"
# referer and the quoted user agent.  Anything after the user agent is ignored.
# Raw strings keep the regex escapes (\d, \[, ...) away from Python's own
# string-escape processing.
_LOG_PATTERN = re.compile(
    r'(?P<http_cdn_src_ip>[\d\.]{7,}) - - \[(?P<time_local>[^\[\]]+)\] '
    r'"(?P<request>[^"]+)" (?P<status>\d+) (?P<body_bytes_sent>\d+) '
    r'"-" "(?P<http_user_agent>[^"]+)"'
)


def extract(line):
    """Return the named fields matched at the start of *line*.

    Args:
        line: One access-log line as a string.

    Returns:
        A dict mapping each named regex group to its matched text (all values
        are strings).  If *line* does not match the expected layout, a
        diagnostic is printed and an empty dict is returned, so callers can
        iterate over the result unconditionally.
    """
    mat = _LOG_PATTERN.match(line)
    if mat is None:
        # Previously this path fell through to ``return d`` with ``d`` unbound
        # and raised UnboundLocalError; an empty dict keeps callers working.
        print("re.search() returned None")
        return {}
    d = mat.groupdict()
    print(d)
    return d


def _convert_time(timestr):
    """Parse a timestamp such as ``07/Dec/2017:15:55:07 +0800``."""
    return datetime.datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S %z")


def _convert_request(request):
    """Split a request line into its method, url and protocol parts."""
    return dict(zip(('method', 'url', 'protocol'), request.split()))


# Full list of field names in a log line.  Not used by the parsing below;
# kept as a module-level name for reference.
names = [
    'http_cdn_src_ip', '-', 'remote_user', 'time_local', 'request',
    'status', 'body_bytes_sent', 'http_referer', 'http_user_agent',
    'http_x_forwarded_for', 'remote_addr', 'request_time',
    'upstream_addr', 'upstream_response_time',
]

# Per-field converters; fields without an entry stay as the matched string.
ops = {
    'time_local': _convert_time,
    'request': _convert_request,
    'status': int,
    'body_bytes_sent': int,
}

# Sample log line.  NOTE(review): the source had backslash line continuations
# inside the user-agent string; they are rebuilt here as single spaces, which
# matches the usual user-agent layout -- confirm against the real log.
log_data = (
    '1.24.17.6 - - [07/Dec/2017:15:55:07 +0800] '
    '"GET /tch/ApchReprt/getAllon HTTP/1.1" 200 113 "-" '
    '"Mozilla/5.0 (Linux; Android 6.0.1; OPPO R9s Build/MMB29M; wv) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
    'Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043015 '
    'Safari/537.36 V1_AND_SQ_6.6.9_482_YYB_D QQ/6.6.9.3060 NetType/4G '
    'WebP/0.3.0 Pixel/1080" - 68.20.3.21 0.058 127.0.0.1:1111 0.014'
)

# Apply the converter registered for each field, passing other fields through.
d = {k: ops[k](v) if k in ops else v for k, v in extract(log_data).items()}
print(d)
# Original article: https://www.cnblogs.com/kuku0223/p/12177083.html
# Time: 2024-11-01 22:46:08