# Convert Chinese numerals to Arabic numbers.
class ConvertNum:
    """Convert a Chinese numeral string such as u'一百二十三' into an int.

    Supported characters are the digits 零-九 and the unit characters
    十, 百, 千 and 万; any other character makes ``convert`` raise
    ``KeyError``.
    """

    def __init__(self, cnNum):
        """Store the numeral text *cnNum* and build the character table."""
        # Value of every supported character: plain digits are < 10,
        # units are >= 10, and 万 (10000) is handled as a section marker.
        self.dict = {
            u'零': 0, u'一': 1, u'二': 2, u'三': 3, u'四': 4,
            u'五': 5, u'六': 6, u'七': 7, u'八': 8, u'九': 9,
            u'十': 10, u'百': 100, u'千': 1000, u'万': 10000,
        }
        self.cnNum = cnNum

    def convert(self):
        """Return the integer value of ``self.cnNum``.

        An empty string yields 0.

        Raises:
            KeyError: if the text contains a character missing from
                ``self.dict``.
        """
        result = 0
        tmp = 0  # digit read but not yet multiplied by a unit

        for tmpChr in self.cnNum:
            tmpNum = self.dict[tmpChr]

            if tmpNum == 10000:
                # 万 scales everything accumulated so far, including the
                # pending digit.
                result = (result + tmp) * tmpNum
                # The pending digit has been consumed; without this reset it
                # would be added again after the loop (一万 -> 10001).
                tmp = 0
            elif tmpNum >= 10:
                # A unit with no preceding digit means "one" of it (十 == 10).
                if tmp == 0:
                    tmp = 1
                result = result + tmpNum * tmp
                tmp = 0
            else:
                tmp = tmpNum

        # A trailing digit with no unit is the ones place.
        return result + tmp
# Practice script: merge place names that differ only by a Chinese numeral
# into one entry, collapsing runs of consecutive numbers into "first~last".
import re
from collections import OrderedDict
from convertNum import ConvertNum
from itertools import *

list_a = [
    u'中华一村', u'中华二村', u'中华三村', u'中华十二村', u'中华五村',
    u'中点开', u'大地三街', u'大地二街', u'中华七村', u'中华八村',
    u'中华九村',
]

# group(1) = leading text, group(2) = numeral, group(3) = trailing suffix.
patternCombineHz = re.compile(u'(.*?)([一二三四五六七八九十]+)([村街巷连])$')

l, dicts = [], []
for item in list_a:
    m = patternCombineHz.search(item)  # extract the numeral, if any
    if m:
        dicts.append({
            'headFootText': (m.group(1), m.group(3)),
            'numText': m.group(2),
        })
    else:
        # Names without a numeral are passed through unchanged.
        l.append(item)

# Map (head, foot) -> list of numeral texts. OrderedDict keeps the groups in
# first-seen order so the printed result does not depend on dict ordering.
dictHead = OrderedDict()
for entry in dicts:
    dictHead.setdefault(entry['headFootText'], []).append(entry['numText'])

for key, value in dictHead.items():
    # Arabic value -> original numeral text, used to map results back.
    dictNum = {}
    for numText in value:
        dictNum[ConvertNum(numText).convert()] = numText

    # Sorting the keys also drops duplicates, which would otherwise break
    # the "value minus index" run detection below.
    sortalbNum = sorted(dictNum)

    newNum = []
    # Within a run of consecutive integers, value - index is constant, so
    # groupby on that difference yields one group per consecutive run.
    for _, run in groupby(enumerate(sortalbNum),
                          lambda pair: pair[1] - pair[0]):
        gb = [number for _, number in run]
        mingb, maxgb = gb[0], gb[-1]  # gb is ascending
        if maxgb - mingb > 1:
            # Three or more consecutive numbers collapse to "first~last".
            newNum.append(u'~'.join([dictNum[mingb], dictNum[maxgb]]))
        else:
            # Runs of one or two numbers are listed individually.
            newNum.extend(dictNum[number] for number in gb)

    if len(newNum) == 1 and u'~' not in newNum[0]:
        # A single plain numeral needs no parentheses.
        combineAddr = u'%s%s%s' % (key[0], u'、'.join(newNum), key[1])
    else:
        combineAddr = u'%s(%s)%s' % (key[0], u'、'.join(newNum), key[1])
    l.append(combineAddr)

for i in l:
    print(i)
output:>>>>>>>>
中点开
中华(一~三、五、七~九、十二)村
大地(二、三)街
Python 整合连续数字的练习，包含 itertools.groupby 的用法
时间: 2024-09-29 20:07:56