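# w  -- NOTE(review): stray text preceding the script, presumably an extraction artifact; confirm and remove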
# -*- coding: utf-8 -*-
"""Interactive city-name chain game.

When run, the script downloads the page at GRAB_URL, extracts the city names
that contain "市" from the entries matched by the code pattern, and indexes
each name (the part before the first "市") by the pinyin of its first and of
its last character.  ``letsgo()`` then repeatedly asks for a name and prints
every indexed city whose first syllable equals the last syllable of the input.

The code sticks to syntax shared by Python 2 and Python 3.
"""
import locale
import re
import sys
from io import BytesIO

import pycurl
from pypinyin import lazy_pinyin

GRAB_URL = 'http://www.gov.cn/test/2011-08/22/content_1930111.htm'


def _city_stem(entry):
    """Return the city name in *entry* without its "市" suffix, or None.

    *entry* is one regex match from the page.  The name is taken as the second
    space-separated field, cut at the first ``<``.
    NOTE(review): this presumes a single space between code and name -- confirm
    against the live page.
    """
    fields = entry.split(' ')
    if len(fields) < 2:
        return None
    name = fields[1].split('<')[0]
    # "市" must be present and preceded by at least two characters; find()
    # returns -1 when it is missing, so one comparison covers both cases.
    if name.find(u'市') <= 1:
        return None
    return name.split(u'市')[0]


def _ask(prompt):
    """Show *prompt* (a unicode string) and return the reply as unicode."""
    if sys.version_info[0] >= 3:
        return input(prompt)
    # Python 2: raw_input() needs a byte-string prompt (a unicode prompt is
    # implicitly encoded with the ASCII codec, which fails for Chinese text)
    # and it returns bytes, which are decoded with the terminal's encoding.
    reply = raw_input(prompt.encode('utf-8'))
    return reply.decode(sys.stdin.encoding or locale.getpreferredencoding(True))


# --- download the page -------------------------------------------------------
buf = BytesIO()
c = pycurl.Curl()
try:
    c.setopt(c.HTTPHEADER, ['Accept-Charset: UTF-8'])
    c.setopt(c.URL, GRAB_URL)
    c.setopt(c.WRITEFUNCTION, buf.write)
    c.perform()
finally:
    # Release the libcurl handle even when the transfer fails.
    c.close()
grab_data = buf.getvalue().decode('utf-8')

# --- build the pinyin indexes -------------------------------------------------
# Three digits, one non-"0" character, two zeros, then the rest of the line up
# to its last "<".
tmp_list = re.findall(r"\d{3}[^0]0{2}.*<", grab_data)

# Names grouped by the pinyin of their last character.
shi_dic = {}
# Names grouped by the pinyin of their first character.
shi_dic_head = {}
for entry in tmp_list:
    shi = _city_stem(entry)
    if shi is None:
        continue
    shi_pinyin = lazy_pinyin(shi)
    if not shi_pinyin:
        continue
    shi_dic.setdefault(shi_pinyin[-1], []).append(shi)
    shi_dic_head.setdefault(shi_pinyin[0], []).append(shi)


def letsgo():
    """Run the interactive game until the user declines to continue.

    Each round reads a name, takes the pinyin of its last character and prints
    every city in ``shi_dic_head`` filed under that syllable.  Empty input or
    an unknown syllable prints the "illegal input" message.  Typing ``go`` at
    the follow-up prompt starts another round; anything else ends the game.
    """
    # A loop rather than self-recursion, so a long session cannot exhaust the
    # interpreter's recursion limit.
    while True:
        info = _ask(u'请输入,如“深圳”:').strip()
        info_pinyin = lazy_pinyin(info)
        # Empty input yields no syllables; treat it like an unknown name
        # instead of failing on the [-1] lookup.
        matches = shi_dic_head.get(info_pinyin[-1]) if info_pinyin else None
        if matches:
            for name in matches:
                print(name)
        else:
            print("输入字符串非法")

        if _ask(u'如继续,请输入go,如结束请输入其他') != 'go':
            print('程序已经退出')
            return


letsgo()
# Time: 2024-10-16 13:15:36