对代码的优化 对抗 对硬件台数的提高

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Directory that contains this script; the database and data directories
# used below are resolved relative to it.
curPath = os.path.abspath(os.path.dirname(__file__))
# The parent directory is appended to sys.path so modules there are importable.
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# Upper bound on ``today_used`` for a key to be handed out by
# db_get_one_effective().
MAX_USED_TIMES = 1700
# Chinese for "daily quota exceeded, access restricted"; not referenced by
# the code visible in this file.
overrun_str = '天配额超限,限制访问'
# Sentinel returned by db_get_one_effective() when no key qualifies.
DB_KEY_EXHAUST = 'DB_KEY_EXHAUST'

# Full path of the SQLite file holding the ``baidu_map_key_used`` table.
# os.path.join yields the same string as the former '%s\\%s' on Windows and a
# valid path on other platforms.
db = os.path.join(curPath, 'py_bdspider_status.db')

# pcity_list = []
# pcity_file = ‘%s\\%s‘ % (curPath, ‘省会城市.txt‘)
# with open(pcity_file, ‘r‘, encoding=‘utf-8‘) as pf:
#     c_ = 0
#     for i in pf:
#         c_ += 1
#         if c_ == 3:
#             c_ = 0
#             pcity_list.append(i.replace(‘ ‘, ‘‘).replace(‘\n‘, ‘‘) + ‘市‘)
# pcity_sorted_list = sorted(pcity_list)
#
# target_city_list_big = [‘广州市‘, ‘厦门市‘, ‘深圳市‘, ‘北京市‘, ‘杭州市‘, ‘成都市‘, ‘上海市‘, ‘西安市‘]
# target_city_list_pass = target_city_list_big
#
# for i in pcity_list:
#     if i not in target_city_list_big:
#         target_city_list_pass.append(i)

# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = ‘DELETE  FROM  baidu_map_key_used‘
#     c.execute(sql)
#     conn.commit()
#     pcity_file = ‘%s\\%s‘ % (curPath, ‘bdmap_key.txt‘)
#     with open(pcity_file, ‘r‘, encoding=‘utf-8‘) as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace(‘ ‘, ‘‘).replace(‘\n‘, ‘‘).replace(‘\t‘, ‘‘).split(‘;‘)
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = ‘INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ‘ % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()
#     pf.close()

# db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]

def db_get_one_effective():
    """Return the least-used API key whose daily counter is within the limit.

    Selects from ``baidu_map_key_used`` the keys whose ``today_used`` does not
    exceed ``MAX_USED_TIMES`` and takes the one with the smallest counter.

    Returns:
        The key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    sql = ('SELECT key FROM baidu_map_key_used '
           'WHERE today_used<=? ORDER BY today_used ASC')
    conn = sqlite3.connect(db)
    try:
        row = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
    finally:
        # Previously the function returned before reaching an uncalled
        # ``conn.close`` attribute, so every lookup leaked a connection.
        conn.close()
    return DB_KEY_EXHAUST if row is None else row[0]

def db_update_one_today_used(key):
    """Increment the daily usage counter of *key* and stamp its update time.

    Args:
        key: Value of the ``key`` column identifying the row to update.

    The timestamp is the local time formatted as ``%y%m%d%H%M%S``. It is bound
    as a text parameter, matching how the (commented-out) table initialiser
    in this file inserts ``update_time``.
    """
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    # Placeholders instead of string formatting: a key containing a quote
    # character no longer produces malformed SQL.
    sql = ('UPDATE baidu_map_key_used '
           'SET today_used = today_used+1, update_time=? WHERE key=?')
    conn = sqlite3.connect(db)
    try:
        conn.execute(sql, (localtime_, key))
        conn.commit()
    finally:
        # Close even when execute/commit raises (e.g. a locked database).
        conn.close()

# Sub-directory names under curPath: saved responses and the markers written
# when a request raised.
dir_ = 'baidu_map_uid_page'
dir_exception = 'baidu_map_uid_page_exception'
# Names of responses already on disk; filled by gen_requested_file_list().
requested_file_list = []
# Both prefixes end with a path separator because file names are appended to
# them by plain string formatting. Joining with a trailing '' produces that
# separator (identical to the former '%s\\%s\\' on Windows).
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
# Listing taken at import time; raises if the directory does not exist.
requested_file_dir = os.listdir(requested_file_dir_str)

def gen_requested_file_list(file_postfix='.html'):
    """Add the names of already-saved result files to ``requested_file_list``.

    Every entry of the ``dir_`` directory under ``curPath`` is reduced to the
    text before its first ``&`` and then to the text before the first
    occurrence of *file_postfix*. Reduced names that are not yet in the
    module-level ``requested_file_list`` are appended to it.

    Args:
        file_postfix: Suffix cut off each file name. Entries that do not
            contain it are kept with their full name, as before.
    """
    # Mirror the list in a set so the duplicate check is O(1) per file instead
    # of rescanning a list that can hold tens of thousands of names.
    known = set(requested_file_list)
    for entry in os.listdir(os.path.join(curPath, dir_)):
        # Work on the bare file name. The previous code glued directory and
        # name together and split on ``dir_`` again, which picked the wrong
        # piece whenever ``dir_`` also occurred in curPath.
        requested_file = entry.split('&')[0].split(file_postfix)[0]
        if requested_file not in known:
            known.add(requested_file)
            requested_file_list.append(requested_file)

def gen_file_data(fname_source, file_type='.xlsx'):
    """Load the first sheet of an Excel workbook as rows of strings.

    Args:
        fname_source: Workbook file name without extension, relative to
            ``curPath``.
        file_type: Extension appended to *fname_source*.

    Returns:
        A list with one list per sheet row; each cell value is converted
        with ``str``.
    """
    # NOTE(review): xlrd 2.x only reads .xls files; confirm the pinned xlrd
    # version still opens the default '.xlsx'.
    excel_ = os.path.join(curPath, '%s%s' % (fname_source, file_type))
    book = xlrd.open_workbook(excel_, on_demand=True)
    try:
        sheet = book.sheet_by_index(0)
        return [[str(cell.value) for cell in sheet.row(i)]
                for i in range(sheet.nrows)]
    finally:
        # Release the workbook's resources even if reading a row fails.
        book.release_resources()

# Pending work: city -> district -> {'uid_list': [...], 'file_row_list': [...]}.
request_dic = {}
# POI type names. The only filter that used target_type_except_list is
# commented out further down, so neither list affects the visible code.
target_type_list = ['售楼处', '酒店', '专科医院', '家电', '家居建材', '咖啡馆']
target_type_except_list = [
    '住宅小区', '写字楼', '商场', '小学', '中学',
    '4S店', '汽车站', '火车站', '高铁站', '飞机场',
]

# Collect the names of responses already saved with either extension.
file_postfix_l = ['.html', '.txt']
for i in file_postfix_l:
    gen_requested_file_list(i)

fname_source = '【TEAM】采集员新增任务133598条-楼宇归集-互异百度uid数51700'
# Alternative workbook carrying extra is_building columns:
# fname_source = '【TEAM】采集员新增任务133598条-楼宇归集-互异百度uid数51700-is_building170901140053'
data_selfadd = gen_file_data(fname_source)

def replace_illeagl_tag(str_):
    """Return *str_* with every space, newline and tab character removed.

    Args:
        str_: Text to clean.

    Returns:
        The cleaned string; an input without those characters is returned
        unchanged in content.
    """
    for unwanted in (' ', '\n', '\t'):
        str_ = str_.replace(unwanted, '')
    return str_

# gen_requested_file_list()
# gen_requested_file_list(‘.txt‘)

def _queue_pending_rows(rows, columns):
    """Group the not-yet-requested rows of one workbook into ``request_dic``.

    For each row the city, district and uid cells are cleaned with
    ``replace_illeagl_tag``. Rows whose cleaned uid is shorter than six
    characters, or whose ``city + district + uid`` name is already in
    ``requested_file_list``, are skipped. Remaining rows are filed under
    ``request_dic[city][district]``: the uid goes into ``'uid_list'`` once,
    the full row is appended to ``'file_row_list'``.

    Args:
        rows: Sheet rows as returned by ``gen_file_data``.
        columns: Column names in sheet order; must contain ``'city'``,
            ``'district'`` and ``'uid'``.

    Raises:
        ValueError: If a row does not have exactly ``len(columns)`` cells
            (the former tuple unpacking enforced the same constraint).
    """
    city_at = columns.index('city')
    district_at = columns.index('district')
    uid_at = columns.index('uid')
    # Snapshot as a set: membership is tested once per row, and scanning the
    # list each time is quadratic over a large workbook.
    already_requested = set(requested_file_list)
    for row in rows:
        if len(row) != len(columns):
            raise ValueError(
                'expected %d columns, got %d' % (len(columns), len(row)))
        city = replace_illeagl_tag(row[city_at])
        district = replace_illeagl_tag(row[district_at])
        uid = replace_illeagl_tag(row[uid_at])
        # Length is checked on the cleaned uid for both workbooks; the first
        # loop used to check before tabs and newlines were stripped.
        if len(uid) < 6:
            continue
        input_ = '%s%s%s' % (city, district, uid)
        if input_ in already_requested:
            print('requested', input_)
            continue
        districts = request_dic.setdefault(city, {})
        if district not in districts:
            districts[district] = {'uid_list': [], 'file_row_list': []}
        bucket = districts[district]
        if uid not in bucket['uid_list']:
            bucket['uid_list'].append(uid)
        bucket['file_row_list'].append(row)


# NOTE: earlier variants of both workbooks carried three extra trailing
# columns (is_building, name_, addr_) and skipped rows whose is_building was
# '0' or '1'; a filter on target_type_except_list was also disabled. Neither
# is active here.
SELFADD_COLUMNS = (
    'id', 'area_code', 'type_', 'city', 'district', 'uid', 'name',
    'address', 'street', 'name_reduction', 'submit_time',
)
_queue_pending_rows(data_selfadd, SELFADD_COLUMNS)
del data_selfadd  # free the sheet before loading the next workbook

fname_source = '【TEAM】41876条JMTool官方数据百度POIuid_添加率0.9388_170830171339'
# Alternative workbook carrying extra is_building columns:
# fname_source = '【TEAM】41876条JMTool官方数据百度POIuid_添加率0.9388_170830171339-is_building170901140150'
data_jmtool = gen_file_data(fname_source)
JMTOOL_COLUMNS = (
    'dbid', 'area_code', 'name_', 'request_name', 'type_', 'city',
    'district', 'addr', 'street', 'bd_status', 'bd_message', 'bd_res_str',
    'city_bd', 'district_bd', 'business_bd', 'cityid_bd', 'name_bd', 'uid',
    'lat_bd', 'lng_bd', 'compute_res', 'name_ratio_res',
    'combine_ratio_res', 'uid_href',
)
_queue_pending_rows(data_jmtool, JMTOOL_COLUMNS)
del data_jmtool

# Directory prefix, with trailing separator, used as the default target of
# write_res_file().
write_res_file_dir = os.path.join(curPath, dir_, '')

# Substrings that make write_res_file() discard a response instead of saving it.
ex_l = ['Proxy Error', 'APP IP校验失败']

def write_res_file(str_, input_, dir_=write_res_file_dir, file_postfix='.txt'):
    """Save one response body as ``<dir_><input_><file_postfix>``.

    Nothing is written when *str_* contains any marker listed in ``ex_l``;
    the matching marker is printed instead.

    Args:
        str_: Text to store.
        input_: File name stem.
        dir_: Directory prefix; the file name is appended directly, so it
            must end with a path separator.
        file_postfix: File extension, including the dot.
    """
    for ex in ex_l:
        if ex in str_:
            print('EXCEPTION-', ex)
            return
    fname = '%s%s%s' % (dir_, input_, file_postfix)
    # The with-block closes the file; no explicit close() is needed afterwards.
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)
    print('ok', threading.get_ident(), input_)

class MyThread(threading.Thread):
    """Thread that calls ``func(args)``, passing *args* as one positional value."""

    def __init__(self, func, args, name):
        """Store the callable, its single argument and the thread name."""
        super().__init__()
        self.func = func
        self.args = args
        self.name = name

    def run(self):
        """Invoke the stored callable with the stored argument."""
        target, argument = self.func, self.args
        target(argument)

# Initialised to zero; nothing in the visible code updates it.
requested_type_counter = 0
# Request URL template: fun_() substitutes the literal 'UID' and 'AK' tokens.
base_url = 'http://api.map.baidu.com/place/v2/detail?uid=UID&output=json&scope=2&ak=AK'

def fun_(city):
    """Request and store the detail response for every pending uid of *city*.

    For each uid under ``request_dic[city]`` that is not already listed in
    ``requested_file_list``, an API key is taken from the key table, the URL
    built from ``base_url`` is fetched, the key's usage counter is bumped and
    the response text is saved with ``write_res_file``. If any of these steps
    raises, a marker file is written to the exception directory instead.

    The function returns as soon as the key table reports ``DB_KEY_EXHAUST``.

    Args:
        city: Key of ``request_dic`` to process.
    """
    for district, pending in request_dic[city].items():
        for uid in pending['uid_list']:
            input_ = '%s%s%s' % (city, district, uid)
            # Skip cached results before touching the key table, so no
            # database query is spent on uids that need no request.
            if input_ in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # Stop the whole worker: a bare ``break`` only left the inner
                # loop and repeated the failing lookup for every district.
                return
            url_ = base_url.replace('UID', uid).replace('AK', ak)
            try:
                # A timeout keeps one stalled connection from blocking this
                # thread (and main()'s join) forever.
                bd_res_json_str = requests.get(url_, timeout=30).text
                db_update_one_today_used(ak)
                write_res_file(bd_res_json_str, input_)
            except Exception as exc:
                # Deliberately broad: one failed uid must not end the worker.
                bd_res_json_str = '请求百度-异常'
                write_res_file(bd_res_json_str, input_, requested_file_dir_exception_str)
                print(bd_res_json_str, input_, repr(exc))

# Number of cities with pending work at import time.
city_num = len(request_dic)
# main() numbers the sorted cities from 1 and starts a thread only for those
# whose number lies within start_loop..stop_loop.
start_loop = 0
stop_loop = 100
# Not referenced by the code visible in this file.
thread_max = city_num

def main():
    """Start one worker thread per selected city and wait for all of them.

    Cities are taken from ``request_dic`` in sorted order and numbered from 1.
    Those whose number lies within ``start_loop``..``stop_loop`` each get a
    ``MyThread`` running ``fun_(city)``.
    """
    threads_list = [
        MyThread(fun_, city, fun_.__name__)
        for nloop, city in enumerate(sorted(request_dic), start=1)
        if start_loop <= nloop <= stop_loop
    ]
    for t in threads_list:
        # Set the attribute: ``t.setDaemon = False`` replaced the method with
        # False and never configured the thread.
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()

# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

  

时间: 2024-08-29 19:05:21

对代码的优化 对抗 对硬件台数的提高的相关文章

C++ 代码性能优化 -- 循环分割提高并行性

对于一个可结合和可交换的合并操作来说,比如整数的加法或乘法, 我们可以通过将一组合并操作分割成 2 个或更多的部分,并在最后合并结果来提高性能. 原理: 普通代码只能利用 CPU 的一个寄存器,分割后可以利用多个寄存器. 当分割达到一个数量时,寄存器用完,性能不再提升,甚至会开始下降. 用代码来描述,如下: // 一般情况下的代码 for (i = 1; i < n+1; i++) { res = res OPER i; } // 循环分割后代码 for (i = 1; i < n; i+=2

Java代码性能优化技巧

流方面: private FileOutputStream writer; writer = new FileOutputStream(fileName); 使用BufferedOutputStream 对写入FileOutputStream的数据进行缓存 //将writer的类型由FileOutputStream 变更为BufferedOutputStream //private FileOutputStream writer; private BufferedOutputStream wr

Android代码内存优化建议-Android官方篇

转自:http://androidperformance.com/ http://developer.android.com/intl/zh-cn/training/displaying-bitmaps/index.html 为了使垃圾回收器可以正常释放程序所占用的内存,在编写代码的时候就一定要注意尽量避免出现内存泄漏的情况(通常都是由于全局成员变量持有对象引用所导致的),并且在适当的时候去释放对象引用.对于大多数的应用程序而言,后面其它的事情就可以都交给垃圾回收器去完成了,如果一个对象的引用不

Java 代码性能优化总结

35 个 Java 代码性能优化总结 前言 代码优化,一个很重要的课题.可能有些人觉得没用,一些细小的地方有什么好修改的,改与不改对于代码的运行效率有什么影响呢?这个问题我是这么考虑的,就像大海里面的鲸鱼一样,它吃一条小虾米有用吗?没用,但是,吃的小虾米一多之后,鲸鱼就被喂饱了.代码优化也是一样,如果项目着眼于尽快无BUG上线,那么此时可以抓大放小,代码的细节可以不精打细磨:但是如果有足够的时间开发.维护代码,这时候就必须考虑每个可以优化的细节了,一个一个细小的优化点累积起来,对于代码的运行效率

关于前端代码性能优化问题

以下观点纯属个人看法: 对于一个刚接触前端不久的人来说,前端的代码质量是很重要的一部分,毕竟关系到性能问题.个人认为关于代码性能优化主要由这几方面:HTML.CSS.Javascript和HTTP,所以对这四个方面的优化能提高浏览器的性能.个人知识面有限,只有学到以下的知识,另外的还没涉及,不足之处还请体谅. 一. HTML 1.首先是对HTML5的充分熟悉和理解,理解标签的语义化,减少对标签的重定义. 2.命名的规范化,对id.class.name的规范命名有助于整体代码的构建,方便他人的理解

代码的优化

在VS中,有两个编译器开关会影响生成的代码的优化: 一个是在项目属性->生成->优化代码选项,如果没有选中该选项,则生成的IL代码是没有经过优化的,在IL文件中会包含很多NOP指令,这些指令是空操作指令,作用是方便设置断点,在流程控制指令后边都会添加NOP指令,对于下面的简单代码: static class Program { /// <summary> /// 应用程序的主入口点. /// </summary> [STAThread] static void Main

Python 代码性能优化技巧(转)

原文:Python 代码性能优化技巧 Python 代码优化常见技巧 代码优化能够让程序运行更快,它是在不改变程序运行结果的情况下使得程序的运行效率更高,根据 80/20 原则,实现程序的重构.优化.扩展以及文档相关的事情通常需要消耗 80% 的工作量.优化通常包含两方面的内容:减小代码的体积,提高代码的运行效率. 改进算法,选择合适的数据结构 一个良好的算法能够对性能起到关键作用,因此性能改进的首要点是对算法的改进.在算法的时间复杂度排序上依次是: O(1) -> O(lg n) -> O(

关于ArrayUtil代码的优化

public int[] removeZero(int[] oldArray) { //传进空数组是返回空数组 if(oldArray == null) return null; int count = 0; //统计非零元素个数 int b[] = new int[oldArray.length]; //先统计非零元素个数,并将非零元素存入一个和原数组同样大小的新数组 for(int i=0; i < oldArray.length; i++) { if(oldArray[i] != 0) {

iOS对项目中所有加阴影的代码进行优化

1. 对项目中所有加阴影的代码进行优化 目前项目中尤其是表格单元格中使用如下加阴影代码严重影响性能(5.2.5航班查询结果页卡顿的原因)     self.cellBG.layer.shadowColor = [[UIColor colorWithRed:0.8 green:0.8 blue:0.8 alpha:1] CGColor];     self.cellBG.layer.shadowOffset = CGSizeMake(1, 1);     self.cellBG.layer.sha