csv .xlsx

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading
import math
import csv

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST, next_day_tag = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST', '000003'  # overrun_str: "daily quota exceeded, access restricted"

db = 'py_bdspider_status.db'
db = '%s\\%s' % (curPath, db)

def db_chk_one_exist(key):
    conn = sqlite3.connect(db)
    c = conn.cursor()
    sql = 'SELECT key FROM baidu_map_key_used WHERE key="%s"' % (key)
    r = 0
    res = c.execute(sql).fetchone()
    if res is not None:
        r = 1
    conn.close()  # was `conn.close` without parentheses, which never actually closed the connection
    return r

# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     k_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(k_file, 'r', encoding='utf-8') as pf:
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace(' ', '').replace('\n', '').replace('\t', '').split(';')
#             r = db_chk_one_exist(key)
#             if r == 0:
#                 localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#                 sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                     author, key, localtime_, 0)
#                 c.execute(sql)
#                 conn.commit()
#     conn.close()
#
#
# db_init_key_table()
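The functions in this file assume a `baidu_map_key_used` table already exists. A minimal sketch of the implied schema, inferred from the queries above (the column types are assumptions, not from the original source):

def db_create_key_table():
    # hypothetical helper, not in the original script: creates the table
    # the DB functions here query; types are inferred from usage
    conn = sqlite3.connect(db)
    conn.execute('''CREATE TABLE IF NOT EXISTS baidu_map_key_used (
                        author      TEXT,
                        key         TEXT PRIMARY KEY,
                        update_time TEXT,
                        today_used  INTEGER DEFAULT 0)''')
    conn.commit()
    conn.close()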

def db_recovery_bdkeynum():
    # resets every key's daily counter; only fires if this happens to run at exactly 00:00:03
    if time.strftime("%H%M%S", time.localtime()) == next_day_tag:
        conn = sqlite3.connect(db)
        c = conn.cursor()
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        sql = 'UPDATE baidu_map_key_used SET today_used = 0 ,update_time="%s" ' % (localtime_)
        c.execute(sql)
        conn.commit()
        conn.close()
    return

def db_get_one_effective():
    db_recovery_bdkeynum()
    conn = sqlite3.connect(db)
    c = conn.cursor()
    sql = 'SELECT key FROM baidu_map_key_used WHERE today_used<=%s ORDER BY today_used ASC' % (MAX_USED_TIMES)
    res, r = c.execute(sql).fetchone(), ''
    if res is None:
        r = DB_KEY_EXHAUST
    else:
        r = res[0]
    conn.close()
    return r

def db_update_one_today_used(key):
    conn = sqlite3.connect(db)
    c = conn.cursor()
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    sql = 'UPDATE baidu_map_key_used SET today_used = today_used+1 ,update_time="%s" WHERE key="%s" ' % (
        localtime_, key)
    c.execute(sql)
    conn.commit()
    conn.close()

dir_, dir_exception, requested_file_list = 'baidu_map_uid', 'baidu_map_uid_exception', []
requested_file_dir_str, requested_file_dir_exception_str = '%s\\%s\\' % (curPath, dir_), '%s\\%s\\' % (
    curPath, dir_exception)
requested_file_dir = os.listdir(requested_file_dir_str)

def gen_requested_file_list(file_postfix='.html'):
    # collect names that were already fetched, so re-runs can skip them
    filepath = '%s\\%s' % (curPath, dir_)
    for allDir in os.listdir(filepath):
        requested_file = allDir.split('&')[0].split(file_postfix)[0]
        if requested_file not in requested_file_list:
            requested_file_list.append(requested_file)

def gen_file_data(fodir, fname, sheet_index=0):
    data = []  # initialized up front so unrecognized extensions return an empty list
    fname_open = '%s\\%s' % (fodir, fname)
    if fname.endswith('.xlsx'):
        book = xlrd.open_workbook(fname_open, on_demand=True)
        sheet = book.sheet_by_index(sheet_index)
        data = [[str(c.value) for c in sheet.row(i)] for i in range(sheet.nrows)]
        book.release_resources()
        del book
    elif fname.endswith('.csv'):
        with open(fname_open, 'r', encoding='utf-8') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',')
            for row in spamreader:
                data.append(row)
    return data

# 3 cities, 9 target POI types
request_dic, target_type_list, target_type_except_list = {}, ['北京市', '上海市', '广州市'], ['火车站', '高铁站', '汽车站', '飞机场', '小学',
                                                                                     '幼儿园', '中学',
                                                                                     '综合医院', '商场']
# ['4s店','餐饮','家电','酒店','咖啡馆','售楼处','专科医院']
# ['住宅小区','写字楼']

# file_postfix_l = ['.html', '.txt']
# for i in file_postfix_l:
#     gen_requested_file_list(i)

fname_source = 'jfinder_public_jmtool_old_data.csv'
data_file = gen_file_data(curPath, fname_source)

def replace_illegal_tag(str_):
    l = [' ', '\n', '\t']
    for i in l:
        str_ = str_.replace(i, '')
    return str_

# 碧海富通城三期(3栋) ok
# =碧海富通城-三期(3栋) ok
replace_to_empty_l = [' ', '|', '\t', '\n', '/', '?', '？', '·', '.']

def gen_bd_query_origin_name(name_):
    # strip noise characters, fold full-width parentheses to half-width,
    # and drop both half- and full-width question marks
    for i in replace_to_empty_l:
        name_ = name_.replace(i, '')
    return name_.replace('（', '(').replace('）', ')').replace('?', '').replace('？', '')
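For example, under the normalization above (the sample name echoes the comments before the list; illustrative only):

# gen_bd_query_origin_name('碧海富通城 三期（3栋）')  ->  '碧海富通城三期(3栋)'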

for l in data_file:
    # db_from, db_id, db_area_code, db_name, db_type_, db_city, db_district, db_address, db_street, db_uid, db_submit_time = l
    # db_from, id, area_code, name, type_, city, district, address, street, uid, submit_time = l
    dbid, area_code, uid, name_, type_en, city, district, address, street, db_from, submit_time, type_ = l

    if db_from == 'db_from':  # skip the CSV header row
        continue
    request_name = gen_bd_query_origin_name(name_)
    input_ = '%s%s%s' % (city, district, request_name)
    if input_ in requested_file_list:
        print('requested', input_)
        continue
    if city not in request_dic:
        request_dic[city] = {}
    if district not in request_dic[city]:
        request_dic[city][district] = {}
        request_dic[city][district]['request_name_list'] = []
        request_dic[city][district]['request_uid_list'] = []
        request_dic[city][district]['file_row_list'] = []
    if request_name not in request_dic[city][district]['request_name_list']:
        request_dic[city][district]['request_name_list'].append(request_name)
    uid = uid.replace(' ', '')
    if len(uid) > 0 and uid not in request_dic[city][district]['request_uid_list']:
        request_dic[city][district]['request_uid_list'].append(uid)
    request_dic[city][district]['file_row_list'].append(l)
del data_file

base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
# responses containing any of these markers are treated as failures and are not written to disk;
# the Chinese ones mean: "APP IP check failed", "APP does not exist, AK is wrong, check and retry",
# "daily quota exceeded, access restricted"
ex_l = ['Proxy Error', 'APP IP校验失败', 'APP不存在,AK有误请检查再重试', 'The requested URL could not be retrieved',
        'Address already in use', '天配额超限,限制访问', 'Parameter Invalid']
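The R-QUERY / R-CITY / R-AK tokens in base_url are filled by plain string replacement in fun_ below; a sketch with made-up values (the AK is a placeholder, not a real key):

# url_ = base_url.replace('R-QUERY', '某小区').replace('R-CITY', '北京市').replace('R-AK', 'my-test-ak')
# -> ...suggestion?query=某小区&region=北京市&city_limit=true&output=json&ak=my-test-ak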

write_res_file_dir = '%s\\%s\\' % (curPath, dir_)

def write_res_file(str_, input_, ak, dir_=write_res_file_dir, file_postfix='.txt'):
    for ex in ex_l:
        if str_.find(ex) > -1:
            print('EXCEPTION-', ex, 'AK-', ak, 'STR-', str_)
            return
    fname = '%s%s%s' % (dir_, input_, file_postfix)
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)
    print('ok', threading.get_ident(), input_)

class MyThread(threading.Thread):
    def __init__(self, func, args, name):
        threading.Thread.__init__(self)
        self.name, self.func, self.args = name, func, args

    def run(self):
        self.func(*self.args)  # args is a tuple; unpack it into the target function

def fun_(city):
    for district in request_dic[city]:
        for request_name in request_dic[city][district]['request_name_list']:
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                break  # note: only exits the inner loop; the next district is still attempted
            else:
                url_ = base_url.replace('R-QUERY', request_name).replace('R-CITY', city).replace('R-AK', ak)
                print(url_)
                input_ = '%s%s%s' % (city, district, request_name)

                bd_res_json_str = requests.get(url_).text
                db_update_one_today_used(ak)
                write_res_file(bd_res_json_str, input_, ak)

                # try:
                #     # gen_requested_file_list()
                #     # gen_requested_file_list('.txt')
                #     # if input_ in requested_file_list:
                #     #     continue
                #     bd_res_json_str = requests.get(url_).text
                #     db_update_one_today_used(ak)
                #     write_res_file(bd_res_json_str, input_)
                # except Exception:
                #     bd_res_json_str = '请求百度-异常'  # "request to Baidu failed"
                #     write_res_file(bd_res_json_str, input_, requested_file_dir_exception_str)
                #     print(bd_res_json_str, input_)

try:
    start_loop, stop_loop = int(sys.argv[1]), int(sys.argv[2])
except Exception:
    start_loop, stop_loop = -1, 200
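The two optional CLI arguments select a 1-based slice of the sorted city list; a usage sketch (the script filename is hypothetical):

# e.g. process only the first 50 cities:
#   python bd_uid_spider.py 1 50
# with no arguments, the defaults (-1, 200) process up to the first 200 cities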

def main():
    threads_list, nloop = [], 0
    request_dic_city_l = sorted(request_dic, reverse=False)
    for city in request_dic_city_l:
        nloop += 1
        if nloop < start_loop or nloop > stop_loop:
            continue
        thread_instance = MyThread(fun_, (city,), fun_.__name__)
        threads_list.append(thread_instance)
    for t in threads_list:
        t.daemon = False  # was `t.setDaemon = False`, which overwrote the method instead of calling it
        t.start()
    for t in threads_list:
        t.join()

if __name__ == '__main__':
    main()
