新建config.py文件:
1.分析友盟网站登录
# Form fields posted to the Umeng login endpoint.
# "username" / "password" hold placeholder text and must be replaced with real
# credentials; "token" is the value captured from the login page.
# NOTE: on Python 2 this file needs an encoding declaration on its first line
# because of the non-ASCII placeholders below.
predata = dict(
    token="7e63b6d788d9a9fa8478646a508bd44c",
    username="用户名",
    password="密码",
    sig="",
    sessionid="",
    website="umengplus",
    app_id="cnzz",
    # Page the site redirects to after a successful login.
    url="https://web.umeng.com/main.php?c=site&a=show&from=login",
)
# Request headers sent with the login POST.
#
# The browser capture this was copied from also listed the HTTP/2
# pseudo-headers (":authority", ":method", ":path", ":scheme") and a fixed
# "content-length". Pseudo-headers are not legal header field names for an
# HTTP client library and make the request fail, and the content length is
# computed from the actual body, so both are intentionally left out here.
headers = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate",
    "accept-language": "zh-CN,zh;q=0.8",
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    # Cookie string captured from a browser session.
    # NOTE(review): presumably expires - confirm whether it must be refreshed.
    "cookie": "PHPSESSID=7ordmljtqnn5khdkv0eoff9f85; cn_ea1523f470091651998a_dplus=%7B%22distinct_id%22%3A%20%2215ad491eefd30b-05c8ec563-424e002e-13c680-15ad491eefff6%22%2C%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201489625019%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201489625019%7D; _uab_collina=148962502090448104944825; cn_a61627694930aa9c80cf_dplus=%7B%22distinct_id%22%3A%20%2215ad491eefd30b-05c8ec563-424e002e-13c680-15ad491eefff6%22%7D; UM_distinctid=15ad491eefd30b-05c8ec563-424e002e-13c680-15ad491eefff6; CNZZDATA1258498910=326642109-1489621238-null%7C1489621238; UMPLUSCENTER=3058a47b577f1e4344d42b6a47e1da910bfbb32c; cna=/xBQEaOvVRUCAXVHNS5uBFpE; l=AhoaszT4hEBlhJd2dqUuVHfQ6so9k54m; isg=AjEx7bLizQlglWEdXEm6PELUQL2SPGcFGl1rAhNH9fgUOlKMWWxlYGocKnmm",
    "origin": "https://i.umeng.com",
    "referer": "https://i.umeng.com/?app_id=cnzz&redirectURL=https%3A%2F%2Fweb.umeng.com%2Fmain.php%3Fc%3Dsite%26a%3Dshow%26from%3Dlogin",
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36",
    "x-requested-with": "XMLHttpRequest",
}

# Login endpoint read by the scraper as ``config.loginUrl``; rebuilt from the
# captured request line (scheme "https", authority "i.umeng.com",
# path "/login/ajax_do").
loginUrl = "https://i.umeng.com/login/ajax_do"
2.分析我的站点
在config.py中添加如下:
# AJAX endpoint returning the account's site list (first page, pageType=30).
# The quotes are plain ASCII and "&currentPage" is spelled out: the pasted
# version had typographic quotes and the "&curren" prefix decoded into a
# currency sign, which produced an invalid literal and a broken query string.
url_list = 'https://web.umeng.com/main.php?c=site&a=show&ajax=module=list|module=isOpenTongji&search=&currentPage=1&pageType=30&sort=0'
新建cnzz_resource_resource.py脚本
import json
import requests
import os
import time
import datetime
from pymongo import MongoClient
import config
import traceback
import random
import sys
# Python 2 only: force the process-wide default string encoding to UTF-8.
# ``sys.setdefaultencoding`` is removed from ``sys`` at interpreter start-up,
# so the module has to be reloaded to get it back. On Python 3 ``reload`` is
# not a builtin and the default encoding is already UTF-8, so the step is
# skipped there instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")
class CnzzCatch(object):
    """Log in to Umeng/CNZZ and collect the account's site list.

    Constructing an instance opens a MongoDB client and an HTTP session,
    posts the login form taken from ``config`` and then fills ``self.sites``
    with ``{"siteid": ..., "name": ...}`` entries read from the site-list
    response. Call :meth:`close` when done to release both connections.
    """

    def __init__(self):
        # Per-site working state; initialised empty and not touched by the
        # methods defined in this class.
        self.site_id = ''
        self.name = ''
        self.pre_day = ''
        self.config = config
        self.dbclient = MongoClient('192.168.86.126', 27017)
        # Filled by the login step with one dict per site.
        self.sites = []
        # One session for all requests so cookies set by the login response
        # are sent with the following calls.
        self.s = requests.Session()
        self.__init_login()

    def __init_login(self):
        """Post the login form, then load the site list into ``self.sites``.

        Best effort: any error (network failure, non-JSON body, unexpected
        response layout) is printed as a traceback and leaves ``self.sites``
        without the entries of the failed response; nothing is re-raised.
        """
        try:
            self.s.post(
                self.config.loginUrl,
                data=self.config.predata,
                headers=self.config.headers,
            )
            # Random pause of less than one second between the two requests.
            time.sleep(random.random())
            r = json.loads(self.s.get(self.config.url_list).content)
            print(r)
            self.sites.extend(self._parse_sites(r))
        except Exception:
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            traceback.print_exc()

    @staticmethod
    def _parse_sites(payload):
        """Return ``[{"siteid": ..., "name": ...}, ...]`` from a site-list payload.

        Reads ``payload['data']['list']['items']`` and keeps only the entries
        that are dicts. Raises ``KeyError`` when the payload (or a dict entry)
        lacks an expected key.
        """
        return [
            {"siteid": item["siteid"], "name": item["name"]}
            for item in payload['data']['list']['items']
            if isinstance(item, dict)
        ]

    def close(self):
        """Close the HTTP session and the MongoDB client opened in ``__init__``."""
        self.s.close()
        self.dbclient.close()
# Script entry point: build the scraper (which logs in immediately), run it,
# and always release its connections afterwards.
# NOTE(review): the CnzzCatch class shown in this file defines no run()
# method - presumably it is added elsewhere; confirm before running.
if __name__ == '__main__':
    x = CnzzCatch()
    try:
        x.run()
    finally:
        # Runs even when run() raises, so connections are not left open.
        x.close()