使用前，请先将 logincsdn() 中的 "用户名" 和 "密码" 替换为自己的 CSDN 帐户名和密码。
# -*- coding: utf-8 -*-
"""Auto-comment script for the resources listed on a CSDN "my downloads" page.

The script logs in through ``url_login``, walks every page under
``url_source`` and posts a fixed five-star comment on each listed resource
that still has a ``btn-comment`` link.

The ``BeautifulSoup`` module name and the list-of-pairs ``attrs`` access used
below belong to the BeautifulSoup 3 API. The syntax is kept valid on both
Python 2 and Python 3.

Replace the ``"用户名"`` / ``"密码"`` placeholders in ``logincsdn`` with real
credentials before running.
"""
import time
import traceback

import requests
from BeautifulSoup import BeautifulSoup

url_login = "http://passport.csdn.net/account/login"
url_source = "http://download.csdn.net/my/downloads/"

# One shared session: requests keeps the cookies set by the login request and
# sends them with every later request made through the same object.
sess_source = requests.session()

# Pause after each posted comment, in seconds.
COMMENT_INTERVAL_SECONDS = 70


def commitfunc(source_id, refer):
    """Post the canned five-star comment for one resource and print the reply.

    Args:
        source_id: Resource id; placed in the ``sourceid`` query parameter
            and in the ``sourceid`` form field.
        refer: Value sent in the ``Refer`` form field. The caller passes the
            second path segment of the resource link.
    """
    print("%s %s" % (source_id, refer))
    # The query string already carries the percent-encoded comment text,
    # so plain concatenation is used (%-formatting would clash with it).
    commiturl = (
        'http://download.csdn.net/index.php/comment/post_comment'
        '?jsonpcallback=jsonp1419934439524&sourceid='
        + source_id
        + '&content=%E6%88%90%E5%8A%9F%E9%85%8D%E5%AF%B9%EF%BC%8C%E5%8F%AF'
        '%E4%BB%A5%E4%BD%BF%E7%94%A8%E3%80%82&rating=5&t=1419935091974'
    )
    # NOTE(review): this dict is passed as the second positional argument of
    # Session.post, i.e. as the form body, so the header-looking entries
    # (Accept, Host, User-Agent, ...) are sent as form fields, not as HTTP
    # headers. "Refer" also looks like a typo of "Referer". Left unchanged
    # because the server's expectations cannot be verified from here.
    commitdata = {
        "Accept": "text/javascript, application/javascript, */*",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "cookie": "",
        "Host": "download.csdn.net",
        "Refer": refer,
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
        "jsonpcallback": "jsonp1419934439524",
        "sourceid": source_id,
        "content": "a good resource, it's worth to download it",
        "rating": "5",
        "t": "1419935091974",
    }
    resp = sess_source.post(commiturl, commitdata)
    print(resp.text)


def GetPageCount():
    """Return the number of download pages, as a string.

    Fetches page 1 of ``url_source`` and reads the text of the
    ``<div class="page_nav">`` element. The third whitespace-separated token
    of that text is split on ``'&'``; the first piece, minus its first and
    last character, is returned.

    Raises:
        AttributeError: if the page has no ``page_nav`` div.
        IndexError: if the navigation text has fewer than three tokens.
    """
    url_source_page = url_source + "1"
    html_source = sess_source.get(url_source_page).text
    soup_source = BeautifulSoup(html_source)
    page_count = soup_source.find('div', attrs={'class': "page_nav"}).text
    page_list = page_count.split()
    page_ac = page_list[2].split('&')
    # Drop the single characters wrapping the number on both sides.
    return page_ac[0][1:-1]


def CommitWholePage(page_nu):
    """Comment on every still-commentable resource of one download page.

    Args:
        page_nu: Page number as a string; appended to ``url_source``.

    Each ``<h3>`` with more than one child is treated as a resource entry.
    The value of the first attribute of its first child is split on ``'/'``;
    segments 2 and 3 identify the resource. A comment is posted only when
    the page contains an ``<a class="btn-comment">`` pointing at that
    resource's ``#comment`` anchor. A failure on one entry is printed and
    does not stop the remaining entries.
    """
    url_source_page = url_source + page_nu
    html_source = sess_source.get(url_source_page).text
    soup_source = BeautifulSoup(html_source)
    for element in soup_source.findAll('h3'):
        if len(element) <= 1:
            continue
        try:
            attr = element.contents[0].attrs[0][1].split('/')
            reftext = '/detail/' + attr[2] + '/' + attr[3] + '#comment'
            result = soup_source.findAll(
                'a', attrs={'href': reftext, 'class': 'btn-comment'})
            if not result:
                continue
            commitfunc(attr[3], attr[2])
            print(attr[2])
            print(attr[3])
            print("sleep")
            # Wait only after a comment was actually posted.
            time.sleep(COMMENT_INTERVAL_SECONDS)
        except Exception:
            # Best effort per entry: report the failure and move on.
            print(traceback.format_exc())


def logincsdn():
    """Log the shared session in through the form served at ``url_login``.

    Reads the hidden ``lt`` and ``execution`` input values from the login
    page and posts them back together with the credentials. The response of
    the login request is not inspected.

    Raises:
        IndexError: if the login page has no ``lt`` or ``execution`` input.
    """
    html_login = sess_source.get(url_login).text
    soup_login = BeautifulSoup(html_login)
    lt_value = soup_login.findAll('input', attrs={'name': "lt"})[0]['value']
    execution_value = soup_login.findAll(
        'input', attrs={'name': "execution"})[0]['value']
    data_login = {
        "lt": lt_value,
        "execution": execution_value,
        "_eventId": "submit",
        # Placeholders: replace with the real account name and password.
        "username": "用户名",
        "password": "密码",
    }
    sess_source.post(url_login, data_login)
    print("start auto comment")


def main():
    """Log in, then comment on every page of the download list in order."""
    logincsdn()
    total_page = GetPageCount()
    print("total page %s" % total_page)
    for num in range(1, int(total_page) + 1):
        print("current page %s" % num)
        CommitWholePage(str(num))
    print("app stop ")


if __name__ == "__main__":
    main()
时间: 2024-10-13 13:20:07