discuz 中实现从微信抓取消息并发帖

这是我入职公司后写的第一个程序。当时并没有太多地接触 PHP，对 PHP 也不是很了解，是经过慢慢的学习才完成的。虽然很 low，但我觉得很有意义！

<?php 

// Settings for the WeChat -> forum import, kept under $_G['wx_g']['init'].
$_G['wx_g'] = array(
	'init' => array(
		// Values copied onto every forum thread/post created by the import.
		'wx_thread' => array(
			'fid' => 36,
			'author' => "admin",
			'authorid' => "1",
			'lastposter' => "admin",
			'uid' => 1,
			'width' => 166,
			'usesig' => 1,
		),
		// Login credentials of the WeChat public account (placeholders).
		'wx_content' => array(
			'weixin_user' => "微信账号",
			'weixin_pass' => "微信密码",
		),
	),
);

// Main flow: log in to WeChat, fetch the article list, then for every article
// that has not been imported yet store it in the staging table, download its
// cover image and publish it as a forum thread with the cover as attachment.
wx_login();

$messge_list = get_message_list();

// Only the first list entry is imported; its 'multi_item' array holds the
// individual articles. When login or scraping failed the payload is not an
// array, so fall back to an empty list instead of iterating over null.
$wx_articles = array();
if (isset($messge_list['item'][0]['multi_item']) && is_array($messge_list['item'][0]['multi_item'])) {
	$wx_articles = $messge_list['item'][0]['multi_item'];
}

// The creation time lives on the outer list entry, not on each article.
$wx_create_time = isset($messge_list['item'][0]['create_time']) ? (int)$messge_list['item'][0]['create_time'] : time();

foreach ($wx_articles as $val) {

	// file_id ends up in SQL and in a file name, so accept plain digits only.
	$wid = isset($val['file_id']) ? (string)$val['file_id'] : '';
	if (!preg_match('/^[0-9]+\z/', $wid)) {
		continue;
	}

	// Skip articles that are already in the staging table.
	if (DB::result_first("SELECT COUNT(wid) FROM test.jiaojing_weixintable WHERE wid=%s", array($wid))) {
		continue;
	}

	$val['title'] = mb_convert_encoding(isset($val['title']) ? (string)$val['title'] : '', 'GBK', 'UTF-8');
	$val['content'] = (isset($val['content_url']) && $val['content_url'] !== '') ? get_content($val['content_url']) : '';
	$val['create_time'] = $wx_create_time;

	// ---- staging table ----
	// Title and content are scraped text: always pass them as placeholders so
	// quotes in an article cannot break (or inject into) the statement.
	$count1 = DB::query(
		"INSERT INTO test.jiaojing_weixintable (wid,title,content,date_time) VALUES (%s,%s,%s,%s)",
		array($wid, $val['title'], $val['content'], $val['create_time'])
	);

	// Defaults used for the attachment row when no image could be stored.
	$filesize = 0;
	$new_img_url = '';

	if ($count1) {
		// The cover is served over https, which could not be saved here;
		// rewrite only the scheme and leave the rest of the URL untouched.
		$img_url = preg_replace('/^https:/i', 'http:', isset($val['cover']) ? (string)$val['cover'] : '');

		$img_name = DISCUZ_ROOT . "data/attachment/forum/jiaojing_images/{$wid}.jpg";
		$new_img_url = "jiaojing_images/{$wid}.jpg";

		if (getImage($img_url, $img_name) !== false && is_file($img_name)) {
			$filesize = (int)filesize($img_name);
		}

		DB::query(
			"UPDATE test.jiaojing_weixintable SET img_url=%s, img_size=%s WHERE wid=%s",
			array($new_img_url, $filesize, $wid)
		);
	}

	// ---- forum thread / post ----
	$thread_cfg = $_G['wx_g']['init']['wx_thread'];

	$fid = $thread_cfg['fid'];               // forum the thread is posted in
	$author = $thread_cfg['author'];         // member name of the poster
	$authorid = $thread_cfg['authorid'];     // member uid of the poster
	$lastposter = $thread_cfg['lastposter']; // member name shown as last poster
	$uid = $thread_cfg['uid'];               // uid recorded on the attachment rows
	$usesig = $thread_cfg['usesig'];         // show signature: 1 or 0
	$subject = $val['title'];
	$message = $val['content'];
	$dateline = $val['create_time'];
	$lastpost = $val['create_time'];
	// REMOTE_ADDR is not set when the script runs from CLI/cron.
	$useip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
	$attachment1 = 2;                        // 2 = thread/post carries an attachment
	$filename = "{$wid}.jpg";
	$attachment = $new_img_url;

	// Reserve a new post id (inserting 0 lets the auto-increment column assign it).
	$pid = DB::insert('forum_post_tableid', array('pid' => 0), true);

	$tid = DB::insert('forum_thread', array(
		'fid' => $fid,
		'author' => $author,
		'authorid' => $authorid,
		'subject' => $subject,
		'dateline' => $dateline,
		'lastpost' => $lastpost,
		'lastposter' => $lastposter,
		'attachment' => $attachment1,
	), true);

	DB::insert('forum_newthread', array(
		'tid' => $tid,
		'fid' => $fid,
		'dateline' => $dateline,
	));

	DB::insert('forum_post', array(
		'pid' => $pid,
		'tid' => $tid,
		'fid' => $fid,
		'first' => 1,
		'author' => $author,
		'authorid' => $authorid,
		'subject' => $subject,
		'dateline' => $dateline,
		'message' => $message,
		'useip' => $useip,
		'usesig' => $usesig,
		'htmlon' => 1,
		'attachment' => $attachment1,
	));

	// ---- attachment ----
	// Attachment details are sharded over forum_attachment_0..9 by the last
	// digit of the thread id.
	$tableid = dintval(substr((string)$tid, -1));

	$aid = DB::insert('forum_attachment', array(
		'tid' => $tid,
		'pid' => $pid,
		'uid' => $uid,
		'tableid' => $tableid,
	), true);

	// NOTE(review): width stays hard-coded to 500 as before; the configured
	// 'width' setting (166) was never applied — confirm which one is intended.
	DB::insert('forum_attachment_' . $tableid, array(
		'aid' => $aid,
		'tid' => $tid,
		'pid' => $pid,
		'uid' => $uid,
		'dateline' => $dateline,
		'filename' => $filename,
		'filesize' => $filesize,
		'width' => 500,
		'attachment' => $attachment,
		'isimage' => 1,
	));

	// ---- forum counters ----
	// NOTE(review): only the timestamp is written to the forum's lastpost
	// field, exactly as before — confirm that is the format the forum expects.
	C::t('forum_forum')->update($fid, array('lastpost' => $lastpost));
	C::t('forum_forum')->update_forum_counter($fid, 1, 1, 1);

}

/**
 * Download a WeChat article page and return its body as plain GBK text.
 *
 * The text is taken from the <div class="text"> element, converted from
 * UTF-8 to GBK and stripped of all HTML tags.
 *
 * NOTE(review): the pattern has no "s" modifier and is greedy, so it only
 * matches when the opening tag and a closing </div> are on the same line and
 * then captures up to the last </div> on that line. Kept as-is.
 *
 * @param string $url address of the article page
 * @return string extracted text, or '' when the page cannot be loaded or the
 *                content element is not found
 */
function get_content($url){

	if ($url === '' || $url === null) {
		return '';
	}

	$page = file_get_contents($url);
	if ($page === false) {
		return '';
	}

	// Without a match there is nothing to convert; avoid reading $found[1].
	if (!preg_match('/<div class="text">(.*)<\/div>/', $page, $found)) {
		return '';
	}

	$text = mb_convert_encoding($found[1], "GBK", "UTF-8");

	return strip_tags($text);

}

/**
 * Fetch the article list of the logged-in WeChat public account.
 *
 * Requests the appmsg list page using the cookie and token that wx_login()
 * stored in $_G['wx_g'], cuts the JSON object assigned to "wx.cgiData" out of
 * the returned page (everything up to the "file_cnt" key) and decodes it.
 *
 * @return array|null decoded list data; null when the request fails, the
 *                    marker is missing or the JSON cannot be decoded
 */
function get_message_list(){

	global $_G;

	$cookie = isset($_G['wx_g']['cookie']) ? $_G['wx_g']['cookie'] : '';
	$token = isset($_G['wx_g']['token']) ? $_G['wx_g']['token'] : '';

	// The same address is used as request URL and as referer.
	$url = "http://mp.weixin.qq.com/cgi-bin/appmsg?begin=0&count=10&t=media/appmsg_list&type=10&action=list&token=".$token."&lang=zh_CN";

	$ch = curl_init();

	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_COOKIE, $cookie);
	curl_setopt($ch, CURLOPT_REFERER, $url);
	curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0");
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION,true);
	// NOTE(review): certificate checks are disabled, so the session cookie can
	// be intercepted on a redirect to https — enable once a CA bundle is available.
	curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
	curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
	// The protocol version is no longer pinned to SSLv3: current TLS stacks
	// reject it, so let cURL negotiate the version with the server.
	$page = curl_exec($ch);
	curl_close($ch);

	if (!is_string($page)) {
		return null;
	}

	// Keep what follows "wx.cgiData = " ...
	$pieces = explode('wx.cgiData = ', $page);
	if (!isset($pieces[1])) {
		return null;
	}

	// ... up to the "file_cnt" key, then close the truncated object again.
	$pieces = explode(',"file_cnt":', $pieces[1]);
	$json = $pieces[0] . '}';

	$message_list = json_decode($json, true);

	return is_array($message_list) ? $message_list : null;

}

/**
 * Log in to the WeChat public platform and remember the session.
 *
 * Posts the configured user name and the md5 of the configured password to
 * the login endpoint, collects the Set-Cookie headers of the response and
 * reads the token from the "redirect_url" field of the JSON body. On success
 * the cookie string and token are stored in $_G['wx_g']['cookie'] and
 * $_G['wx_g']['token']. When no token is returned cpmsg() is called with an
 * error before the (empty) values are stored.
 *
 * @return void
 */
function wx_login(){

	// $installlang / $request_url are only used for the error message below;
	// they were never in scope inside this function, so pull them from the
	// global scope. NOTE(review): confirm the including script defines them.
	global $_G, $installlang, $request_url;

	$username = $_G['wx_g']['init']['wx_content']['weixin_user'];
	$pwd = md5($_G['wx_g']['init']['wx_content']['weixin_pass']);

	$url = "https://mp.weixin.qq.com/cgi-bin/login?lang=zh_CN";
	// URL-encode the fields so special characters in the account name cannot
	// corrupt the request body.
	$post_data = http_build_query(array(
		'username' => $username,
		'pwd' => $pwd,
		'imgcode' => '',
		'f' => 'json',
	), '', '&');
	$cookie = "pgv_pvid=2067516646";

	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_HEADER, 1);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_POST, 1);
	curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
	curl_setopt($ch, CURLOPT_COOKIE, $cookie);
	curl_setopt($ch, CURLOPT_REFERER, "https://mp.weixin.qq.com/cgi-bin/loginpage?t=wxm2-login&lang=zh_CN");
	curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0");
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION,true);
	// NOTE(review): certificate checks are disabled although credentials are
	// sent over this connection — enable once a CA bundle is available.
	curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
	curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
	// The protocol version is no longer pinned to SSLv3: current TLS stacks
	// reject it, so let cURL negotiate the version with the server.
	$output = curl_exec($ch);
	$header_size = (int)curl_getinfo($ch, CURLINFO_HEADER_SIZE);
	curl_close($ch);

	// Split by the header size cURL reports rather than by the first blank
	// line, so redirects or interim responses cannot shift the body.
	$header = '';
	$body = '';
	if (is_string($output)) {
		$header = (string)substr($output, 0, $header_size);
		$body = (string)substr($output, $header_size);
	}

	preg_match_all("/set\-cookie:([^\r\n]*)/i", $header, $matches);

	// Use the first four cookies when at least three were sent, otherwise the
	// first two; array_slice() copes with fewer entries than requested.
	$wanted = empty($matches[1][2]) ? 2 : 4;
	$cookie = implode('', array_slice($matches[1], 0, $wanted));

	$cookie = str_replace(array('Path=/',' ; Secure; HttpOnly','=;'),array('','','='), $cookie);
	$cookie = 'pgv_pvid=6648492946;'.$cookie;

	// The token is the part of redirect_url that follows "token=".
	$token = '';
	$data = json_decode($body,true);
	if (is_array($data) && isset($data['redirect_url'])) {
		$result = explode('token=', (string)$data['redirect_url']);
		if (isset($result[1])) {
			$token = $result[1];
		}
	}

	if(!$token){
		$error_message = isset($installlang['import_error_password']) ? $installlang['import_error_password'] : 'WeChat login failed: no token returned';
		cpmsg($error_message, "{$request_url}&step=import&pswerror=1", 'error');
	}

	// Publish the session for the other helpers.
	$_G['wx_g']['cookie'] = $cookie;
	$_G['wx_g']['token'] = $token;

}

//微信图片保存函数
/**
 * Download a WeChat image and store it on disk.
 *
 * The request carries the session cookie from $_G['wx_g']['cookie'] (when
 * set). An existing file at $filename is overwritten, so fetching the same
 * image twice cannot produce a file with duplicated data.
 *
 * @param string $url      address of the image
 * @param string $filename path the image is written to
 * @return string|false $filename on success; false when the URL or file name
 *                      is empty, the download fails or the file cannot be written
 */
function getImage($url,$filename=''){
	global $_G;

	if($url=='' || $filename==''){return false;}

	$cookie = isset($_G['wx_g']['cookie']) ? $_G['wx_g']['cookie'] : '';

	$ch=curl_init();
	curl_setopt($ch,CURLOPT_URL,$url);
	curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
	curl_setopt($ch, CURLOPT_COOKIE, $cookie);
	curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0");

	$img=curl_exec($ch);
	curl_close($ch);

	// Do not leave an empty file behind when nothing was downloaded.
	if(!is_string($img) || $img===''){
		return false;
	}

	if(file_put_contents($filename,$img)===false){
		return false;
	}

	return $filename;
}

时间： 2024-08-01 10:24:43

discuz 中实现从微信抓取消息并发帖的相关文章

python threading模块中对于信号的抓取

最近的物联网智能网关(树莓派)项目中遇到这样一个问题:要从多个底层串口读取发来的数据,并且做出相应的处理,对于每个串口的数据的读取我能想到的可以采用两种方式: 一种是采用轮询串口的方式,例如每3s向每个串口的buffer区去取一次数据,但是这样可能会有缓冲区溢出的可能,同时,数据的同步也可能会出现一定的问题,因为数据的上传周期是可以用户自定义的,一旦用户定义的上传周期过短或过长,都可能造成读取的数据出问题. 另一种方式,就是采用多线程方式,把每个串口读取数据放在单独的子线程中,每个子线程阻塞于串

MQTT的学习研究（九）基于HTTP GET MQTT 抓取消息服务端使用

官方参看文档: HTTP GET 接收主题请求协议和响应协议http://publib.boulder.ibm.com/infocenter/wmqv7/v7r0/topic/com.ibm.mq.csqzau.doc/ts21230_.htm HTTP POST 发布主题请求协议和响应协议http://publib.boulder.ibm.com/infocenter/wmqv7/v7r0/topic/com.ibm.mq.csqzau.doc/ts21220_.htm HTTP DELETE

python抓取数据，python使用socks代理抓取数据

在python中,正常的抓取数据直接使用urllib2 这个模块: import urllib2 url = 'http://fanyi.baidu.com/' stream = urllib2.urlopen(url) cont = stream.read() print cont 如果要走http代理的话,我们也可以使用urllib2,不需要引用别的模块: import urllib2 url = 'https://clients5.google.com/pagead/drt/dn/dn.j

利用scrapy抓取网易新闻并将其存储在mongoDB

好久没有写爬虫了，写一个 scrapy 的小爬虫来抓取网易新闻，代码原型是 GitHub 上的一个爬虫。近期也看了一点 MongoDB，顺便小用一下，体验一下 NoSQL 是什么感觉。言归正传，scrapy 爬虫主要有几个文件需要改动。这个爬虫需要你先安装 MongoDB 数据库和 pymongo；进入数据库之后，利用 find 语句就能够查看数据库中的内容，抓取的内容如下所示: { "_id" : ObjectId("5577ae44745d785e65fa8686"), "…

关于http抓取字段的一些事

一. 需求万维网上有着无数的网页,包含着海量的信息,无孔不入.森罗万象.但很多时候,无论出于数据分析或产品需求,我们需要从某些网站,提取出我们感兴趣.有价值的内容,但是纵然是进化到21世纪的人类,依然只有两只手,一双眼,不可能去每一个网页去点去看,然后再复制粘贴.所以我们需要一种能自动获取网页内容并可以按照指定规则提取相应内容的程序,这就是爬虫. 二. 原理传统爬虫从一个或若干初始网页的URL开始,获得初始网页上的URL,在抓取网页的过程中,不断从当前页面上抽取新的URL放入队列,直到满足系

Python爬虫：新浪新闻详情页的数据抓取（函数版）

上一篇文章<Python爬虫:抓取新浪新闻数据>详细解说了如何抓取新浪新闻详情页的相关数据,但代码的构建不利于后续扩展,每次抓取新的详情页时都需要重新写一遍,因此,我们需要将其整理成函数,方便直接调用. 详情页抓取的6个数据:新闻标题.评论数.时间.来源.正文.责任编辑. 首先,我们先将评论数整理成函数形式表示: 1 import requests 2 import json 3 import re 4 5 comments_url = 'http://comment5.news.sina.c

java抓取动态生成的网页

最近在做项目的时候有一个需求:从网页面抓取数据,要求是首先抓取整个网页的html源码(后期更新要使用到).刚开始一看这个简单,然后就稀里哗啦的敲起了代码(在这之前使用过Hadoop平台的分布式爬虫框架Nutch,使用起来是很方便,但是最后因为速度的原因放弃了,但生成的统计信息在后来的抓取中使用到了),很快holder.html和finance.html页面成功下载完成,然后解析完holder.html页面之后再解析finance.html,然后很沮丧的发现在这个页面中我需要的数据并没有在html

Nutch2.2.1抓取流程

一.抓取流程概述 1.nutch抓取流程当使用crawl命令进行抓取任务时,其基本流程步骤如下: (1)InjectorJob 开始第一个迭代 (2)GeneratorJob (3)FetcherJob (4)ParserJob (5)DbUpdaterJob (6)SolrIndexerJob 开始第二个迭代 (2)GeneratorJob (3)FetcherJob (4)ParserJob (5)DbUpdaterJob (6)SolrIndexerJob 开始第三个迭代 -- 2.抓取