使用fastjson解析json抓取新浪新闻文章

首先来看两个简单的 fastjson 使用示例。

例子一

package ivyy.taobao.com.domain.json;

import java.util.Iterator;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Minimal fastjson example: assembles a JSON array text by hand, parses it with
 * {@code JSONArray.parseArray} and prints the {@code age} value of every element.
 *
 * @author jilongliang
 * @version 1.0 (2014-12-19)
 */
public class JsonTest1 {
	/**
	 * Runs the example and writes one line per array element to standard output.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// The text handed to the parser uses single-quoted names and strings:
		// [{'age':22,'sex':'男','userName':'周伯通'},{'age':22,'sex':'男','userName':'令狐冲'}]
		String jsonStr = "["
				+ "{'age':22,'sex':'男','userName':'周伯通'}"
				+ "," // separator between the two array elements
				+ "{'age':22,'sex':'男','userName':'令狐冲'}"
				+ "]";

		JSONArray jarr = JSONArray.parseArray(jsonStr);

		// Typed accessors replace the raw Iterator and the unchecked cast.
		for (int idx = 0; idx < jarr.size(); idx++) {
			JSONObject job = jarr.getJSONObject(idx);
			System.out.println(job.getString("age"));
		}
	}
}

例子二

package ivyy.taobao.com.domain.json;

import ivyy.taobao.com.entity.Classz;
import ivyy.taobao.com.entity.Student;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

/**
 * fastjson round-trip example: serializes a {@code Classz} that holds one
 * {@code Student} to JSON text, parses that text back both as a generic object
 * and as a typed {@code Classz}, and prints the results.
 *
 * @author jilongliang
 * @version 1.0 (2014-12-19)
 */
public class JsonTest2 {
	/**
	 * Runs the example and writes the parsed object and the student's sex to standard output.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		Student stu1 = new Student();
		stu1.setAge(22);
		stu1.setUserName("xiaoliang");
		stu1.setSex("男");

		Classz claz1 = new Classz();
		claz1.getStudents().add(stu1);

		String jsonStr = JSON.toJSONString(claz1);

		// parse(String) is a static method: call it on the class instead of
		// allocating a JSONObject instance only to reach it.
		Object obj = JSON.parse(jsonStr);
		System.out.println(obj);

		// Typed parse back into the entity class.
		Classz clz = JSON.parseObject(jsonStr, Classz.class);
		Student st = clz.getStudents().get(0);
		System.out.println(st.getSex());
	}
}

例子一和例子二对应的实体类(Classz、Student)

package ivyy.taobao.com.entity;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * Entity representing a class (group) of students; used by the JSON examples
 * as the root object that is serialized and parsed.
 *
 * @author liangjl
 * @version 1.0 (2014-12-19)
 */
public class Classz implements Serializable{

	/** Explicit serialization version so the stream format does not depend on compiler-generated values. */
	private static final long serialVersionUID = 1L;

	/** Students of this class; starts as an empty, mutable list. */
	private List<Student> students=new ArrayList<Student>();

	/**
	 * Returns the live student list (not a copy), so callers may add to it directly.
	 *
	 * @return the list currently held by this object
	 */
	public List<Student> getStudents() {
		return students;
	}

	/**
	 * Replaces the student list with the given one.
	 *
	 * @param students the list to store; kept by reference
	 */
	public void setStudents(List<Student> students) {
		this.students = students;
	}
}
package ivyy.taobao.com.entity;

import java.io.Serializable;

/**
 * Plain bean describing a student (age, sex, user name); used by the JSON
 * examples as the element type of {@code Classz}'s student list.
 *
 * @author liangjl
 * @version 1.0 (2014-12-19)
 */
public class Student implements Serializable{
	/** Explicit serialization version so the stream format does not depend on compiler-generated values. */
	private static final long serialVersionUID = 1L;

	/** Age; {@code null} until set. */
	private Integer age;
	/** Sex; {@code null} until set. */
	private String sex;
	/** User name; {@code null} until set. */
	private String userName;

	/** @return the age, or {@code null} if it was never set */
	public Integer getAge() {
		return age;
	}
	/** @param age the age to store */
	public void setAge(Integer age) {
		this.age = age;
	}
	/** @return the sex, or {@code null} if it was never set */
	public String getSex() {
		return sex;
	}
	/** @param sex the sex to store */
	public void setSex(String sex) {
		this.sex = sex;
	}
	/** @return the user name, or {@code null} if it was never set */
	public String getUserName() {
		return userName;
	}
	/** @param userName the user name to store */
	public void setUserName(String userName) {
		this.userName = userName;
	}
}

例子三

package ivyy.taobao.com.domain.json;

import ivyy.taobao.com.utils.GlobalConstants;
import ivyy.taobao.com.utils.HttpRequestUtils;

import java.util.Iterator;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Example that downloads one page of the Sina news list, strips the callback
 * wrapper around the JSON payload, and walks {@code result.data} with fastjson,
 * printing a few fields of every article.
 *
 * @author liangjilong
 * @version 1.0 (2015-1-4)
 * Email: [email protected]
 */
public class SinaNew {

	/**
	 * Runs the example.
	 *
	 * @param args unused
	 * @throws Exception if the response is not the expected callback-wrapped JSON,
	 *                   or if downloading an article page fails
	 */
	public static void main(String[] args) throws Exception {

		String requestURL = GlobalConstants.getUrl(2, "json");
		String jsonText = HttpRequestUtils.HttpURLConnRequest(requestURL, "GET");

		// The payload arrives wrapped in a callback call; keep only the text
		// between the first '(' and the last ')'.
		int open = jsonText.indexOf('(');
		int close = jsonText.lastIndexOf(')');
		if (open < 0 || close <= open) {
			// HttpURLConnRequest returns whatever it managed to read (possibly nothing)
			// on failure; fail with a readable message instead of an index error.
			throw new IllegalStateException("Response is not wrapped in a callback call: " + jsonText);
		}
		String payload = jsonText.substring(open + 1, close);

		// Navigate the parsed tree directly instead of serializing each level
		// back to text and parsing it again.
		JSONObject root = JSONObject.parseObject(payload);
		JSONObject resObj = root == null ? null : root.getJSONObject("result");
		JSONArray dataArr = resObj == null ? null : resObj.getJSONArray("data");
		if (dataArr == null) {
			throw new IllegalStateException("Response has no result.data array: " + payload);
		}

		for (int idx = 0; idx < dataArr.size(); idx++) {
			JSONObject article = dataArr.getJSONObject(idx);

			String title = article.getString("title");
			String url = article.getString("url");
			String keywords = article.getString("keywords");
			String img = article.getString("img");
			String mediaName = article.getString("media_name");

			// Download the article body as well; it is fetched to demonstrate the
			// call but deliberately not printed, which keeps the output short.
			String newsText = GlobalConstants.getNewsContent(url);

			// The fourth field is the image URL (the earlier version printed url twice).
			System.out.println(title + "\n" + url + "\n" + keywords + "\n" + img + "\n" + mediaName);
		}
	}
}

{
    "result":{
        "status":{
            "code":"0"
        },
        "encoding":"utf-8",
        "serverSeconds":"1420345455",
        "total":"183265",
        "count":"22",
        "last_time":"1420322833",
        "data":[
            {
                "id":"1-1-31356397",
                "column":"shwx",
                "title":"男子上楼取物车被人开跑 次日接电话被骂乱停车",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/060731356397.shtml",
                "keywords":"乱停车",
                "comment_channel":"sh",
                "img":"http:\/\/www.sinaimg.cn\/dy\/s\/2015-01-04\/U10608P1T1D31356397F21DT20150104060808.jpg",
                "level":"0",
                "createtime":"1420322833",
                "ext1":"",
                "ext2":"sh:1-1-31356397:0",
                "ext3":"男子上楼取物车被人开跑.图",
                "ext4":"sh:1-1-31356397:0",
                "ext5":"新文化延吉讯(记者 张骁) 上楼取东西,车子没熄火,下楼车就不见了,车主急得一宿没睡着。然而,第二天车主就接到一个电话,对方质问他:“你怎么停的车?”\n上楼两分多钟车没了",
                "old_level":"2",
                "media_type":"tw",
                "media_name":"新文化报"
            },
            {
                "id":"1-1-31356396",
                "column":"shwx",
                "title":"女子为使皮肤好连啃3天猪蹄下巴脱臼",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/060431356396.shtml",
                "keywords":"脱臼",
                "comment_channel":"sh",
                "img":"",
                "level":"1",
                "createtime":"1420322648",
                "ext1":"",
                "ext2":"sh:1-1-31356396:0",
                "ext3":"女子连啃3天猪蹄下巴脱臼",
                "ext4":"sh:1-1-31356396:0",
                "ext5":"想着吃猪蹄对皮肤好,24岁的魏琴(化名)趁着元旦放假连着炖了三天的猪蹄黄豆汤。昨晚7点,吃晚饭时,魏琴没想到在撕扯猪蹄最有嚼劲的那部分时,突然嘴巴合不拢了,到医院一看才发现是下巴脱臼。",
                "old_level":"1",
                "media_type":"",
                "media_name":"中国网"
            },
            {
                "id":"1-1-31356389",
                "column":"shwx",
                "title":"男子因与女儿争吵在高速上赌气下车后迷路",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/055931356389.shtml",
                "keywords":"高速公路,争吵",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420322345",
                "ext1":"",
                "ext2":"sh:1-1-31356389:0",
                "ext3":"男子因与女儿争吵高速上下车",
                "ext4":"sh:1-1-31356389:0",
                "ext5":"本报讯 元旦第一天,老刘从吉林长春飞到杭州机场,跟女儿团聚。本是一家和乐的事情,可是接下来的发展,出乎了所有人的意料。\n1月1日晚上11点多,高速交警温州支队指挥中心接到报警电话,一个男人用冻僵了的声音说,自己在高速公路迷路了。",
                "old_level":"2",
                "media_type":"",
                "media_name":"钱江晚报"
            },
            {
                "id":"1-1-31356391",
                "column":"shwx",
                "title":"女孩与父亲争吵后失联半个月 被找到时已身亡",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/055931356391.shtml",
                "keywords":"遇难,失联",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420322345",
                "ext1":"",
                "ext2":"sh:1-1-31356391:0",
                "ext3":"女孩失联半个月后遇难身亡",
                "ext4":"sh:1-1-31356391:0",
                "ext5":"本报讯 新年伊始,兰溪市公安局的民警心情却是沉重的。他们寻找了半个月的失踪女孩张可欣,最终确认遇难了——元旦当天,警方公布消息:发现了张可欣的遗体,初步结论是溺水身亡。\n张可欣,灵洞烟溪人,兰荫中学高一学生,今年16岁。12月13日下午,因为回家太迟和父亲发生争吵,被父亲打了两下…",
                "old_level":"2",
                "media_type":"",
                "media_name":"钱江晚报"
            },
            {
                "id":"1-1-31356338",
                "column":"shwx",
                "title":"女子爬栏杆要跳河被6旬老人拉住",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/055831356338.shtml",
                "keywords":"跳河",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420322308",
                "ext1":"",
                "ext2":"sh:1-1-31356338:0",
                "ext3":"女子爬栏杆要跳河被老人拉住",
                "ext4":"sh:1-1-31356338:0",
                "ext5":"1月2日晚上,在海门市通吕运河海门段的东方红大桥上,上演了“惊魂一幕”,一名女子爬上桥栏杆想要跳河。幸好,一位六旬老汉发现情况并及时拦下了她。但一心求死的王某又再度试图轻生,老汉不顾安危死死拉住她并大声呼救。几位路人迅速加入营救队伍。 通讯员 张陆翔 高水江 崔龙 现代快报记者 严君…",
                "old_level":"2",
                "media_type":"",
                "media_name":"现代快报"
            },
            {
                "id":"1-1-31356336",
                "column":"fz-shyf",
                "title":"厨师穿正装冒充老总开会伺机偷盗",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/054831356336.shtml",
                "keywords":"盗窃",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420321689",
                "ext1":"",
                "ext2":"sh:1-1-31356336:0",
                "ext3":"厨师冒充老总开会伺机偷盗",
                "ext4":"sh:1-1-31356336:0",
                "ext5":"明明是个厨师,却穿得西装革履,还张口闭口称自己是老总,辗转各地的大酒店,去参加各种会议。他究竟有何目的?玄武警方将其抓获后发现,他去参会原来只是为了找机会偷东西捞一笔。 特约记者 杨维斌 现代快报记者 陶维洲",
                "old_level":"2",
                "media_type":"",
                "media_name":"现代快报"
            },
            {
                "id":"1-1-31356313",
                "column":"shwx",
                "title":"男子欲抢劫锤伤房东后跳楼身亡",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/054031356313.shtml",
                "keywords":"抢劫,跳楼",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420321205",
                "ext1":"",
                "ext2":"sh:1-1-31356313:0",
                "ext3":"男子欲抢劫锤伤房东跳楼身亡",
                "ext4":"sh:1-1-31356313:0",
                "ext5":"广州日报顺德讯 (记者曾毅) 昨日顺德警方对外通报,1月2日发生在大良某高档小区的男子坠楼死亡事件,真相是该男子意图实施抢劫,事情败露后,跳楼逃跑时跌下,最终送到医院证实死亡。\n警方通报称,1月2日18时05分,在大良一小区发生一宗抢劫案——嫌疑人以租房为名,进入小区房间后用铁锤打伤…",
                "old_level":"2",
                "media_type":"",
                "media_name":"大洋网-广州日报"
            },
            {
                "id":"1-1-31356277",
                "column":"qwys",
                "title":"夫妇在国外因婴儿早产遭巨额账单",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/053931356277.shtml",
                "keywords":"异国产子",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420321145",
                "ext1":"",
                "ext2":"sh:1-1-31356277:0",
                "ext3":"夫妇异国产子遭巨额账单",
                "ext4":"sh:1-1-31356277:0",
                "ext5":"新华社电 一对英国夫妇前往美国纽约欢度新年假期,不料怀有身孕的妻子突然临盆。因早产11周的孩子无法出院,这对夫妇面临总额13万英镑(约合20万美元)的医疗账单。\n李·约翰斯顿及夫人凯蒂·阿莫斯上月26日来到纽约度假,阿莫斯28日出现早产征兆,入院后不久产下仅1.36千克重的儿子。",
                "old_level":"2",
                "media_type":"",
                "media_name":"大洋网-广州日报"
            },
            {
                "id":"1-1-31356270",
                "column":"shwx",
                "title":"打工男子痴心文学梦10年发表300多篇作品",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/053731356270.shtml",
                "keywords":"文学梦",
                "comment_channel":"sh",
                "img":"http:\/\/www.sinaimg.cn\/dy\/s\/2015-01-04\/U11556P1T1D31356270F21DT20150104053710.jpg",
                "level":"0",
                "createtime":"1420321030",
                "ext1":"",
                "ext2":"sh:1-1-31356270:0",
                "ext3":"打工男子爱文学发表300篇作品",
                "ext4":"sh:1-1-31356270:0",
                "ext5":"从1997年离开安徽老家南下深圳打工,他当过保安、做过电焊工、营业员,2011年来到太仓后,在南郊一家金属制品厂成为一名“打杂工”,尽管很少有闲下来的时间,但他却总不忘自己的文学梦想。",
                "old_level":"2",
                "media_type":"tw",
                "media_name":"现代快报"
            },
            {
                "id":"1-1-31356268",
                "column":"shwx",
                "title":"男子向朋友借钱用假公章签合同",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/053131356268.shtml",
                "keywords":"假公章",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420320716",
                "ext1":"",
                "ext2":"sh:1-1-31356268:0",
                "ext3":"男子向借钱用假公章签合同",
                "ext4":"sh:1-1-31356268:0",
                "ext5":"做生意借钱救急,竟用假公章签合同骗得10万元现金。\n2014年11月初,常州的陆老板经人介绍,认识了一位周姓朋友以及周某的朋友蒋某,并在自己公司招待了两位朋友。交谈中,陆老板得知蒋某是同行,有相同的话题,彼此相谈甚欢。",
                "old_level":"2",
                "media_type":"",
                "media_name":"现代快报"
            },
            {
                "id":"1-1-31356200",
                "column":"shwx",
                "title":"私家车占用高速路应急车道 救护车借道被堵死",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/050031356200.shtml",
                "keywords":"应急车道",
                "comment_channel":"sh",
                "img":"",
                "level":"1",
                "createtime":"1420318807",
                "ext1":"",
                "ext2":"sh:1-1-31356200:0",
                "ext3":"私家车占用高速路应急车道",
                "ext4":"sh:1-1-31356200:0",
                "ext5":"一辆正在执行救援的救护车,欲借助高速路的应急车道通过,没想前方车道早已车满为患,一辆接一辆的私家车纷纷挡住去路,让救护车的步伐举步维艰……这是1月3日下午3点发生在成雅高速上的一幕,救护车只要1分钟走完的路却足足拖了15分钟,而这些私家车占用应急车道的行为已被监控摄像头拍下,并面临严重的处罚。",
                "old_level":"1",
                "media_type":"",
                "media_name":"四川在线-华西都市报"
            },
            {
                "id":"1-1-31356211",
                "column":"zqsk",
                "title":"独腿村医拄拐杖行医30载 累计走过10万里出诊路",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/050031356211.shtml",
                "keywords":"村医",
                "comment_channel":"sh",
                "img":"http:\/\/www.sinaimg.cn\/dy\/s\/2015-01-04\/1420318825_xAuGKC.jpg",
                "level":"1",
                "createtime":"1420318807",
                "ext1":"",
                "ext2":"sh:1-1-31356211:0",
                "ext3":"独腿村医拄拐杖行医30载.图",
                "ext4":"sh:1-1-31356211:0",
                "ext5":"单肩挑医药箱,腋下夹拐杖,龙泉驿区山泉镇红花村卫生所医生陈永根,过去30年,就是这番装束在红花村所处的两座大山之间来来回回。并由此留下独腿行走崎岖山路30载的最美村医故事。",
                "old_level":"1",
                "media_type":"tw",
                "media_name":"四川在线-华西都市报"
            },
            {
                "id":"1-1-31356150",
                "column":"shwx",
                "title":"男子盯着美女看遭呵斥 纠集哥们伤害其丈夫",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/034731356150.shtml",
                "keywords":"打架",
                "comment_channel":"sh",
                "img":"",
                "level":"1",
                "createtime":"1420314478",
                "ext1":"",
                "ext2":"sh:1-1-31356150:0",
                "ext3":"男子看美女遭呵斥伤害其丈夫",
                "ext4":"sh:1-1-31356150:0",
                "ext5":"丽丽和好友一起去K歌,上洗手间时遭到一名陌生男子目不转睛的盯看。丽丽吼了对方几句,没想到这名男子张某觉得受到侮辱,竟纠集几个哥们持酒瓶冲进包厢打砸。近日,张某因涉嫌寻衅滋事罪被武进区检察院批捕。",
                "old_level":"1",
                "media_type":"",
                "media_name":"现代快报"
            },
            {
                "id":"1-1-31356141",
                "column":"zqsk",
                "title":"男子为救患病儿子跨国求医 两年花掉400万(图)",
                "url":"http:\/\/news.sina.com.cn\/s\/p\/2015-01-04\/032831356141.shtml",
                "keywords":"求医,父爱",
                "comment_channel":"sh",
                "img":"http:\/\/www.sinaimg.cn\/dy\/s\/p\/2015-01-04\/U10608P1T1D31356141F21DT20150104032808.jpg",
                "level":"1",
                "createtime":"1420313288",
                "ext1":"",
                "ext2":"sh:1-1-31356141:0",
                "ext3":"男子为救患病儿子跨国求医.图",
                "ext4":"sh:1-1-31356141:0",
                "ext5":"所有的抢救措施,医生都用尽了。看着浑身发黑、奄奄一息的儿子,老金无限为难。儿子只有两个月大,乌溜溜的眼珠闪着光亮,像是在说,“爸爸,救救我。”老金和妻子下定决心,一定要治下去,哪怕山穷水尽。",
                "old_level":"1",
                "media_type":"tw",
                "media_name":"新文化报"
            },
            {
                "id":"1-1-31356069",
                "column":"shwx",
                "title":"智残男子奉父母命娶聋哑女 因沟通有障碍欲离婚",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/030031356069.shtml",
                "keywords":"离婚",
                "comment_channel":"sh",
                "img":"",
                "level":"1",
                "createtime":"1420311606",
                "ext1":"",
                "ext2":"sh:1-1-31356069:0",
                "ext3":"智残男子奉父母之命娶聋哑女",
                "ext4":"sh:1-1-31356069:0",
                "ext5":"有智力残疾的章某奉父母之命,娶了聋哑女子黄某。婚后两年,他以沟通有障碍为由起诉离婚。记者昨天获悉,经密云法院调解,章某撤回了起诉。",
                "old_level":"1",
                "media_type":"",
                "media_name":"京华时报"
            },
            {
                "id":"1-1-31356047",
                "column":"fz-shyf",
                "title":"男子14年前花13万购进农村房 法院现判合同无效",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/030031356047.shtml",
                "keywords":"房屋买卖",
                "comment_channel":"sh",
                "img":"",
                "level":"1",
                "createtime":"1420311605",
                "ext1":"",
                "ext2":"sh:1-1-31356047:0",
                "ext3":"男子购房14年后合同被判无效",
                "ext4":"sh:1-1-31356047:0",
                "ext5":"国企员工李某14年前从岳老太手里买了一套农房。岳老太去世后,其儿子王先生作为继承人起诉要求确认房屋买卖行为无效。记者昨天获悉,顺义法院判决王先生胜诉。",
                "old_level":"1",
                "media_type":"",
                "media_name":"京华时报"
            },
            {
                "id":"1-1-31356049",
                "column":"shwx",
                "title":"导演打黑车遗落电影素材被司机索2万元酬金",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/030031356049.shtml",
                "keywords":"黑车,酬金",
                "comment_channel":"sh",
                "img":"",
                "level":"1",
                "createtime":"1420311605",
                "ext1":"",
                "ext2":"sh:1-1-31356049:0",
                "ext3":"导演打车遗落素材被索2万酬金",
                "ext4":"sh:1-1-31356049:0",
                "ext5":"昨天,导演段先生所拍摄的儿童公益影视素材仍未能要回来。段先生称,之前他和助理携带影视素材,乘坐黑出租车时忘在车上,司机索要2万元酬谢金,交涉无果后,段先生选择报警。",
                "old_level":"1",
                "media_type":"",
                "media_name":"京华时报"
            },
            {
                "id":"1-1-31356055",
                "column":"fz-shyf",
                "title":"3名无业游民冒充将军骗走银行高管1350万(图)",
                "url":"http:\/\/news.sina.com.cn\/s\/p\/2015-01-04\/030031356055.shtml",
                "keywords":"假冒将军,诈骗",
                "comment_channel":"sh",
                "img":"http:\/\/www.sinaimg.cn\/dy\/s\/p\/2015-01-04\/U10608P1T1D31356055F21DT20150104042353.jpg",
                "level":"1",
                "createtime":"1420311605",
                "ext1":"",
                "ext2":"sh:1-1-31356055:0",
                "ext3":"无业游民扮将军行骗银行高管",
                "ext4":"sh:1-1-31356055:0",
                "ext5":"虚构“解冻民族资产”等事实,李万勇等三名冒牌将军以授予军衔及任命“副部级银行”分行行长为诱饵,诈骗银行高管李先生1350万元。记者昨天获悉,北京市二中院一审以诈骗罪判处3名被告人有期徒刑12年至无期徒刑不等刑罚。",
                "old_level":"1",
                "media_type":"tw",
                "media_name":"京华时报"
            },
            {
                "id":"1-1-31356063",
                "column":"fz-shyf",
                "title":"亲生儿子为争别墅与母亲对簿公堂",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/030031356063.shtml",
                "keywords":"对簿公堂,房产",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420311605",
                "ext1":"",
                "ext2":"sh:1-1-31356063:0",
                "ext3":"亲生母子对簿公堂争别墅",
                "ext4":"sh:1-1-31356063:0",
                "ext5":"70多岁的朱老汉为了办理房屋抵押贷款,将别墅过户给了长子。朱老汉猝死后,老伴岳老太起诉长子要求撤销房屋赠与合同。近日,通州法院判决支持了岳老太的诉求。",
                "old_level":"2",
                "media_type":"",
                "media_name":"京华时报"
            },
            {
                "id":"1-1-31356067",
                "column":"shwx",
                "title":"妻子患间歇性精神病 事业有成丈夫起诉离婚",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/030031356067.shtml",
                "keywords":"精神病,离婚",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420311605",
                "ext1":"",
                "ext2":"sh:1-1-31356067:0",
                "ext3":"妻患间歇性精神病.丈夫欲离婚",
                "ext4":"sh:1-1-31356067:0",
                "ext5":"京华时报讯(记者裴晓兰)妻子患精神病在医院治疗,事业上小有成就的丈夫坚决起诉要求离婚。近日,密云法院开庭审理了此案。\n赵先生诉称,他和李女士于1999年11月登记结婚,2000年育有一女。双方婚后因人生观、价值观不同多次发生矛盾,于2012年12月起分居至今,他起诉离婚并分割夫妻共同财产。…",
                "old_level":"2",
                "media_type":"",
                "media_name":"京华时报"
            },
            {
                "id":"1-1-31356011",
                "column":"shwx",
                "title":"男子办理低保不成投毒报复全村",
                "url":"http:\/\/news.sina.com.cn\/s\/2015-01-04\/025931356011.shtml",
                "keywords":"投毒",
                "comment_channel":"sh",
                "img":"",
                "level":"0",
                "createtime":"1420311547",
                "ext1":"",
                "ext2":"sh:1-1-31356011:0",
                "ext3":"男子办低保不成投毒报复全村",
                "ext4":"sh:1-1-31356011:0",
                "ext5":"因种种原因未能办成低保,男子竟投毒报复全村村民。记者从山西省闻喜县公安部门获悉,该县近日破获了一起故意投毒案,闻喜县侯村乡村民高某被依法批捕。\n2014年12月17日,山西省闻喜县公安局接到侯村乡村民报案称,有人在村里的蓄水池中下毒。警方锁定犯罪嫌疑人高某。高某因种种原因未能办成低…",
                "old_level":"2",
                "media_type":"",
                "media_name":"现代快报"
            },
            {
                "id":"1-1-31356129",
                "column":"shwx",
                "title":"商场派发礼品引上百人疯抢 有人被挤哭(图)",
                "url":"http:\/\/news.sina.com.cn\/s\/p\/2015-01-04\/024531356129.shtml",
                "keywords":"拥挤,踩踏",
                "comment_channel":"sh",
                "img":"http:\/\/www.sinaimg.cn\/dy\/s\/p\/2015-01-04\/U10608P1T1D31356129F21DT20150104061356.jpg",
                "level":"1",
                "createtime":"1420310759",
                "ext1":"",
                "ext2":"sh:1-1-31356129:0",
                "ext3":"商场派发礼品引上百人拥挤.图",
                "ext4":"sh:1-1-31356129:0",
                "ext5":"昨日上午10时许,数百市民前往位于延安双拥大道中段的“延百东大上品”商场,拿手机展示商场所要求集的38个微信“点赞”,争着领取床上用品。然而,由于人数众多,场面十分混乱。11时许,延安警方30余民警赶至现场劝离人群,并责令商场停止活动。事件未造成人员受伤。",
                "old_level":"1",
                "media_type":"tw",
                "media_name":"华商网-华商报"
            }
        ]
    }
}
package ivyy.taobao.com.utils;

import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 *@Author:liangjilong
 *@Date:2015-1-4
 *@Email:[email protected]
 *@Version:1.0
 *@Description
 */
public class GlobalConstants {

	/***
	 * 获取url连接
	 * @param page第几页
	 * @param format格式(XML、JSON)
	 * @return
	 */
	public static String getUrl(Integer page,String format){
		StringBuffer buffer=new StringBuffer("http://api.roll.news.sina.com.cn/zt_list?channel=news");
		String url="";
		buffer.append("&cat_1=shxw");//显示新闻
		buffer.append("&cat_2==zqsk||=qwys||=shwx||=fz-shyf");
		buffer.append("&level==1||=2");//级别
		buffer.append("&show_ext=1");
		buffer.append("&show_all=1");//显示所有
		buffer.append("&show_num=22");//显示多少条
		buffer.append("&tag=1");
		buffer.append("&format="+format);
		buffer.append("&page="+page);
		buffer.append("&callback=newsloader");
		url=buffer.toString();
		return url;
	}

	/***
	 * 获取文章的内容
	 * 从新浪的网页分析,通过文章body的id就可以拿到相应的文章内容..
	 * @param url
	 * @return
	 */
	public static String getNewsContent(String url) throws Exception{
		Document doc=Jsoup.parse(new URL(url), 3000);
		if(doc!=null){
			String artibody=doc.getElementById("artibody").html();//通过网页的html的id去拿到新闻内容artibody
			return artibody;
		}else{
			return "网络异常";
		}
	}
}
package ivyy.taobao.com.utils;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Small HTTP helper built on {@link HttpURLConnection}.
 *
 * @author liangjilong
 * @version 1.0 (2015-1-4)
 * Email: [email protected]
 */

public class HttpRequestUtils {
	/**
	 * Sends an HTTP request and returns the response body decoded as UTF-8.
	 * <p>
	 * The method name is handed to {@code setRequestMethod}; no request body is written.
	 * Lines of the response are concatenated without their line terminators.
	 * Failures are best-effort: the exception's stack trace is printed and whatever was
	 * read so far (possibly an empty string) is returned. The stream and the connection
	 * are released in all cases.
	 *
	 * @param requestUrl address to request
	 * @param method     HTTP method, e.g. GET or POST
	 * @return the response text, or the part read before a failure (empty if nothing was read)
	 */
	public static String HttpURLConnRequest(String requestUrl,String method) {
		StringBuilder buffer = new StringBuilder();
		HttpURLConnection httpUrlConn = null;
		InputStream inputStream = null;
		BufferedReader bufferedReader = null;
		try {
			URL url = new URL(requestUrl);
			httpUrlConn = (HttpURLConnection) url.openConnection();
			httpUrlConn.setDoInput(true);
			httpUrlConn.setRequestMethod(method);
			httpUrlConn.setUseCaches(false);
			httpUrlConn.setInstanceFollowRedirects(true); // follow redirects
			httpUrlConn.connect();

			// Read the response stream into the buffer line by line.
			inputStream = httpUrlConn.getInputStream();
			bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "utf-8"));

			String line;
			while ((line = bufferedReader.readLine()) != null) {
				buffer.append(line);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Cleanup lives in finally so a failure while connecting or reading
			// no longer leaks the stream or the connection.
			if (bufferedReader != null) {
				closeQuietly(bufferedReader); // also closes the wrapped stream
			} else {
				closeQuietly(inputStream);
			}
			if (httpUrlConn != null) {
				httpUrlConn.disconnect();
			}
		}
		return buffer.toString();
	}

	/** Closes the resource if it is non-null, ignoring any failure raised by close itself. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (java.io.IOException ignored) {
			// Nothing useful can be done when close fails; the data has already been read.
		}
	}
}

源代码:http://download.csdn.net/detail/jilongliang/8324543

时间: 2024-07-29 04:57:04

使用fastjson解析json抓取新浪新闻文章的相关文章

python爬虫:使用urllib.request和BeautifulSoup抓取新浪新闻标题、链接和主要内容

案例一 抓取对象: 新浪国内新闻(http://news.sina.com.cn/china/),该列表中的标题名称.时间.链接. 完整代码: from bs4 import BeautifulSoup import requests url = 'http://news.sina.com.cn/china/' web_data = requests.get(url) web_data.encoding = 'utf-8' soup = BeautifulSoup(web_data.text,'

python爬虫:抓取新浪新闻内容(从当前时间到之前某个时间段),并用jieba分词,用于训练自己的分词模型

新浪新闻内容采用的是ajax动态显示内容,通过抓包,发现如下规律: 每次请求下一页,js那一栏都会出现新的url: "http://api.roll.news.sina.com.cn/zt_list?channel=news&cat_1=gnxw&cat_2==gdxw1" "||=gatxw||=zs-pl||=mtjj&level==1||=2&show_ext=1&show_all=1&show_num=22&ta

Python抓取新浪新闻数据(二)

以下是抓取的完整代码(抓取了网页的title,newssource,dt,article,editor,comments)举例: 原文地址:http://blog.51cto.com/2290153/2126861

抓取新浪新闻的内容以及链接

import requestsfrom bs4 import BeautifulSoupres = requests.get('http://news.sina.com.cn/china/')res.encoding='utf-8'soup = BeautifulSoup(res.text,'html.parser') for news in soup.select('.news-item'): if(len(news.select('h2'))>0): h2=news.select('h2')

Node.js抓取新浪新闻标题

"use strict"; let cheerio = require("cheerio"); let http = require("http"); let iconv = require("iconv-lite"); let mainUrl = "http://news.sina.com.cn/world/"; http.get(mainUrl, function(sres) { var chunks

用Selenium抓取新浪天气

(1)用Selenium抓取新浪天气 系统环境: 操作系统:macOS 10.13.6 python :2.7.10 用虚拟环境实现 一.创建虚拟环境: mkvirtualenv --python=/usr/bin/python python_2 二.激活虚拟环境: workon python_2 三.安装Selenium pip install Selenium 四.安装firefox的Selenium补丁文件: brew install geckodriver 五.在~/.bash_prof

爬虫Scrapy学习指南之抓取新浪天气

scrapy有一个简单的入门文档,大家可以参考一下,我感觉官方文档是最靠谱的,也是最真实的. 首先我们先创建一个scrapy的项目 scrapy startproject weather 我采用的是ubuntu12.04的系统,建立项目之后主文件夹就会出现一个weather的文件夹.我们可以通过tree来查看文件夹的结构.可以使用sudoapt-get install tree安装. tree weather weather ├── scrapy.cfg ├── wea.json ├── wea

python抓取新浪首页的小例子

参考 廖雪峰的python教程:http://www.liaoxuefeng.com/wiki/001374738125095c955c1e6d8bb493182103fac9270762a000/001386832653051fd44e44e4f9e4ed08f3e5a5ab550358d000 代码: 1 #!/usr/bin/python 2 3 # import module 4 import socket 5 import io 6 7 # create TCP object 8 s

Python爬虫:新浪新闻详情页的数据抓取(函数版)

上一篇文章<Python爬虫:抓取新浪新闻数据>详细解说了如何抓取新浪新闻详情页的相关数据,但代码的构建不利于后续扩展,每次抓取新的详情页时都需要重新写一遍,因此,我们需要将其整理成函数,方便直接调用. 详情页抓取的6个数据:新闻标题.评论数.时间.来源.正文.责任编辑. 首先,我们先将评论数整理成函数形式表示: 1 import requests 2 import json 3 import re 4 5 comments_url = 'http://comment5.news.sina.c