代码示例
package demo0806;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a web page and extracts its {@code <title>} together with two link lists:
 * the section introduced by a line containing {@code NewCodeList} ("recently shared code")
 * and the section introduced by a line containing {@code HotCodeList} ("hottest code").
 *
 * <p>The page is scanned line by line with plain string searches; it is not a real HTML
 * parser. Inside a list section a link is expected to be laid out as a line containing
 * {@code href="URL" target=...} followed by a line containing {@code title="NAME">}.
 * Lines that do not match this layout are skipped.
 *
 * <p>Instances are not synchronized.
 */
public class ScanTitleFromWebPage {

    /** Browser-like User-Agent; some sites reject the default Java client. */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /** Charset used when the server does not announce a usable one. */
    private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

    /** Extracts the charset name from a Content-Type header value such as "text/html; charset=utf-8". */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\"';\\s]+)", Pattern.CASE_INSENSITIVE);

    private static final String TITLE_OPEN = "<title>";
    private static final String TITLE_CLOSE = "</title>";

    private String website;

    /** Link name to URL for the "recent" section, in page order. */
    private Map<String, String> recentShareCode = new LinkedHashMap<String, String>();

    /** Link name to URL for the "hot" section, in page order. */
    private Map<String, String> hotShareCode = new LinkedHashMap<String, String>();

    /**
     * @param website URL of the page to scan, e.g. {@code http://www.oschina.net}
     */
    public ScanTitleFromWebPage(String website) {
        this.website = website;
    }

    /**
     * Fetches the page and scans it, filling the recent/hot link maps as a side effect.
     *
     * <p>I/O failures (including a malformed URL) are reported on {@code System.err} and
     * do not propagate; whatever was parsed before the failure is kept.
     *
     * @return the text between {@code <title>} and {@code </title>}, or {@code null} if
     *         the page has no complete title or could not be downloaded. A title that
     *         spans several lines is returned with its lines trimmed and joined by a
     *         single space.
     */
    public String ScanWebForTitle() {
        String title = null;
        try {
            URL url = new URL(website);
            URLConnection connection = url.openConnection();
            connection.setRequestProperty("User-Agent", USER_AGENT);

            // Decode with the charset announced by the server, falling back to UTF-8
            // when the header is absent, has no charset parameter, or names an unknown one.
            Charset charset = charsetFrom(connection.getHeaderField("Content-Type"));

            try (InputStream in = connection.getInputStream();
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
                boolean inRecent = false;
                boolean inHot = false;
                String recentHref = null;
                String hotHref = null;
                // Non-null while we are inside a <title> whose closing tag is on a later line.
                StringBuilder openTitle = null;

                String line;
                while ((line = reader.readLine()) != null) {
                    int start;
                    if (openTitle != null) {
                        int close = line.indexOf(TITLE_CLOSE);
                        appendPiece(openTitle, close == -1 ? line : line.substring(0, close));
                        if (close != -1) {
                            title = openTitle.toString();
                            openTitle = null;
                        }
                    } else if ((start = line.indexOf(TITLE_OPEN)) != -1) {
                        int valueStart = start + TITLE_OPEN.length();
                        int close = line.indexOf(TITLE_CLOSE, valueStart);
                        if (close != -1) {
                            title = line.substring(valueStart, close);
                        } else {
                            openTitle = new StringBuilder();
                            appendPiece(openTitle, line.substring(valueStart));
                        }
                    } else if (line.contains("NewCodeList")) {
                        inRecent = true;
                    } else if (line.contains("HotCodeList")) {
                        // The hot list starts where the recent list ends.
                        inRecent = false;
                        inHot = true;
                    } else if (line.contains("</div>")) {
                        inHot = false;
                    } else if ((start = line.indexOf("href=")) != -1 && (inRecent || inHot)) {
                        // Skip `href="` (6 chars); the value ends at the quote before " target".
                        String href = quotedValue(line, start + 6, " target");
                        if (href != null) {
                            if (inRecent) {
                                recentHref = href;
                            } else {
                                hotHref = href;
                            }
                        }
                    } else if ((start = line.indexOf("title=")) != -1 && (inRecent || inHot)) {
                        // Skip `title="` (7 chars); the value ends at the quote before ">".
                        String name = quotedValue(line, start + 7, ">");
                        if (name != null) {
                            if (inRecent) {
                                recentShareCode.put(name, recentHref);
                            } else {
                                hotShareCode.put(name, hotHref);
                            }
                        }
                    }
                }
            }
        } catch (IOException e) {
            System.err.println("无法下载");
            e.printStackTrace();
        }
        return title;
    }

    /** Picks the charset named in a Content-Type header value, or UTF-8 if none is usable. */
    private static Charset charsetFrom(String contentType) {
        if (contentType == null) {
            return DEFAULT_CHARSET;
        }
        Matcher matcher = CHARSET_PARAM.matcher(contentType);
        if (!matcher.find()) {
            return DEFAULT_CHARSET;
        }
        try {
            return Charset.forName(matcher.group(1));
        } catch (IllegalArgumentException unknownOrIllegalName) {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
            return DEFAULT_CHARSET;
        }
    }

    /**
     * Returns the text from {@code valueStart} up to one character before the first
     * {@code terminator} found at or after {@code valueStart} (that character is the
     * closing quote), or {@code null} if the line does not have that shape.
     */
    private static String quotedValue(String line, int valueStart, String terminator) {
        int end = line.indexOf(terminator, valueStart);
        if (end - 1 < valueStart) {
            return null; // terminator missing, or no room for a closing quote
        }
        return line.substring(valueStart, end - 1);
    }

    /** Appends the trimmed piece to the buffer, separated by one space; blank pieces are ignored. */
    private static void appendPiece(StringBuilder buffer, String piece) {
        String trimmed = piece.trim();
        if (trimmed.isEmpty()) {
            return;
        }
        if (buffer.length() > 0) {
            buffer.append(' ');
        }
        buffer.append(trimmed);
    }

    /** Writes the text followed by a newline, encoded as UTF-8. */
    private static void writeLine(OutputStream out, String text) throws IOException {
        out.write(text.getBytes(StandardCharsets.UTF_8));
        out.write('\n');
    }

    /** Writes every entry as {@code name<TAB>(url)} to both the stream and standard output. */
    private static void report(OutputStream out, Map<String, String> links) throws IOException {
        for (Entry<String, String> link : links.entrySet()) {
            String row = link.getKey() + "\t" + "(" + link.getValue() + ")";
            writeLine(out, row);
            System.out.println(row);
        }
    }

    /**
     * Scans the OSChina home page, prints the title and both link lists, and writes the
     * same report (UTF-8 encoded) to {@code OSChomepage.html} in the working directory.
     *
     * @throws IOException if the report file cannot be created or written
     */
    public static void main(String[] args) throws InterruptedException, IOException {
        String website = "http://www.oschina.net";
        ScanTitleFromWebPage scanner = new ScanTitleFromWebPage(website);
        String title = scanner.ScanWebForTitle();

        // try-with-resources closes the report file even if a write fails.
        try (OutputStream out = new FileOutputStream(new File("OSChomepage.html"))) {
            if (title != null) {
                String str = "网站标题为:" + title;
                writeLine(out, str);
                System.out.println(str);
            }

            writeLine(out, "----------------最新分享代码有如下----------------");
            System.out.println("----------------最新分享代码有如下----------------");
            report(out, scanner.recentShareCode);

            writeLine(out, "----------------本周最热门代码有如下----------------");
            System.out.println("------------------本周最热门代码有如下-----------------");
            report(out, scanner.hotShareCode);
        }
    }
}
运行结果
网站标题为:开源中国 - 找到您想要的开源项目,分享和交流 ----------------最新分享代码有如下---------------- iOS 一个函数同时返回多个参数的策略 (http://www.oschina.net/code/snippet_865986_50059) jquery插件--ajaxfileupload.js (http://www.oschina.net/code/snippet_105637_50057) 计蒜客-挑战难题-6 (http://www.oschina.net/code/snippet_587996_50055) 图片延迟加载简单原理 (http://www.oschina.net/code/snippet_1590754_50058) 我该如何书写一段能实现早睡早起的代码? (http://www.oschina.net/code/snippet_1168184_50061) shell获取当前脚本执行绝对路径 (http://www.oschina.net/code/snippet_1988965_50056) 冒泡排序算法java实现 (http://www.oschina.net/code/snippet_587996_50052) js脚本控制翻页控件概述。这个控件主要用来翻页的一个效果,如果有喜欢的可以那去参考,呵呵 (http://www.oschina.net/code/snippet_1862064_50060) 选择排序算方法java实现 (http://www.oschina.net/code/snippet_587996_50053) 计蒜客-挑战难题-5 (http://www.oschina.net/code/snippet_587996_50054) ------------------本周最热门代码有如下----------------- python实现爬图,不要再爬妹子图了,太没品了 (http://www.oschina.net/code/snippet_2371155_49889) 通过银行卡号取得银行名字 (http://www.oschina.net/code/snippet_1252640_49997) Java反射基础,构建框架(重要) (http://www.oschina.net/code/snippet_2345495_49988) spring boot + mybatis+ spring mvc整合 (http://www.oschina.net/code/snippet_2325859_49871) 全医通 - HTML5开发,单页集成版 (http://www.oschina.net/code/snippet_2287693_50012) 微信公众号支付 (http://www.oschina.net/code/snippet_1754599_49966) 我的Eclipse代码格式化风格 (http://www.oschina.net/code/snippet_1584959_49953) python简单爬虫 (http://www.oschina.net/code/snippet_2391943_49998) 获取情敌电脑内照片神器 (http://www.oschina.net/code/snippet_2425035_49995) 12306火车票API接口QQ 89914505 (http://www.oschina.net/code/snippet_811693_49880)
时间: 2024-10-14 02:24:54