package com.hszc.commons.utils;

import java.net.HttpURLConnection;
import java.net.URL;

import org.apache.log4j.Logger;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeIterator;

/**
 * Utility for fetching a page over HTTP and re-serializing it through the
 * HTML parser.
 *
 * @author admin
 */
public class HtmlParseUtil {

    private static final Logger logger = Logger.getLogger(HtmlParseUtil.class);

    /**
     * Opens a connection to {@code url}, parses the response and concatenates
     * the HTML of every top-level node returned by the parser.
     *
     * <p>Any exception raised while connecting or parsing is logged and not
     * rethrown; in that case the buffer holds whatever was appended before the
     * failure (possibly nothing).
     *
     * @param url address of the page to fetch; it must resolve to an
     *            {@link HttpURLConnection}, otherwise the cast fails and the
     *            failure is logged like any other error
     * @return the accumulated HTML; never {@code null}
     */
    public static StringBuffer toHtml(String url) {
        StringBuffer sBuffer = new StringBuffer();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(url).openConnection();
            Parser parser = new Parser(connection);
            for (NodeIterator i = parser.elements(); i.hasMoreNodes();) {
                Node node = i.nextNode();
                sBuffer.append(node.toHtml());
            }
        } catch (Exception e) {
            logger.error("解析HTML异常", e);
        } finally {
            // Release the connection on both the success and the failure path;
            // previously it was never disconnected.
            if (connection != null) {
                connection.disconnect();
            }
        }
        return sBuffer;
    }

    /**
     * Manual smoke test: prints the HTML fetched from a fixed URL.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(toHtml("http://www.baidu.com/"));
    }
}
// Time: 2024-10-12 04:53:19