/** * 价值在线数据-左边分类抓取 * http://www.valueonline.cn/laws/laws?typeid=96219074211635284 * @author hwaggLee */ public class UtilsHtmValueonLineType { public static void main(String[] args) { String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284"; readHtml(url); } public static List<Object> readHtml(String url){ List<Object> list = new ArrayList<Object>(); // Document doc = null; try { doc = Jsoup.connect(url).get(); } catch (Exception e) { ///e.printStackTrace(); System.out.println(e.getMessage()+":--------------->"+url); } if( doc == null )return list; Elements elScripts = doc.getElementsByTag("script"); String[] elScriptList = elScripts.get(0).data().toString().split("var"); String strTypeList = elScriptList[2]; if( StringUtils.isNotBlank(strTypeList)){ /*strTypeList = strTypeList.substring(strTypeList.indexOf("["), strTypeList.lastIndexOf("]")+1); JSONArray array = JSONArray.fromObject(strTypeList); JSONArray arrayList = JSONArray.fromObject(array.get(0)); for (Object o : arrayList) { JSONObject object = JSONObject.fromObject(o); StringBuilder sb = new StringBuilder(); sb.append("insert into n3b_vl_plate_type values "); sb.append(" ( "); sb.append("‘"+object.get("id")+"‘"); sb.append(",‘"+object.get("parentId")+"‘"); sb.append(","+object.get("level")+""); sb.append(",‘"+object.get("declareTypeName")+"‘"); sb.append(",‘"+object.get("declareTypeNo")+"‘"); sb.append(",‘"+object.get("validFlag")+"‘"); sb.append(","+object.get("oftenFlag")+""); sb.append(",‘"+object.get("showTypeName")+"‘"); sb.append(" ); "); System.out.println(sb.toString()); }*/ } strTypeList = elScriptList[3]; System.out.println(strTypeList); if( StringUtils.isNotBlank(strTypeList) ){ strTypeList = strTypeList.substring(strTypeList.indexOf("["), strTypeList.lastIndexOf("]")+1); JSONArray arrayList = JSONArray.fromObject(strTypeList); for (Object o : arrayList) { JSONObject object = JSONObject.fromObject(o); StringBuilder sb = new StringBuilder(); sb.append("insert into 
n3b_vl_market_type values "); sb.append(" ( "); sb.append("‘0"+object.get("code_value")+"‘"); sb.append(",‘"+object.get("code_name")+"‘"); sb.append(",‘"+object.get("code_no")+"‘"); sb.append(",‘"+object.get("code_value")+"‘"); sb.append(",‘"+object.get("valid_flag")+"‘"); sb.append(",‘"+object.get("version")+"‘"); sb.append(",‘"+object.get("code_type")+"‘"); sb.append(" ); "); System.out.println(sb.toString()); } } return list; } }
// Time: 2024-10-11 00:46:17