同学突然有个蛋疼的需求,抓取以下界面的数据至数据库,我因为很闲,更加蛋疼的自告奋勇帮忙完成。
http://www.shmet.com/Template/_Template.html?viewName=_HomeSpotPrice&metalid=10133%2C10131%2C10132%2C10002%2C10003%2C10134%2C10135&_=1453249939502
主要分为3部分:
1)从网页获取数据。
2)将获取的数据实体list集合中去。
3)存入数据库中。
从网页获取数据代码如下(获取到的是xml格式数据):
public class GetHttpXml { public static List<Metal> captureJavascript() throws Exception { String strURL = "http://www.shmet.com/Template/_Template.html?viewName=_HomeSpotPrice&metalid=10133%2C10131%2C10132%2C10002%2C10003%2C10134%2C10135&_=1453249939502"; URL url = new URL(strURL); HttpURLConnection httpConn = (HttpURLConnection) url.openConnection(); InputStreamReader input = new InputStreamReader(httpConn .getInputStream(), "utf-8"); BufferedReader bufReader = new BufferedReader(input); String line = ""; StringBuilder contentBuf = new StringBuilder(); while ((line = bufReader.readLine()) != null) { contentBuf.append(line); } DomParseService dom = new DomParseService(); List<Metal> dataList = dom.getData(contentBuf.toString().trim()); for(Metal metal : dataList){ System.out.println(metal.getChange() + "/ " + metal.getDate() + "/ " + metal.getName() + "/ " + metal.getPrice() + "/ " + metal.getUpWater()); } return dataList; }
这大部分是网上当的,基本内容一次性成功,所以不作说明。
接下来解析xml,我使用的是dom4j解析,由于格式不大友好,还是折腾我一段时间的,代码如下:
import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.Element; public class DomParseService { List<Metal> listMetal = new ArrayList<Metal>(); public List<Metal> getData(String inputStream) throws Exception{ Document document = DocumentHelper.parseText(inputStream); //获取根节点元素对象 Element rootElement = document.getRootElement(); // 获取tbody节点。 Element element = rootElement.element("tbody"); Iterator<Element> iteratorTbody = element.elementIterator(); while(iteratorTbody.hasNext()){ Element eleTr = iteratorTbody.next(); if(eleTr.attributeCount() > 0){ Iterator<Element> iteratorTr = eleTr.elementIterator(); Metal metal = new Metal(); while(iteratorTr.hasNext()){ Element eleTd = iteratorTr.next(); // System.out.println(eleTd.getText() + " ---- " + eleTd.attributeValue("class") + "\n\t"); if(eleTd.attributeCount() == 0){ metal.setUpWater(eleTd.getText()); } else if(eleTd.attributeCount() > 0) { // System.out.println(eleTd.getText() + " ---- " + eleTd.attributeValue("class").trim() + "\n\t"); if(eleTd.attributeValue("class").indexOf("name") != -1){ metal.setName(eleTd.getText()); } else if(eleTd.attributeValue("class").indexOf("price") != -1){ metal.setPrice(eleTd.getText()); } else if(eleTd.attributeValue("class").indexOf("change") != -1){ Iterator<Element> iteratorFont = eleTd.elementIterator(); while(iteratorFont.hasNext()){ Element eleFont = iteratorFont.next(); metal.setChange(eleFont.getText()); } } else if(eleTd.attributeValue("class").indexOf("today") != -1){ metal.setDate(eleTd.getText()); } } // System.out.println(metal.getChange() + "/ " + metal.getDate() + "/ " + metal.getName() + "/ " + metal.getPrice() + "/ " + metal.getUpWater() + "\n\t"); } listMetal.add(metal); } } return listMetal; } }
在上面有个疑惑:如eleTd.attributeValue("class").indexOf("price") != -1就是想判断属性class的值是否包含price字段,原先用eleTd.attributeValue("class").Trim() == "price"没有判断成功,eleTd.attributeValue("class").Trim()应该不包含别的字符,怎么会出错呢?
PS:我是用dom4j解析的,需要导入dom4j.jar包。
最后就是将数据导入数据库中,使用的是网上搜到的工具:
PS:这个也需要导包,从微软上下sqljdbc.jar包
import java.lang.reflect.*; import java.sql.*; import java.util.*; public class SqlHelper { // SQL Server /** * JDBC驱动名称 */ public static final String CLASS_NAME = "com.microsoft.sqlserver.jdbc.SQLServerDriver"; /** * 数据库连库字符串 */ public static final String URL = "jdbc:sqlserver://192.168.63.109;databaseName=HTTPCollection"; /** * 用户名 */ public static final String UID = "sa"; /** * 密码 */ public static final String PWD = "jxjxjx123"; /** * JDBC驱动类型 */ public static Class CLS = null; // Oracle // public static final String CLASS_NAME = // "oracle.jdbc.driver.OracleDriver"; // public static final String URL = // "jdbc:oracle:thin:@localhost:1522:accp11g"; // public static final String UID = "system"; // public static final String PWD = "manager"; /** * 获取数据库连接对象 * * @return * @throws ClassNotFoundException * @throws SQLException */ public static Connection getConnection() throws ClassNotFoundException, SQLException { if (CLS == null) { CLS = Class.forName(CLASS_NAME); } return DriverManager.getConnection(URL, UID, PWD); } /** * 执行SQL语句不返回查询的操作,返回受影响的行数 * * @param sql * SQL语句 * @return 受影响的行数 * @throws ClassNotFoundException * @throws SQLException */ public static int executeNonQuery(String sql) { int result = -1; Connection con = null; PreparedStatement ps = null; try { con = getConnection(); ps = con.prepareStatement(sql); result = ps.executeUpdate(); } catch (Exception e) { e.printStackTrace(); } finally { close(con, ps, null); } return result; } /** * 执行Insert语句,返回Insert成功之后标识列的值 * * @param sql * @return * @throws ClassNotFoundException * @throws SQLException */ public static int executeIdentity(String sql) { int identity = -1; Connection con = null; Statement ps = null; ResultSet rs = null; try { con = getConnection(); ps = con.createStatement(); ps.executeUpdate(sql, Statement.RETURN_GENERATED_KEYS); rs = ps.getGeneratedKeys(); if (rs.next()) { // identity = rs.getInt("GENERATED_KEYS"); identity = rs.getInt(1); } } catch (Exception e) { e.printStackTrace(); } finally { close(con, ps, null); } return identity; } /** * 执行不返回结果集的存储过程 * * @param sql * 存储过程名称 * @param params * 存储过程参数 * @throws ClassNotFoundException * @throws SQLException */ public static void executeNonQuery(String sql, SqlParameter... params) { Connection con = null; CallableStatement cs = null; try { con = getConnection(); cs = con.prepareCall(sql); setSqlParameter(cs, params); cs.executeUpdate(); getSqlParameter(cs, params); } catch (Exception e) { e.printStackTrace(); } finally { close(con, cs, null); } } /** * 执行返回聚合函数的操作 * * @param sql * 含有聚合函数的SQL语句 * @return 聚合函数的执行结果 * @throws SQLException * @throws ClassNotFoundException */ public static int executeScalar(String sql) { int result = -1; Connection con = null; PreparedStatement ps = null; ResultSet rs = null; try { con = getConnection(); ps = con.prepareStatement(sql); rs = ps.executeQuery(); if (rs.next()) { result = rs.getInt(1); } } catch (Exception e) { e.printStackTrace(); } finally { close(con, ps, rs); } return result; } /** * 执行返回泛型集合的SQL语句 * * @param cls * 泛型类型 * @param sql * 查询SQL语句 * @return 泛型集合 * @throws ClassNotFoundException * @throws SQLException * @throws InstantiationException * @throws IllegalAccessException */ public static <T> List<T> executeList(Class<T> cls, String sql) { List<T> list = new ArrayList<T>(); Connection con = null; PreparedStatement ps = null; ResultSet rs = null; try { con = getConnection(); ps = con.prepareStatement(sql); rs = ps.executeQuery(); while (rs.next()) { T obj = executeResultSet(cls, rs); list.add(obj); } } catch (Exception e) { e.printStackTrace(); } finally { close(con, ps, rs); } return list; } /** * 执行返回泛型集合的存储过程 * * @param cls * 泛型类型 * @param sql * 存储过程名称 * @param params * 存储过程参数 * @return 泛型集合 * @throws ClassNotFoundException * @throws SQLException * @throws InstantiationException * @throws IllegalAccessException */ public static <T> List<T> executeList(Class<T> cls, String sql, SqlParameter... params) { List<T> list = new ArrayList<T>(); Connection con = null; CallableStatement cs = null; ResultSet rs = null; try { con = getConnection(); cs = con.prepareCall(sql); setSqlParameter(cs, params); rs = cs.executeQuery(); while (rs.next()) { T obj = executeResultSet(cls, rs); list.add(obj); } } catch (Exception e) { e.printStackTrace(); } finally { close(con, cs, rs); } return list; } /** * 执行返回泛型类型对象的SQL语句 * * @param cls * 泛型类型 * @param sql * SQL语句 * @return 泛型类型对象 * @throws SQLException * @throws ClassNotFoundException * @throws InstantiationException * @throws IllegalAccessException */ public static <T> T executeEntity(Class<T> cls, String sql) { T obj = null; Connection con = null; PreparedStatement ps = null; ResultSet rs = null; try { con = getConnection(); ps = con.prepareStatement(sql); rs = ps.executeQuery(); while (rs.next()) { obj = executeResultSet(cls, rs); break; } } catch (Exception e) { e.printStackTrace(); } finally { close(con, ps, rs); } return obj; } /** * 执行返回泛型类型对象的存储过程 * * @param cls * 泛型类型 * @param sql * SQL语句 * @param params * 存储过程参数 * @return 泛型类型对象 * @throws SQLException * @throws ClassNotFoundException * @throws InstantiationException * @throws IllegalAccessException */ public static <T> T executeEntity(Class<T> cls, String sql, SqlParameter... params) { T obj = null; Connection con = null; CallableStatement cs = null; ResultSet rs = null; try { con = getConnection(); cs = con.prepareCall(sql); setSqlParameter(cs, params); rs = cs.executeQuery(); while (rs.next()) { obj = executeResultSet(cls, rs); break; } } catch (Exception e) { e.printStackTrace(); } finally { close(con, cs, rs); } return obj; } /** * 将一条记录转成一个对象 * * @param cls * 泛型类型 * @param rs * ResultSet对象 * @return 泛型类型对象 * @throws InstantiationException * @throws IllegalAccessException * @throws SQLException */ private static <T> T executeResultSet(Class<T> cls, ResultSet rs) throws InstantiationException, IllegalAccessException, SQLException { T obj = cls.newInstance(); ResultSetMetaData rsm = rs.getMetaData(); int columnCount = rsm.getColumnCount(); // Field[] fields = cls.getFields(); Field[] fields = cls.getDeclaredFields(); for (int i = 0; i < fields.length; i++) { Field field = fields[i]; String fieldName = field.getName(); for (int j = 1; j <= columnCount; j++) { String columnName = rsm.getColumnName(j); if (fieldName.equalsIgnoreCase(columnName)) { Object value = rs.getObject(j); field.setAccessible(true); field.set(obj, value); break; } } } return obj; } /** * 设置存储过程参数名称,参数值,参数方向 * * @param cs * @param params * @throws SQLException */ private static void setSqlParameter(CallableStatement cs, SqlParameter... params) throws SQLException { if (params != null) { for (SqlParameter param : params) { if (param.OutPut) { String paramName = param.Name; if (paramName == null || paramName.equals("")) { cs.registerOutParameter(1, param.Type);// 设置返回类型参数 } else { cs.registerOutParameter(paramName, param.Type);// 设置输出类型参数 } } else { cs.setObject(param.Name, param.Value);// 设置输入类型参数 } } } } /** * 得到存储过程参数执行结果 * * @param cs * @param params * @throws SQLException */ private static void getSqlParameter(CallableStatement cs, SqlParameter... params) throws SQLException { for (SqlParameter param : params) { if (param.OutPut) { String paramName = param.Name; if (paramName == null || paramName.equals("")) { param.Value = cs.getObject(1);// 返回类型参数值 } else { param.Value = cs.getObject(paramName);// 输出类型参数值 } } } } /** * 关闭JDBC对象,释放资源。 * * @param con * 连接对象 * @param ps * 命令对象 * @param rs * 结果集对象 * @throws SQLException */ private static void close(Connection con, Statement ps, ResultSet rs) { try { // rs.close(); //jiangxiang删除改行,当多次掉用该语句时会报空指针异常,因为rs已关闭。 if (rs != null) { rs = null; } if (ps != null) { ps.close(); ps = null; } if (con != null) { con.close(); con = null; } } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
/** * 存储过程参数类型 * @author Administrator * */ public class SqlParameter { /** * 参数名称 */ public String Name; /** * 参数值 */ public Object Value; /** * true表示参数为输出类型 */ public boolean OutPut; /** * 参数类型 */ public int Type; /** * 输入类型参数的构造函数 * @param name 存储过程 输入类型 参数名称 * @param value 存储过程 输入类型 参数值 */ public SqlParameter(String name,Object value){ this.Name = name; this.Value= value; } /** * 输出类型参数的构造函数 * @param type 存储过程 输出类型 参数类型 * @param name 存储过程 输出类型 参数名称 */ public SqlParameter(int type,String name){ this.Name = name; this.OutPut = true; this.Type = type; } /** * 返回类型参数的构造函数 * @param type 存储过程 返回类型 */ public SqlParameter(int type){ this.Name = ""; this.OutPut = true; this.Type = type; } }
工具类挺好用的,不过使用的时候循环导入时报错,断点调试后发现close中有问题,将一行代码注释掉就好了。
最后测试下:
public static void main(String[] args) throws Exception { List<Metal> list =captureJavascript(); for(Metal metal : list){ String sql = "insert into Metal([name],[price],[change],[upWater],[date]) values(‘" + metal.getName() + "‘,‘" + metal.getPrice() + "‘,‘" + metal.getChange() + "‘,‘" + metal.getUpWater() + "‘,‘" + metal.getDate() +"‘)"; int identyValue = SqlHelper.executeIdentity(sql); } }
成功,数据库和实体类不附上了。
时间: 2024-10-20 16:52:00