/**
 * Reads selected elements from an XML file with the W3C DOM API and copies
 * their values into a new {@code WebMagic} object.
 *
 * <p>For every name in {@code lablenames}, the first descendant element of the
 * document root with that tag name is looked up, and the node value of that
 * element's first child is taken as its value. The value is echoed to standard
 * output and, when the element is named {@code starturl},
 * {@code regexstarturl}, {@code labelName}, {@code labeltype} or
 * {@code regexdescendants}, stored through the matching setter. Other element
 * names are echoed but not stored.
 *
 * <p>A {@code null} name, a name with no matching element, or an element with
 * no child node is skipped, so one missing or empty tag does not prevent the
 * remaining names from being read.
 *
 * <p>This method does not throw: any exception raised while parsing or
 * reading is caught, its message is printed to standard output, and the
 * object populated so far is returned.
 *
 * <p>NOTE(review): the parser is created from a default
 * {@code DocumentBuilderFactory}; if the file can come from an untrusted
 * source, DTD / external-entity processing should be disabled on the factory.
 *
 * @param lablenames     names of the elements to read; may be {@code null}
 * @param file_path_name absolute path of the XML file
 * @return a {@code WebMagic} holding the values that could be read; never
 *         {@code null}
 */
public static WebMagic readXML(List<String> lablenames, String file_path_name) {
    WebMagic webMagic = new WebMagic();
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        org.w3c.dom.Document document = builder.parse(new File(file_path_name));
        org.w3c.dom.Element rootElement = document.getDocumentElement();

        if (lablenames == null) {
            return webMagic;
        }

        for (String requestedName : lablenames) {
            if (requestedName == null) {
                continue;
            }

            NodeList matches = rootElement.getElementsByTagName(requestedName);
            org.w3c.dom.Element element = (org.w3c.dom.Element) matches.item(0);
            if (element == null) {
                // Tag is not present in this file: leave the field untouched.
                continue;
            }

            org.w3c.dom.Node firstChild = element.getFirstChild();
            if (firstChild == null) {
                // Empty element such as <starturl/>: nothing to read.
                continue;
            }

            String value = firstChild.getNodeValue();
            // Console echo kept so existing output for readable values is unchanged.
            System.out.println(value);
            applyXmlValue(webMagic, element.getNodeName(), value);
        }
    } catch (Exception e) {
        // Best-effort read: report the failure and return what was populated.
        System.out.println("exception:" + e.getMessage());
    }
    return webMagic;
}

/** Stores {@code value} in the {@code target} field that corresponds to {@code elementName}, if any. */
private static void applyXmlValue(WebMagic target, String elementName, String value) {
    if ("starturl".equals(elementName)) {
        target.setStarturl(value);
    } else if ("regexstarturl".equals(elementName)) {
        target.setRegexstarturl(value);
    } else if ("labelName".equals(elementName)) {
        target.setLabelName(value);
    } else if ("labeltype".equals(elementName)) {
        target.setLabeltype(value);
    } else if ("regexdescendants".equals(elementName)) {
        target.setRegexdescendants(value);
    }
}
时间: 2024-10-20 10:23:45