package com.vanwell.module.util.express;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.UUID;

import org.elasticsearch.common.lang3.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.BeansException;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.vanwell.module.common.ErrorConstants;
import com.vanwell.module.common.Result;
import com.vanwell.module.common.util.CommonUtil;
import com.vanwell.module.util.exception.StackTraceUtil;
import com.vanwell.module.util.http.HttpUtils;
import com.vanwell.module.util.spring.ServiceFactory;
import com.vanwell.thirdparty.fedroad.api.QueryTrackApi;
import com.vanwell.thirdparty.fedroad.pojo.QueryTrackReq;
import com.vanwell.thirdparty.fedroad.pojo.QueryTrackRes;

/**
 * Created by aixiaofeng on 17/2/6.
 *
 * <p>Express spider for Fedroad. It optionally resolves a delivery number through
 * {@link QueryTrackApi}, then submits the tracking form on the Fedroad home page and
 * converts the rows of the returned {@code .trackinfo} table into a JSON string of the
 * form {@code {"data":[{"time":"yyyy-MM-dd HH:mm:ss","context":"..."}]}}.
 */
public class FedroadSpider extends ExpressSpider {

    /** Date pattern used in the first column of the tracking table. */
    private static final String COL_DATE_PATTERN = "yyyy/MM/dd HH:mm:ss";

    /** Date pattern written into the {@code time} field of the result JSON. */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Page that hosts the tracking form; it is both fetched (GET) and posted to. */
    private static final String TRACK_URL = "https://www.fedroad.com";

    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36";

    /** Lazily resolved API client, shared by all instances. */
    private static QueryTrackApi queryTrackApi;

    /**
     * Queries the tracking records of a parcel.
     *
     * <p>When {@code attr} is non-blank, or {@code expressNo} does not start with "ec"
     * (case-insensitive), the delivery number is first looked up through the track API;
     * a non-blank delivery number then replaces {@code expressNo} for the page query.
     *
     * @param express   express company identifier; only used in the error log
     * @param expressNo parcel / tracking number
     * @param attr      extra attribute; only its blankness is inspected here
     * @return a result whose data is the tracking JSON on success, or whose code is
     *         {@code ErrorConstants.HTTP_ERR} and whose message is the stack trace on failure
     */
    @Override
    public Result<String> doQuery(String express, String expressNo, String attr) {
        String deliveryNo = null;
        if (StringUtils.isNotBlank(attr) || !StringUtils.lowerCase(expressNo).startsWith("ec")) {
            deliveryNo = queryDeliveryNoByApi(express, expressNo, attr);
        }
        return queryByPage(express, StringUtils.isNotBlank(deliveryNo) ? deliveryNo : expressNo, attr);
    }

    /**
     * Looks up the delivery number of a package through {@link QueryTrackApi}.
     *
     * @return the delivery number of the first track entry, or {@code null} when the
     *         request did not succeed or returned no track entries
     */
    private String queryDeliveryNoByApi(String express, String expressNo, String attr) {
        QueryTrackReq reqTrack = new QueryTrackReq();
        reqTrack.getParameters().setPackageNo(expressNo);
        Result<QueryTrackRes> res = resolveQueryTrackApi().doRequest(reqTrack);
        if (res.isSuccess()
                && res.getData() != null
                && res.getData().getTrackList() != null
                && CommonUtil.isNotEmpty(res.getData().getTrackList().getTrackList())) {
            return res.getData().getTrackList().getTrackList().get(0).getDeliveryNo();
        }
        return null;
    }

    /**
     * Returns the shared API client, resolving it from {@link ServiceFactory} on first use
     * and falling back to a directly constructed instance when no bean is available.
     */
    private static QueryTrackApi resolveQueryTrackApi() {
        QueryTrackApi api = queryTrackApi;
        if (api == null) {
            try {
                api = ServiceFactory.getBean(QueryTrackApi.class);
            } catch (BeansException ignored) {
                // No usable bean: fall through to the plain instance below.
                api = null;
            }
            if (api == null) {
                api = new QueryTrackApi();
            }
            queryTrackApi = api;
        }
        return api;
    }

    /**
     * Fetches the tracking page for {@code expressNo} and converts it to the result JSON.
     * Any exception is logged, recorded on the result, and followed by {@code waitRandom()}.
     */
    private Result<String> queryByPage(String express, String expressNo, String attr) {
        Result<String> result = new Result<>();
        try {
            // Load the page and locate the inputs of the ASP.NET form.
            Connection.Response response =
                    HttpUtils.getConnection(TRACK_URL).method(Connection.Method.GET).execute();
            Elements inputs = response.parse().select("#aspnetForm").select("input");

            // Post the form back and parse the returned document.
            String html = postTrackForm(inputs, expressNo);

            JSONObject json = new JSONObject();
            json.put("data", parseTrackRows(Jsoup.parse(html)));
            // Success.
            return result.setSuccess(true).setCode(ErrorConstants.SUCCESS).setData(json.toString());
        } catch (Exception e) {
            result.setCode(ErrorConstants.HTTP_ERR).setMessage(StackTraceUtil.getStackTrace(e));
            LOGGER.error(" - doQuery error,express = " + express + "," + expressNo, e);
            waitRandom();
        }
        return result;
    }

    /**
     * Posts the form inputs as {@code multipart/form-data} to {@link #TRACK_URL} and
     * returns the response body, with lines joined by {@code "\n"}.
     *
     * @throws IOException on connection failure, timeout, or a failing HTTP status
     */
    private static String postTrackForm(Elements inputs, String expressNo) throws IOException {
        String boundary = UUID.randomUUID().toString();
        // NOTE(review): request and response are encoded/decoded as UTF-8 instead of the
        // platform default charset - confirm the site serves UTF-8.
        byte[] payload = buildMultipartBody(inputs, boundary, expressNo).getBytes(StandardCharsets.UTF_8);

        HttpURLConnection conn = (HttpURLConnection) new URL(TRACK_URL).openConnection();
        try {
            conn.setConnectTimeout(5000);
            conn.setReadTimeout(30000);
            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setUseCaches(false);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Connection", "Keep-Alive");
            conn.setRequestProperty("User-Agent", USER_AGENT);
            conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);

            try (OutputStream out = new DataOutputStream(conn.getOutputStream())) {
                out.write(payload);
                out.flush();
            }

            // Read the returned page.
            StringBuilder page = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    page.append(line).append("\n");
                }
            }
            return page.toString();
        } finally {
            // Release the connection on the error paths as well.
            conn.disconnect();
        }
    }

    /**
     * Builds the multipart request body from the form inputs. Inputs whose class contains
     * "user_loginout" are skipped; the input whose name contains "search_shippingorder"
     * carries {@code expressNo}; every other input is sent with its current value.
     */
    private static String buildMultipartBody(Elements inputs, String boundary, String expressNo) {
        StringBuilder body = new StringBuilder();
        for (Element input : inputs) {
            if (input.attr("class").contains("user_loginout")) {
                continue;
            }
            String name = input.attr("name");
            body.append("--").append(boundary).append("\r\n");
            body.append("Content-Disposition: form-data; name=\"").append(name).append("\"\r\n\r\n");
            body.append(name.contains("search_shippingorder") ? expressNo : input.val()).append("\r\n");
        }
        // Exactly one closing delimiter terminates the multipart body.
        body.append("--").append(boundary).append("--\r\n");
        return body.toString();
    }

    /**
     * Uses Jsoup to pick the fields out of the {@code .trackinfo} table and assemble them.
     * Title rows, rows with fewer than two cells, and rows with an empty time cell are skipped.
     *
     * @throws ParseException when a non-empty time cell does not match {@link #COL_DATE_PATTERN}
     */
    private static JSONArray parseTrackRows(Document page) throws ParseException {
        // SimpleDateFormat is not safe for concurrent use, so formatters are created per call.
        SimpleDateFormat sourceFormat = new SimpleDateFormat(COL_DATE_PATTERN);
        SimpleDateFormat targetFormat = new SimpleDateFormat(DATE_PATTERN);

        JSONArray rows = new JSONArray();
        for (Element row : page.select(".trackinfo tr")) {
            if (row.select("td").attr("class").contains("title")) {
                continue;
            }
            Elements cells = row.getElementsByTag("td");
            if (cells.size() < 2) {
                // e.g. header rows built from <th>: nothing to extract.
                continue;
            }
            String time = cells.get(0).text().trim();
            if (time.isEmpty()) {
                continue;
            }
            JSONObject item = new JSONObject();
            item.put("time", targetFormat.format(sourceFormat.parse(time)));
            item.put("context", cells.get(1).text().trim());
            rows.add(item);
        }
        return rows;
    }

    // Manual test.
    public static void main(String[] args) {
        FedroadSpider spider = new FedroadSpider();
        Result<String> ret = spider.doQuery(null, "EC000021436MY", null);
        System.out.print(ret);
    }
}
// Time: 2024-08-12 04:47:50