httpclient入门: http://www.ibm.com/developerworks/cn/opensource/os-httpclient/
httpclient证书导入:http://www.blogjava.net/happytian/archive/2006/12/22/89447.html
httpclient高级认识:http://laohuang.iteye.com/blog/55613
httpclient官方文档:http://hc.apache.org/httpcomponents-client/index.html
httpclient资源关闭:http://www.iteye.com/topic/234759
要注意的有以下几点:
1、httpclient连接后资源释放问题很重要,就跟我们用database connection要释放资源一样。
2、https网站采用ssl加密传输,证书导入要注意。
3、做这样的项目最好先了解下http协议,比如302、301、200、404等返回代码的含义(这是最基本的),以及cookie、session的机制。
4、httpclient的redirect默认是自动处理的,这给开发者带来了很大的方便(如一些授权获得cookie的场景),但有时需要手动管理。比如
有时会遇到CircularRedirectException异常,出现这样的情况是因为返回的头信息中location的值指向了之前已经请求过的地址(端口号可以不同),可能导致死
循环式的递归重定向,这时可以手动关闭自动重定向:method.setFollowRedirects(false)
5、有的网站会先判别用户的请求是否是来自浏览器,如不是,则返回不正确的文本,所以用httpclient抓取信息时在头部加入如下信息:
header.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7; .NET CLR 1.1.4322; CIBA; .NET CLR 2.0.50727)");
6、当post请求提交数据时要改变默认编码,不然的话提交上去的数据会出现乱码。重写PostMethod的getRequestCharSet()方法返回所需的编码(或者调用postMethod.getParams().setContentCharset(...))就可以了。
处理request请求返回的文本的通用类:
1 /* 2 * HttpRequestProxy.java 3 * 4 * Created on November 3, 2008, 9:53 AM 5 */ 6 7 package cn.com.mozat.net; 8 9 import java.io.BufferedReader; 10 import java.io.IOException; 11 import java.io.InputStream; 12 import java.io.InputStreamReader; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.Map; 16 import java.util.Set; 17 18 import org.apache.commons.httpclient.Header; 19 import org.apache.commons.httpclient.HttpClient; 20 import org.apache.commons.httpclient.HttpException; 21 import org.apache.commons.httpclient.HttpMethod; 22 import org.apache.commons.httpclient.NameValuePair; 23 import org.apache.commons.httpclient.SimpleHttpConnectionManager; 24 import org.apache.commons.httpclient.methods.GetMethod; 25 import org.apache.commons.httpclient.methods.PostMethod; 26 27 import cn.com.mozat.exception.CustomException; 28 29 /** 30 * 31 * @author bird email:[email protected] 32 * 33 * 2008-11-4 09:49:48 34 */ 35 public class HttpRequestProxy{ 36 //超时间隔 37 private static int connectTimeOut = 60000; 38 //让connectionmanager管理httpclientconnection时是否关闭连接 39 private static boolean alwaysClose = false; 40 //返回数据编码格式 41 private String encoding = "UTF-8"; 42 43 private final HttpClient client = new HttpClient(new SimpleHttpConnectionManager(alwaysClose)); 44 45 public HttpClient getHttpClient(){ 46 return client; 47 } 48 49 /** 50 * 用法: 51 * HttpRequestProxy hrp = new HttpRequestProxy(); 52 * hrp.doRequest("http://www.163.com",null,null,"gbk"); 53 * 54 * @param url 请求的资源URL 55 * @param postData POST请求时form表单封装的数据 没有时传null 56 * @param header request请求时附带的头信息(header) 没有时传null 57 * @param encoding response返回的信息编码格式 没有时传null 58 * @return response返回的文本数据 59 * @throws CustomException 60 */ 61 public String doRequest(String url,Map postData,Map header,String encoding) throws CustomException{ 62 String responseString = null; 63 //头部请求信息 64 Header[] headers = null; 65 if(header != null){ 66 Set entrySet = header.entrySet(); 67 int dataLength = 
entrySet.size(); 68 headers= new Header[dataLength]; 69 int i = 0; 70 for(Iterator itor = entrySet.iterator();itor.hasNext();){ 71 Map.Entry entry = (Map.Entry)itor.next(); 72 headers[i++] = new Header(entry.getKey().toString(),entry.getValue().toString()); 73 } 74 } 75 //post方式 76 if(postData!=null){ 77 PostMethod postRequest = new PostMethod(url.trim()); 78 if(headers != null){ 79 for(int i = 0;i < headers.length;i++){ 80 postRequest.setRequestHeader(headers[i]); 81 } 82 } 83 Set entrySet = postData.entrySet(); 84 int dataLength = entrySet.size(); 85 NameValuePair[] params = new NameValuePair[dataLength]; 86 int i = 0; 87 for(Iterator itor = entrySet.iterator();itor.hasNext();){ 88 Map.Entry entry = (Map.Entry)itor.next(); 89 params[i++] = new NameValuePair(entry.getKey().toString(),entry.getValue().toString()); 90 } 91 postRequest.setRequestBody(params); 92 try { 93 responseString = this.executeMethod(postRequest,encoding); 94 } catch (CustomException e) { 95 throw e; 96 } finally{ 97 postRequest.releaseConnection(); 98 } 99 } 100 //get方式 101 if(postData == null){ 102 GetMethod getRequest = new GetMethod(url.trim()); 103 if(headers != null){ 104 for(int i = 0;i < headers.length;i++){ 105 getRequest.setRequestHeader(headers[i]); 106 } 107 } 108 try { 109 responseString = this.executeMethod(getRequest,encoding); 110 } catch (CustomException e) { 111 e.printStackTrace(); 112 throw e; 113 }finally{ 114 getRequest.releaseConnection(); 115 } 116 } 117 118 return responseString; 119 } 120 121 private String executeMethod(HttpMethod request, String encoding) throws CustomException{ 122 String responseContent = null; 123 InputStream responseStream = null; 124 BufferedReader rd = null; 125 try { 126 this.getHttpClient().executeMethod(request); 127 if(encoding != null){ 128 responseStream = request.getResponseBodyAsStream(); 129 rd = new BufferedReader(new InputStreamReader(responseStream, 130 encoding)); 131 String tempLine = rd.readLine(); 132 StringBuffer tempStr = new 
StringBuffer(); 133 String crlf=System.getProperty("line.separator"); 134 while (tempLine != null) 135 { 136 tempStr.append(tempLine); 137 tempStr.append(crlf); 138 tempLine = rd.readLine(); 139 } 140 responseContent = tempStr.toString(); 141 }else 142 responseContent = request.getResponseBodyAsString(); 143 144 Header locationHeader = request.getResponseHeader("location"); 145 //返回代码为302,301时,表示页面己经重定向,则重新请求location的url,这在 146 //一些登录授权取cookie时很重要 147 if (locationHeader != null) { 148 String redirectUrl = locationHeader.getValue(); 149 this.doRequest(redirectUrl, null, null,null); 150 } 151 } catch (HttpException e) { 152 throw new CustomException(e.getMessage()); 153 } catch (IOException e) { 154 throw new CustomException(e.getMessage()); 155 156 } finally{ 157 if(rd != null) 158 try { 159 rd.close(); 160 } catch (IOException e) { 161 throw new CustomException(e.getMessage()); 162 } 163 if(responseStream != null) 164 try { 165 responseStream.close(); 166 } catch (IOException e) { 167 throw new CustomException(e.getMessage()); 168 169 } 170 } 171 return responseContent; 172 } 173 174 175 /** 176 * 特殊请求数据,这样的请求往往会出现redirect本身而出现递归死循环重定向 177 * 所以单独写成一个请求方法 178 * 比如现在请求的url为:http://localhost:8080/demo/index.jsp 179 * 返回代码为302 头部信息中location值为:http://localhost:8083/demo/index.jsp 180 * 这时httpclient认为进入递归死循环重定向,抛出CircularRedirectException异常 181 * @param url 182 * @return 183 * @throws CustomException 184 */ 185 public String doSpecialRequest(String url,int count,String encoding) throws CustomException{ 186 String str = null; 187 InputStream responseStream = null; 188 BufferedReader rd = null; 189 GetMethod getRequest = new GetMethod(url); 190 //关闭httpclient自动重定向动能 191 getRequest.setFollowRedirects(false); 192 try { 193 194 this.client.executeMethod(getRequest); 195 Header header = getRequest.getResponseHeader("location"); 196 if(header!= null){ 197 //请求重定向后的URL,count同时加1 198 this.doSpecialRequest(header.getValue(),count+1, encoding); 199 } 200 
//这里用count作为标志位,当count为0时才返回请求的URL文本, 201 //这样就可以忽略所有的递归重定向时返回文本流操作,提高性能 202 if(count == 0){ 203 getRequest = new GetMethod(url); 204 getRequest.setFollowRedirects(false); 205 this.client.executeMethod(getRequest); 206 responseStream = getRequest.getResponseBodyAsStream(); 207 rd = new BufferedReader(new InputStreamReader(responseStream, 208 encoding)); 209 String tempLine = rd.readLine(); 210 StringBuffer tempStr = new StringBuffer(); 211 String crlf=System.getProperty("line.separator"); 212 while (tempLine != null) 213 { 214 tempStr.append(tempLine); 215 tempStr.append(crlf); 216 tempLine = rd.readLine(); 217 } 218 str = tempStr.toString(); 219 } 220 221 } catch (HttpException e) { 222 throw new CustomException(e.getMessage()); 223 } catch (IOException e) { 224 throw new CustomException(e.getMessage()); 225 } finally{ 226 getRequest.releaseConnection(); 227 if(rd !=null) 228 try { 229 rd.close(); 230 } catch (IOException e) { 231 throw new CustomException(e.getMessage()); 232 } 233 if(responseStream !=null) 234 try { 235 responseStream.close(); 236 } catch (IOException e) { 237 throw new CustomException(e.getMessage()); 238 } 239 } 240 return str; 241 } 242 243 244 245 246 public static void main(String[] args) throws Exception{ 247 HttpRequestProxy hrp = new HttpRequestProxy(); 248 Map header = new HashMap(); 249 header.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7; .NET CLR 1.1.4322; CIBA; .NET CLR 2.0.50727)"); 250 String str = hrp.doRequest( 251 "http://www.cma-cgm.com/en/eBusiness/Tracking/Default.aspx?BolNumber=GZ2108827", 252 null, header,null); 253 System.out.println(str.contains("row_CRXU1587647")); 254 // System.out.println(str); 255 } 256 257 }