//登录csdn
// String uri = "https://passport.csdn.net/account/login";
// String html = HttpUtil.DownHtml(uri);
// <input type="hidden" name="lt" value="LT-207426-moK0sGnfCa9aqijJKeLYhFDYiEe2id" />
// <input type="hidden" name="execution" value="e1s1" />
// <input type="hidden" name="_eventId" value="submit" />
// String lt = getGroup_1("name=\"lt\" value=\"(.*?)\"", html);
// String execution = getGroup_1("name=\"execution\" value=\"(.*?)\"", html);
// System.out.println(lt + "\t" + execution);
//
// //构建cookie
// Map<String, String> params = new HashMap<String,String>();
// params.put("_eventId", "submit");
// params.put("execution", execution);
// params.put("lt", lt);
// params.put("password", "biantai123");
// params.put("username", "[email protected]");
//
// HttpUtil.Post(uri, params);
模拟登录流程:
1 请求host_url
2 从host_url中解析出 隐藏表单 的值 添加到POST_DATA中
3 添加账户,密码到POST_DATA中
4 编码后,发送POST请求
要点1:在 Java 下,HttpClient 必须是单例模式(整个登录会话共用同一个实例,cookie 才能保留)
要点2:post的url可能跟登录界面的url不同。post_url可以从host_url的返回结果中得到(具体情况自行分析)
5 通过 Firefox、Chrome 等浏览器的相关插件验证登录完成
6 测试需要登录的采集任务
登录插件工厂设计:
设计目标(粗略):
接口设计:
public interface ILogin {
//HttpClient
public HttpClient login(String usr, String pwd, Map<k, v> metadata);
}
example:
/**
 * Example {@code ILogin} implementation. It only shows the shape of a plug-in:
 * no request is sent and no session is established.
 */
public class BaiduLogin implements ILogin {
    /**
     * Placeholder login: prints a marker line and then fails, because no login
     * flow is implemented in this example.
     *
     * @param usr      account name (unused by this stub)
     * @param pwd      password (unused by this stub)
     * @param metadata extra name/value pairs (unused by this stub)
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public HttpClient login(String usr, String pwd, Map<String, String> metadata) {
        System.out.println("萤火虫在飞");
        // The method must either return a client or throw; failing loudly is
        // safer than handing back a client that was never logged in.
        throw new UnsupportedOperationException(
                "BaiduLogin.login is an example stub; no login flow is implemented");
    }
}
// Returns the shared HttpClient, creating it on the first call. The method is
// synchronized so that concurrent callers cannot create it twice.
// Whether this should return the HttpClient or only the cookies is still undecided.
public static synchronized HttpClient getSaveHttpClient() {
    // Already created: hand back the existing instance.
    if (mHttpClient != null) {
        return mHttpClient;
    }

    // Register both plain HTTP (port 80) and HTTPS (port 443).
    SchemeRegistry schemes = new SchemeRegistry();
    schemes.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
    schemes.register(new Scheme("https", SSLSocketFactory.getSocketFactory(), 443));

    HttpParams httpParams = new BasicHttpParams();

    // Basic protocol settings: HTTP/1.1, content charset, Expect-Continue enabled.
    HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
    HttpProtocolParams.setContentCharset(httpParams, CHARSET);
    HttpProtocolParams.setUseExpectContinue(httpParams, true);

    // Timeouts (milliseconds).
    // Waiting for a connection from the pool.
    ConnManagerParams.setTimeout(httpParams, 1000);
    // Establishing the connection.
    HttpConnectionParams.setConnectionTimeout(httpParams, 2000);
    // Socket timeout (SO_TIMEOUT) for the request.
    HttpConnectionParams.setSoTimeout(httpParams, 4000);

    // Build the client on top of the thread-safe connection manager.
    ClientConnectionManager connectionManager =
            new ThreadSafeClientConnManager(httpParams, schemes);
    mHttpClient = new DefaultHttpClient(connectionManager, httpParams);
    return mHttpClient;
}
HttpClient(http://hc.apache.org/httpcomponents-client-ga/)自动管理了cookie信息,
只需要先传递登录信息执行登录过程,然后直接访问想要的页面,跟访问一个普通的页面没有任何区别,
因为HttpClient已经帮忙发送了Cookie信息。下面的例子实现了这样一个访问的过程。
http://xugou4-yahoo-com-cn.iteye.com/blog/1308457
http://blog.csdn.net/yanzi1225627/article/details/24937439
/**
 * Example of a cookie-based login with Apache HttpClient: POST the login form,
 * read the {@code Location} header of the response, then GET that address with
 * the same client and print the returned page.
 *
 * <p>The connection manager is shut down at the end of {@link #printText()},
 * so an instance is meant for a single run.
 */
public class RenRen {
    // ---- Configuration ----
    /** Account e-mail sent as the "email" form field (placeholder value). */
    private static final String USER_NAME = "邮箱地址";
    /** Account password sent as the "password" form field (placeholder value). */
    private static final String PASSWORD = "密码";
    /** Page to fetch; sent to the login form as "origURL". */
    private static final String REDIRECT_URL =
            "http://blog.renren.com/blog/304317577/449470467";
    /** Address the login form is posted to. */
    private static final String LOGIN_URL = "http://www.renren.com/PLogin.do";

    /** Response of the login POST; used to obtain the redirect address. */
    private HttpResponse response;
    /** The single client used for the whole session (login POST and page GET). */
    private final DefaultHttpClient httpclient = new DefaultHttpClient();

    /**
     * Posts the login form and stores the response in {@link #response}.
     *
     * @return {@code true} if the request was executed without an exception,
     *         {@code false} otherwise; the response status is not inspected
     */
    private boolean login() {
        // Initialise the POST request from the login address.
        HttpPost httpost = new HttpPost(LOGIN_URL);
        // All parameters posted to the site.
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("origURL", REDIRECT_URL));
        nvps.add(new BasicNameValuePair("domain", "renren.com"));
        nvps.add(new BasicNameValuePair("isplogin", "true"));
        nvps.add(new BasicNameValuePair("formName", ""));
        nvps.add(new BasicNameValuePair("method", ""));
        nvps.add(new BasicNameValuePair("submit", "登录"));
        nvps.add(new BasicNameValuePair("email", USER_NAME));
        nvps.add(new BasicNameValuePair("password", PASSWORD));
        try {
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
            response = httpclient.execute(httpost);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // Only the response headers are needed afterwards, so the request
            // is aborted instead of reading the body.
            httpost.abort();
        }
        return true;
    }

    /**
     * Reads the redirect address from the stored login response.
     *
     * @return the value of the first {@code Location} header, or {@code null}
     *         if there is no stored response or no such header
     */
    private String getRedirectLocation() {
        // No login response has been stored (login() not run or it failed).
        if (response == null) {
            return null;
        }
        Header locationHeader = response.getFirstHeader("Location");
        if (locationHeader == null) {
            return null;
        }
        return locationHeader.getValue();
    }

    /**
     * Fetches the page at the redirect address with the session's client.
     *
     * @param redirectLocation address to GET
     * @return the response body, or {@code null} if the request failed
     */
    private String getText(String redirectLocation) {
        HttpGet httpget = new HttpGet(redirectLocation);
        // Response handler that turns the response into its body text.
        ResponseHandler<String> responseHandler = new BasicResponseHandler();
        String responseBody = "";
        try {
            // Fetch the page content.
            responseBody = httpclient.execute(httpget, responseHandler);
        } catch (Exception e) {
            e.printStackTrace();
            responseBody = null;
        } finally {
            httpget.abort();
        }
        return responseBody;
    }

    /**
     * Runs the whole flow: log in, resolve the redirect address, and print the
     * page found there. Nothing is printed when the login request fails or no
     * redirect address is present.
     */
    public void printText() {
        try {
            if (login()) {
                String redirectLocation = getRedirectLocation();
                if (redirectLocation != null) {
                    System.out.println(getText(redirectLocation));
                }
            }
        } finally {
            // Release the connections on every path, including a failed login
            // or a missing redirect address.
            httpclient.getConnectionManager().shutdown();
        }
    }

    public static void main(String[] args) {
        RenRen renRen = new RenRen();
        renRen.printText();
    }
}
httpclient 模拟登录 及线程安全