// 1. Fetch a page that has no access control.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://localhost/mytest/phpinfo.php");
curl_setopt($ch, CURLOPT_HEADER, false);
// If this line is commented out, curl_exec() prints the response directly
// instead of returning it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$result = curl_exec($ch);
curl_close($ch);

// 2. Fetch a page through a proxy.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://blog.51yip.com");
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
// The proxy address must be a string ("host:port"); a bare 125.21.23.6:8080
// is a parse error.
curl_setopt($ch, CURLOPT_PROXY, '125.21.23.6:8080');
// If the proxy requires authentication, add this:
// curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'user:password');
$result = curl_exec($ch);
curl_close($ch);

// 3. POST data, then fetch the response.
$ch = curl_init();
/* Note: the data to submit must not be a two-dimensional (or deeper) array.
 * This works:  array('name' => serialize(array('tank', 'zhang')), 'sex' => 1, 'birth' => '20101010')
 * This raises an error: array('name' => array('tank', 'zhang'), 'sex' => 1, 'birth' => '20101010')
 */
$data = array('name' => 'test', 'sex' => 1, 'birth' => '20101010');
curl_setopt($ch, CURLOPT_URL, 'http://localhost/mytest/curl/upload.php');
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// CURLOPT_RETURNTRANSFER is not set here, so the response is printed directly.
curl_exec($ch);
// Release the handle, as every other example does.
curl_close($ch);

// 4. Fetch a page that is protected by access control.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://club-china");
/* CURLOPT_USERPWD is mainly used to get past page access control,
 * e.g. the protection commonly set up with htpasswd.
 */
// curl_setopt($ch, CURLOPT_USERPWD, '231144:2091XTAjmd=');
curl_setopt($ch, CURLOPT_HTTPGET, 1);
curl_setopt($ch, CURLOPT_REFERER, "http://club-china");
curl_setopt($ch, CURLOPT_HEADER, 0);
// NOTE: CURLOPT_RETURNTRANSFER is not set, so the body is printed directly and
// $result only holds the boolean success flag returned by curl_exec().
$result = curl_exec($ch);
curl_close($ch);

// 5. Simulate logging in to sina.

/**
 * Try to log in to Sina mail and report whether the login succeeded.
 *
 * Posts the credentials to the login CGI and treats a "Location:" response
 * header that redirects to /cgi/index.php?check_time=... as success.
 * Uses the USERAGENT, COOKIEJAR and TIMEOUT constants, which must be defined
 * before this function is called.
 *
 * @param string $user     Account name; an empty() value fails immediately.
 * @param string $password Account password; an empty() value fails immediately.
 * @return int 1 when the expected redirect header is found, 0 otherwise.
 */
function checklogin($user, $password)
{
    if (empty($user) || empty($password)) {
        return 0;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_REFERER, "http://mail.sina.com.cn/index.html");
    // Headers are included in the returned text because the success check
    // below looks at the "Location:" header.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, USERAGENT);
    curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
    curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
    curl_setopt($ch, CURLOPT_URL, "http://mail.sina.com.cn/cgi-bin/login.cgi");
    curl_setopt($ch, CURLOPT_POST, true);
    // Both values are URL-encoded: a raw password containing '&', '=', '+' or
    // '%' would otherwise corrupt the form-encoded body.
    curl_setopt(
        $ch,
        CURLOPT_POSTFIELDS,
        "&logintype=uid&u=" . urlencode($user) . "&psw=" . urlencode($password)
    );
    $contents = curl_exec($ch);
    curl_close($ch);

    // A failed transfer yields false; treat it as a failed login explicitly.
    if ($contents === false) {
        return 0;
    }

    if (!preg_match("/Location: (.*)\\/cgi\\/index\\.php\\?check_time=(.*)\n/", $contents, $matches)) {
        return 0;
    }

    return 1;
}

// HTTP_USER_AGENT is absent when the script is not run for a browser request
// (e.g. from the command line), so fall back to an empty string.
define("USERAGENT", isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
define("COOKIEJAR", tempnam("/tmp", "cookie"));
define("TIMEOUT", 500);

echo checklogin("zhangying215", "xtaj227");
时间: 2024-10-13 16:06:27