<?php
/**
 * PHP cURL usage examples:
 *   1. fetch a page that has no access control
 *   2. fetch a page through an HTTP proxy
 *   3. POST form data and print the response
 *   4. fetch a page protected by page-level access control
 *   5. simulate a login to Sina mail
 *
 * Every section creates its own cURL handle and closes it when done.
 */

// 1. Fetch a file that has no access control.

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://localhost/mytest/phpinfo.php");
curl_setopt($ch, CURLOPT_HEADER, false);
// If this line is commented out, the response is printed directly
// instead of being returned by curl_exec().
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$result = curl_exec($ch);
curl_close($ch);

// 2. Fetch a page through a proxy.

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://blog.51yip.com");
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
// The proxy address must be passed as a "host:port" string.
curl_setopt($ch, CURLOPT_PROXY, '125.21.23.6:8080');
// Add this line if the proxy requires a password:
//curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'user:password');
$result = curl_exec($ch);
curl_close($ch);

// 3. POST data, then fetch the response.

$ch = curl_init();
/* Note: the data to submit must not be a two-dimensional (or deeper) array.
 * Flatten nested values first, e.g.
 *   array('name'=>serialize(array('tank','zhang')),'sex'=>1,'birth'=>'20101010')
 * whereas passing a nested array directly, e.g.
 *   array('name'=>array('tank','zhang'),'sex'=>1,'birth'=>'20101010')
 * raises an error. */
$data = array('name' => 'test', 'sex' => 1, 'birth' => '20101010');
curl_setopt($ch, CURLOPT_URL, 'http://localhost/mytest/curl/upload.php');
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// CURLOPT_RETURNTRANSFER is not set here, so the response is printed directly.
curl_exec($ch);
// Release the handle (the original example never closed it).
curl_close($ch);

// 4. Fetch a page that has page-level access control.

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://club-china");
/* CURLOPT_USERPWD is mainly used to get past page access control,
 * e.g. the protection usually set up with htpasswd. */
//curl_setopt($ch, CURLOPT_USERPWD, '231144:2091XTAjmd=');
curl_setopt($ch, CURLOPT_HTTPGET, 1);
curl_setopt($ch, CURLOPT_REFERER, "http://club-china");
curl_setopt($ch, CURLOPT_HEADER, 0);
// CURLOPT_RETURNTRANSFER is not set, so the page is printed directly and
// $result only holds the boolean outcome of curl_exec().
$result = curl_exec($ch);
curl_close($ch);

// 5. Simulate a login to Sina mail.

// Settings read by checklogin(). The User-Agent header is absent when the
// script is not run for a browser request (e.g. from the CLI), so fall back
// to an empty string instead of reading an undefined index.
define("USERAGENT", isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
define("COOKIEJAR", tempnam("/tmp", "cookie"));
define("TIMEOUT", 500);

/**
 * Try to log in to Sina mail with the given credentials.
 *
 * Posts the credentials to http://mail.sina.com.cn/cgi-bin/login.cgi and
 * inspects the response headers: the attempt counts as successful when they
 * contain a "Location:" header pointing at ".../cgi/index.php?check_time=...".
 *
 * Relies on the USERAGENT, COOKIEJAR and TIMEOUT constants being defined
 * before the function is called.
 *
 * @param string $user     Account name.
 * @param string $password Account password.
 *
 * @return int 1 when the expected redirect header is found; 0 when either
 *             argument is empty, the request fails, or the header is missing.
 */
function checklogin($user, $password)
{
    if (empty($user) || empty($password)) {
        return 0;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_REFERER, "http://mail.sina.com.cn/index.html");
    // Response headers must be part of the returned text, because the result
    // is decided by matching the "Location:" header below.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, USERAGENT);
    curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
    curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
    curl_setopt($ch, CURLOPT_URL, "http://mail.sina.com.cn/cgi-bin/login.cgi");
    curl_setopt($ch, CURLOPT_POST, true);
    // Both values are URL-encoded so that characters such as '&', '=', '+'
    // or '%' in the password cannot corrupt the form body.
    curl_setopt(
        $ch,
        CURLOPT_POSTFIELDS,
        "&logintype=uid&u=" . urlencode($user) . "&psw=" . urlencode($password)
    );
    $contents = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false when the transfer fails; treat that as a
    // failed login rather than handing a boolean to preg_match().
    if ($contents === false) {
        return 0;
    }

    return preg_match("/Location: (.*)\\/cgi\\/index\\.php\\?check_time=(.*)\n/", $contents) ? 1 : 0;
}

// Sample credentials from the original example.
echo checklogin("zhangying215", "xtaj227");

// Timestamp carried over from the original snippet: 2024-11-05 20:44:12