在网上找了一些获取邮箱通讯录的代码,大部分都已经失效,为此我重新整理了一下。这里特别放出126邮箱的代码:163比较容易抓取,而126在登录时多了一次跳转,处理起来比较麻烦。
<?php
/**
 * @file class.126http.php
 * Fetches the address book (contact list) of a 126.com mailbox.
 *
 * Two constants are used but not defined in this file; the including script
 * must define them before calling any method:
 *   COOKIEJAR - path of the cookie file shared by every request
 *   TIMEOUT   - cURL timeout passed to CURLOPT_TIMEOUT
 *
 * @author jvones http://www.jvones.com/blog
 * @date 2009-09-26
 **/
class http126
{
    /**
     * Log in to 126.com and store the resulting cookies in COOKIEJAR.
     *
     * The login is a two-step flow: the credentials are posted to the
     * passport endpoint, whose response contains a JavaScript
     * `replace("...")` redirect; that redirect target is then requested so
     * the mail host can set its own cookies.
     *
     * @param string $username Mailbox name without the "@126.com" suffix.
     * @param string $password Plain-text password.
     * @return int 1 when both requests completed and a redirect target was
     *             found, 0 otherwise.
     */
    private function login($username, $password)
    {
        // Step 1: initial login against the passport endpoint.
        $ch = curl_init();
        // NOTE(review): peer verification is switched off while a password is
        // being sent; kept as in the original, but this should be enabled
        // wherever a CA bundle is available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_URL, "https://reg.163.com/logins.jsp?type=1&product=mail126&url=http://entry.mail.126.com/cgi/ntesdoor?hid%3D10010102%26lightweight%3D1%26verifycookie%3D1%26language%3D0%26style%3D-1");
        curl_setopt($ch, CURLOPT_POST, 1);
        // Encode the form fields so that characters such as & + % = inside
        // the password cannot corrupt the request body.
        $fields = array(
            'username' => $username . '@126.com',
            'password' => $password,
        );
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields, '', '&'));
        curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $loginResponse = curl_exec($ch);
        curl_close($ch);
        if ($loginResponse === false) {
            return 0;
        }

        // The response body redirects via JavaScript: ...replace("<url>");
        // Without that URL there is nothing to follow, so treat it as a
        // failed login instead of requesting an empty address.
        if (!preg_match("/replace\(\"(.*?)\"\)\;/", $loginResponse, $match)) {
            return 0;
        }
        $redirectUrl = $match[1];

        // Step 2: follow the redirect, sending and updating the same cookies.
        $ch = curl_init($redirectUrl);
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
        curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        $redirectResponse = curl_exec($ch);
        curl_close($ch);

        // The earlier revision ended by searching an undefined variable for
        // the logout link, a check that could never match. Success is now
        // derived from the two requests themselves.
        return ($redirectResponse === false) ? 0 : 1;
    }

    /**
     * Fetch the mailbox address book.
     *
     * @param string $username Mailbox name without the "@126.com" suffix.
     * @param string $password Plain-text password.
     * @return array|string|int Map of e-mail address => display name; the
     *         message string '您的邮箱中尚未有联系人' when no contact was found;
     *         0 when login, session discovery or the request itself failed.
     */
    public function getAddressList($username, $password)
    {
        if (!$this->login($username, $password)) {
            return 0;
        }

        // Both the session id and the mail host are needed to build the
        // API URL below.
        $header = $this->_getheader($username);
        if (!$header['sid'] || !$header['host']) {
            return 0;
        }

        // NOTE(review): the XML markup of this request body and the
        // name="..." attributes in the two patterns further down were
        // stripped from the copy of this file that was recovered (only the
        // text nodes and the closing </string> survived). They have been
        // reconstructed from that remaining text - confirm against the live
        // API before relying on them.
        $body = '<?xml version="1.0"?>'
            . '<object><array name="items">'
            . '<object><string name="func">pab:searchContacts</string>'
            . '<object name="var"><array name="order"><object>'
            . '<string name="field">FN</string>'
            . '<boolean name="ignoreCase">true</boolean>'
            . '</object></array></object></object>'
            . '<object><string name="func">user:getSignatures</string></object>'
            . '<object><string name="func">pab:getAllGroups</string></object>'
            . '</array></object>';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, "http://" . $header['host'] . "/a/s?sid=" . $header['sid'] . "&func=global:sequential");
        curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-Type: application/xml"));
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        // Have cURL return the body directly rather than capturing echoed
        // output through an output buffer.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $contents = curl_exec($ch);
        curl_close($ch);
        if ($contents === false) {
            // A failed transfer is not the same as an empty address book.
            return 0;
        }

        // Pull e-mail addresses and display names out of the response; the
        // two result lists are paired by position.
        preg_match_all('/<string\s*name="EMAIL;PREF">(.*)<\/string>/Umsi', $contents, $mails);
        preg_match_all('/<string\s*name="FN">(.*)<\/string>/Umsi', $contents, $names);

        $users = array();
        foreach ($names[1] as $k => $user) {
            // Skip names that have no address at the same position.
            if (!isset($mails[1][$k])) {
                continue;
            }
            $users[$mails[1][$k]] = $user;
        }

        if (!$users) {
            return '您的邮箱中尚未有联系人';
        }
        return $users;
    }

    /**
     * Discover the mail host and session id for the logged-in user.
     *
     * Sends a header-only request to the entry URL and reads the Location
     * header of the response.
     *
     * @param string $username Mailbox name without the "@126.com" suffix.
     * @return array Keys 'sid', 'refer' (the full Location value) and 'host';
     *               each is null when it could not be determined.
     */
    private function _getheader($username)
    {
        $info = array('sid' => null, 'refer' => null, 'host' => null);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, "http://entry.mail.126.com/cgi/ntesdoor?hid=10010102&lightweight=1&verifycookie=1&language=0&style=-1&username=" . rawurlencode($username) . "@126.com");
        curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR); // cookies sent with the request
        curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);  // cookies returned by the server
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true);
        curl_setopt($ch, CURLOPT_NOBODY, true);
        // Same timeout as every other request in this class.
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        $content = curl_exec($ch);
        curl_close($ch);

        if ($content === false) {
            return $info;
        }

        if (!preg_match('/Location:\s*(.*?)\r\n/i', $content, $regs)) {
            return $info;
        }
        $info['refer'] = $regs[1];

        // Host part of the redirect target (http or https).
        if (preg_match('/https?\:\/\/(.*?)\//i', $info['refer'], $regs)) {
            $info['host'] = $regs[1];
        }

        // Stop at the next query separator so that parameters following the
        // sid are not captured as part of it.
        if (preg_match('/sid=([^&#\s]+)/i', $info['refer'], $regs)) {
            $info['sid'] = $regs[1];
        }

        return $info;
    }
}