使用方法:
require_once "VideoUrlParser.class.php";<BR>$url = "http://v.youku.com/v_show/id_XMjkwMzc0Njg4.html";<BR>$info = VideoUrlParser::parse($url);<BR>print_r($info);
说明:先引入工具文件 VideoUrlParser.class.php,$url 变量保存的是视频播放页的地址;VideoUrlParser::parse() 解析成功时返回一个数组,失败时返回 false。由于 $info 是数组,直接 echo 只会输出 "Array",所以这里用 print_r 输出变量 $info 的内容。
附:$info 数组含有以下几个键,分别是 img(视频缩略图地址)、title(视频标题)、url(视频页地址)、swf(视频 swf 播放器地址)。我只用到了 img 和 swf 地址,具体的可以根据自己的需要进行调整。
VideoUrlParser类源码:
<?php
/**
 * Video URL parser.
 *
 * Resolves the thumbnail, title and Flash player address of a video from the
 * URL of its page on one of several Chinese video sites.
 *
 * @package
 * @version 1.2
 * @copyright 2005-2011 HDJ.ME
 * @author Dijia Huang
 * @license PHP Version 3.0 {@link http://www.php.net/license/3_0.txt}
 *
 * Usage
 * require_once "VideoUrlParser.class.php";
 * $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
 * $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
 * $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
 * $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
 * $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
 * $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
 * $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
 *
 * foreach($urls as $url){
 *     $info = VideoUrlParser::parse($url);
 *     //var_dump($info);
 *     echo "{$info['title']}";
 *     echo "<br />";
 *     echo $info['object'];
 *     echo "<br />";
 * }
 *
 * Example page URLs per site:
 *
 * // Youku
 * http://v.youku.com/v_show/id_XMjU0NjY4OTEy.html
 *
 * // Ku6
 * http://v.ku6.com/special/show_3917484/x0BMXAbgZdQS6FqN.html
 *
 * // Tudou
 * http://www.tudou.com/playlist/p/a65929.html?iid=74905844
 *
 * // 56
 * http://www.56.com/u98/v_NTkyODY2NTU.html
 *
 * // Sina video
 * http://video.sina.com.cn/v/b/46909166-1290055681.html
 *
 * // LeTV
 * http://www.letv.com/ptv/vplay/1168109.html
 */

class VideoUrlParser
{
    // Kept on one line: a line break inside the value would corrupt the raw
    // HTTP request header written by _fsget().
    const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";
    const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";

    /**
     * Parse a video page URL.
     *
     * The site is detected on a lower-cased copy of the URL; the site-specific
     * parser receives the URL unchanged because video ids are case sensitive.
     *
     * @param string $url          Address of the video page.
     * @param mixed  $createObject When truthy, an 'object' key is added to the result.
     * @static
     * @access public
     * @return array|false Array with the keys 'img', 'title', 'url', 'swf'
     *                     (plus 'object'), or false when the URL is not
     *                     recognised or the page could not be parsed.
     */
    static public function parse($url = '', $createObject = true)
    {
        $lowerurl = strtolower((string) $url);
        if (!preg_match(self::CHECK_URL_VALID, $lowerurl, $matches)) {
            return false;
        }

        switch ($matches[1]) {
            case 'youku.com':
                $data = self::_parseYouku($url);
                break;
            case 'tudou.com':
                $data = self::_parseTudou($url);
                break;
            case 'ku6.com':
                $data = self::_parseKu6($url);
                break;
            case '56.com':
                $data = self::_parse56($url);
                break;
            case 'letv.com':
                $data = self::_parseLetv($url);
                break;
            case 'video.sina.com.cn':
                $data = self::_parseSina($url);
                break;
            case 'my.tv.sohu.com':
            case 'tv.sohu.com':
                $data = self::_parseSohu($url);
                break;
            case 'v.qq.com':
                $data = self::_parseQq($url);
                break;
            default:
                $data = false;
        }

        // NOTE(review): the value is an empty string in the source this was
        // taken from; it presumably held player embed markup built from
        // $data['swf'] -- confirm against the upstream class.
        if ($data && $createObject) {
            $data['object'] = "";
        }
        return $data;
    }

    /**
     * Return one capture group of the first match of $pattern in $subject.
     *
     * @param string $pattern PCRE pattern.
     * @param mixed  $subject Text to search; non-strings never match.
     * @param int    $group   Capture group index.
     * @return string|null The captured text, or null when there is no match.
     */
    private static function _capture($pattern, $subject, $group = 1)
    {
        if (!is_string($subject)) {
            return null;
        }
        if (preg_match($pattern, $subject, $found) && isset($found[$group])) {
            return $found[$group];
        }
        return null;
    }

    /**
     * Tencent video
     * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
     * http://v.qq.com/play/97abu74o4w3.html
     * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
     * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
     * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseQq($url)
    {
        if (preg_match("/\/play\//", $url)) {
            // /play/ pages carry a "url=..." target; follow it. Only the
            // address itself is kept, without the "url=" prefix.
            $target = self::_capture("/url=([^\"]+)/", self::_fget($url));
            if (!$target) {
                return false;
            }
            $url = $target;
        }

        $vid = self::_capture("/vid=([^\_]+)/", $url);
        $html = self::_fget($url);
        if (!is_string($html)) {
            return false;
        }

        // query string handed to the Flash player
        $query = self::_capture("/flashvars\s=\s\"([^;]+)/s", $html);
        if ($query === null) {
            return false;
        }
        if (!$vid) {
            // vid was not in the URL; fall back to the default set in the page script
            $vid = (string) self::_capture("/vid\s?=\s?vid\s?\|\|\s?\"(\w+)\";/i", $html);
        }
        $query = str_replace('"+vid+"', $vid, $query);
        parse_str($query, $output);

        $cid = isset($output['cid']) ? $output['cid'] : '';

        $data = array();
        $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
        $data['url'] = $url;
        $data['title'] = isset($output['title']) ? $output['title'] : null;
        $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?" . $query;
        return $data;
    }

    /**
     * Youku
     * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
     * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseYouku($url)
    {
        $id = self::_capture("#id\_(\w+)#", $url);

        if ($id === null) {
            // Only playlist pages are supported without an id in the URL;
            // their video id is read from the page script.
            if (!preg_match("#v_playlist\/#", $url)) {
                return false;
            }
            $id = self::_capture("#videoId2\s*=\s*\'(\w+)\'#", self::_fget($url));
            if ($id === null) {
                return false;
            }
        }

        $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$id}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";

        $retval = self::_cget($link);
        if (!$retval) {
            return false;
        }
        $json = json_decode($retval, true);
        if (!isset($json['data'][0]) || !is_array($json['data'][0])) {
            return false;
        }
        $item = $json['data'][0];

        $data = array();
        $data['img'] = isset($item['logo']) ? $item['logo'] : null;
        $data['title'] = isset($item['title']) ? $item['title'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://player.youku.com/player.php/sid/{$id}/v.swf";
        return $data;
    }

    /**
     * Tudou
     * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
     * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
     *
     * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
     * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseTudou($url)
    {
        $code = self::_capture("#view/([-\w]+)/#", $url);
        if ($code === null) {
            return self::_parseTudouPlaylist($url);
        }

        $ret = self::_fsget("/v/{$code}/v.swf", "www.tudou.com");
        if (!is_string($ret) || !preg_match("#\nLocation: (.*)\n#", $ret, $mat)) {
            return false;
        }

        // The redirect target carries the metadata in its query string.
        // trim() drops the "\r" left at the end of the header line.
        $queryString = parse_url(urldecode(trim($mat[1])), PHP_URL_QUERY);
        parse_str((string) $queryString, $params);

        $data = array();
        $data['img'] = isset($params['snap_pic']) ? $params['snap_pic'] : null;
        $data['title'] = isset($params['title']) ? $params['title'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://www.tudou.com/v/{$code}/v.swf";
        return $data;
    }

    /**
     * Tudou playlist pages (/playlist/...): metadata comes from the page script.
     *
     * @param string $url Playlist page URL.
     * @return array|false
     */
    private static function _parseTudouPlaylist($url)
    {
        if (strpos($url, "/playlist/") === false) {
            return false;
        }

        $lowerurl = strtolower($url);
        if (strpos($url, 'iid=') !== false) {
            $quarr = explode("iid=", $lowerurl);
            if (empty($quarr[1])) {
                return false;
            }
        } elseif (preg_match("#p\/l(\d+).#", $lowerurl, $quarr)) {
            if (empty($quarr[1])) {
                return false;
            }
        }

        $html = self::_fget($url);
        if (!is_string($html)) {
            return false;
        }
        $converted = iconv("GB2312", "UTF-8", $html);
        if ($converted !== false) {
            $html = $converted;
        }

        $icode = self::_capture("/lid_code\s=\slcode\s=\s[\'\"]([^\'\"]+)/s", $html);
        $iid = self::_capture("/iid\s=\s.*?\|\|\s(\d+)/sx", $html);
        $list = self::_capture("/listData\s=\s(\[\{.*\}\])/sx", $html);
        if ($icode === null || $iid === null || $list === null) {
            return false;
        }

        // Rewrite the JavaScript literal into JSON: strip whitespace, blank
        // out non-literal values, and quote the bare keys.
        $find = array("/\n/", '/\s/', "/:[^\d\"]\w+[^\,]*,/i", "/(\{|,)(\w+):/");
        $replace = array("", "", ':"",', '\\1"\\2":');
        $json = json_decode(preg_replace($find, $replace, $list));
        if (!is_array($json) && !is_object($json)) {
            return false;
        }

        // Pick the entry whose iid matches; fall back to the last entry.
        $val = null;
        foreach ($json as $item) {
            $val = $item;
            if (isset($item->iid) && $item->iid == $iid) {
                break;
            }
        }
        if (!is_object($val)) {
            return false;
        }

        $data = array();
        $data['img'] = isset($val->pic) ? $val->pic : null;
        $data['title'] = isset($val->title) ? $val->title : null;
        $data['url'] = $url;
        $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
        return $data;
    }

    /**
     * Ku6
     * http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
     * http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
     * http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
     * http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseKu6($url)
    {
        if (preg_match("/show\_/", $url)) {
            $id = self::_capture("#/([-\w]+)\.html#", $url);
            if ($id === null) {
                return false;
            }
            // A separate variable keeps $data['url'] pointing at the page the
            // caller passed in rather than at this API address.
            $api = "http://v.ku6.com/fetchVideo4Player/{$id}.html";
            $html = self::_fget($api);
            if (!$html) {
                return false;
            }
            $json = json_decode($html, true);
            if (!$json || !isset($json['data']) || !is_array($json['data'])) {
                return false;
            }

            $data = array();
            $data['img'] = isset($json['data']['picpath']) ? $json['data']['picpath'] : null;
            $data['title'] = isset($json['data']['t']) ? $json['data']['t'] : null;
            $data['url'] = $url;
            $data['swf'] = "http://player.ku6.com/refer/{$id}/v.swf";
            return $data;
        }

        if (preg_match("/show\//", $url)) {
            $html = self::_fget($url);
            if (!is_string($html)) {
                return false;
            }
            $str = self::_capture("/ObjectInfo\s?=\s?([^\n]*)};/si", $html);

            $data = array();
            // img
            $data['img'] = self::_capture("/cover\s?:\s?\"([^\"]+)\"/", $str);
            // title: round-trip through json_decode so JSON escape sequences
            // in the page script are decoded
            $rawTitle = self::_capture("/title\"?\s?:\s?\"([^\"]+)\"/", $str);
            $json = json_decode("{\"title\":\"{$rawTitle}\"}", true);
            $data['title'] = isset($json['title']) ? $json['title'] : null;
            // url
            $data['url'] = $url;
            // query: flashVars is HTML-escaped inside the attribute
            $query = str_replace("&amp;", '&', (string) self::_capture("/\"(vid=[^\"]+)\"\sname=\"flashVars\"/s", $html));
            $player = self::_capture("/\/\/player\.ku6cdn\.com[^\"\']+/", $html, 0);
            if ($player === null) {
                return false;
            }
            $data['swf'] = 'http:' . $player . '?' . $query;
            return $data;
        }

        return false;
    }

    /**
     * 56.com
     * http://www.56.com/u73/v_NTkzMDcwNDY.html
     * http://player.56.com/v_NTkzMDcwNDY.swf
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parse56($url)
    {
        $id = self::_capture("#/v_(\w+)\.html#", $url);
        if ($id === null) {
            return false;
        }

        $link = "http://vxml.56.com/json/{$id}/?src=out";
        $retval = self::_cget($link);
        if (!$retval) {
            return false;
        }
        $json = json_decode($retval, true);
        if (!isset($json['info']) || !is_array($json['info'])) {
            return false;
        }

        $data = array();
        $data['img'] = isset($json['info']['img']) ? $json['info']['img'] : null;
        $data['title'] = isset($json['info']['Subject']) ? $json['info']['Subject'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://player.56.com/v_{$id}.swf";
        return $data;
    }

    /**
     * LeTV
     * http://www.letv.com/ptv/vplay/1168109.html
     * http://www.letv.com/player/x1168109.swf
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseLetv($url)
    {
        $id = self::_capture("#vplay/(\d+)#", $url);
        if ($id === null) {
            return false;
        }

        $html = self::_fget($url);
        // The v.t.sina.com.cn link in the page carries 'pic' and 'title' in
        // its query string.
        $share = self::_capture("#http://v.t.sina.com.cn/([^'\"]*)#", $html, 0);
        $params = array();
        if ($share !== null) {
            parse_str((string) parse_url(urldecode($share), PHP_URL_QUERY), $params);
        }

        $data = array();
        $data['img'] = isset($params['pic']) ? $params['pic'] : null;
        $data['title'] = isset($params['title']) ? $params['title'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://www.letv.com/player/x{$id}.swf";
        return $data;
    }

    /**
     * Sohu TV
     * http://my.tv.sohu.com/u/vw/5101536
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseSohu($url)
    {
        $html = self::_fget($url);
        if (!is_string($html)) {
            return false;
        }
        $converted = iconv("GB2312", "UTF-8", $html);
        if ($converted !== false) {
            $html = $converted;
        }

        // Matches are consumed by position: title, image, videosrc.
        preg_match_all("/og:(?:title|image|videosrc)\"\scontent=\"([^\"]+)\"/s", $html, $matches);
        $values = isset($matches[1]) ? $matches[1] : array();

        $data = array();
        $data['img'] = isset($values[1]) ? $values[1] : null;
        $data['title'] = isset($values[0]) ? $values[0] : null;
        $data['url'] = $url;
        $data['swf'] = isset($values[2]) ? $values[2] : null;
        return $data;
    }

    /**
     * Sina video
     * http://video.sina.com.cn/v/b/48717043-1290055681.html
     * http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
     *
     * @param string $url Video page URL.
     * @return array|false
     */
    private static function _parseSina($url)
    {
        if (!preg_match("/(\d+)(?:\-|\_)(\d+)/", $url, $ids)) {
            return false;
        }
        // Normalise to the canonical page address for this id pair.
        $url = "http://video.sina.com.cn/v/b/{$ids[1]}-{$ids[2]}.html";

        $literal = self::_capture("/video\s?:\s?([^<]+)}/", self::_fget($url));
        if ($literal === null) {
            return false;
        }

        // Rewrite the JavaScript object literal into JSON.
        $find = array("/\n/", "/\s*/", "/\'/", "/\{([^:,]+):/", "/,([^:]+):/", "/:[^\d\"]\w+[^\,]*,/i");
        $replace = array('', '', '"', '{"\\1":', ',"\\1":', ':"",');
        $arr = json_decode(preg_replace($find, $replace, $literal), true);
        if (!is_array($arr)) {
            return false;
        }

        $data = array();
        $data['img'] = isset($arr['pic']) ? $arr['pic'] : null;
        $data['title'] = isset($arr['title']) ? $arr['title'] : null;
        $data['url'] = $url;
        $data['swf'] = isset($arr['swfOutsideUrl']) ? $arr['swfOutsideUrl'] : null;
        return $data;
    }

    /**
     * Fetch a URL with file_get_contents.
     *
     * @param string $url Address to fetch.
     * @return string|false Response body (gunzipped when it is a gzip stream),
     *                      or false on failure.
     */
    private static function _fget($url = '')
    {
        if (!$url) {
            return false;
        }
        $html = file_get_contents($url);
        if ($html === false) {
            return false;
        }
        // Transparently decode gzip-compressed responses.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Fetch a path from a host over a raw socket (fsockopen).
     *
     * Redirects are not followed; the raw response, including the header
     * block, is returned.
     *
     * @param string $path       Request path.
     * @param string $host       Host name; the connection goes to port 80.
     * @param string $user_agent User-Agent header; defaults to self::USER_AGENT.
     * @return string|false Raw HTTP response, or false on failure.
     */
    private static function _fsget($path = '/', $host = '', $user_agent = '')
    {
        if (!$path || !$host) {
            return false;
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        // "Connection: close" makes the server end the stream after the
        // response so the read loop below does not wait on keep-alive.
        $out = implode("\r\n", array(
            "GET {$path} HTTP/1.1",
            "Host: {$host}",
            "User-Agent: {$user_agent}",
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language: zh-cn,zh;q=0.5",
            "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7",
            "Connection: close",
        )) . "\r\n\r\n";

        $fp = @fsockopen($host, 80, $errno, $errstr, 10);
        if (!$fp) {
            return false;
        }
        if (!fputs($fp, $out)) {
            fclose($fp);
            return false;
        }
        $html = '';
        while (!feof($fp)) {
            $chunk = fgets($fp, 1024);
            if ($chunk === false) {
                break;
            }
            $html .= $chunk;
        }
        fclose($fp);

        // Transparently decode gzip-compressed responses.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Fetch a URL with curl.
     *
     * @param string $url        Address to fetch.
     * @param string $user_agent User-Agent header; defaults to self::USER_AGENT.
     * @return string|false Response body, or false on failure or empty response.
     */
    private static function _cget($url = '', $user_agent = '')
    {
        if (!$url) {
            return false;
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if (strlen($user_agent)) {
            curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        }

        $html = curl_exec($ch);
        $failed = (bool) curl_errno($ch);
        curl_close($ch);

        if ($failed || !is_string($html) || !strlen($html)) {
            return false;
        }
        return $html;
    }

    /**
     * Decode a gzip stream (RFC 1952).
     *
     * @param mixed $data Candidate gzip data.
     * @return string|null|false The decompressed payload; null when $data is
     *                           not a gzip stream; false when it looks like
     *                           gzip but is malformed.
     */
    private static function _gzdecode($data)
    {
        if (!is_string($data)) {
            return null;
        }
        $len = strlen($data);
        if ($len < 18 || strcmp(substr($data, 0, 2), "\x1f\x8b")) {
            return null; // Not GZIP format (see RFC 1952)
        }
        $method = ord($data[2]); // Compression method
        $flags = ord($data[3]);  // Flags
        if (($flags & 31) != $flags) {
            // Reserved bits are set -- not allowed by RFC 1952
            return null;
        }

        // Fixed header: magic(2) method(1) flags(1) mtime(4) xfl(1) os(1).
        $headerlen = 10;

        if ($flags & 4) {
            // FEXTRA: 2-byte little-endian length, then that many bytes
            if ($len - $headerlen - 2 < 8) {
                return false; // Invalid format
            }
            $extralen = unpack("v", substr($data, $headerlen, 2));
            $extralen = $extralen[1];
            if ($len - $headerlen - 2 - $extralen < 8) {
                return false; // Invalid format
            }
            $headerlen += 2 + $extralen;
        }

        // FNAME (8) and FCOMMENT (16): NUL-terminated strings, in that order
        foreach (array(8, 16) as $flag) {
            if (!($flags & $flag)) {
                continue;
            }
            if ($len - $headerlen - 1 < 8) {
                return false; // Invalid format
            }
            $end = strpos($data, chr(0), $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false; // Invalid format
            }
            $headerlen = $end + 1;
        }

        if ($flags & 1) {
            // FHCRC: low 16 bits of the CRC32 of the header so far
            if ($len - $headerlen - 2 < 8) {
                return false; // Invalid format
            }
            $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
            $headercrc = unpack("v", substr($data, $headerlen, 2));
            if ($headercrc[1] != $calccrc) {
                return false; // Bad header CRC
            }
            $headerlen += 2;
        }

        // Footer: CRC32 and length of the uncompressed data
        $datacrc = unpack("V", substr($data, -8, 4));
        $datacrc = $datacrc[1];
        $isize = unpack("V", substr($data, -4));
        $isize = $isize[1];

        $bodylen = $len - $headerlen - 8;
        if ($bodylen < 1) {
            return null;
        }
        if ($method != 8) {
            return false; // deflate is the only supported compression method
        }
        $decoded = gzinflate(substr($data, $headerlen, $bodylen));
        if ($decoded === false) {
            return false;
        }

        if ($isize != strlen($decoded) || crc32($decoded) != $datacrc) {
            // Length or CRC does not match
            return false;
        }
        return $decoded;
    }
}