在网上看到的一个HTTP类,很有用!收藏起!
"*/*", //允许所有类型,这句是在拖裤子放屁
'Accept-Language'=>"zh-cn,zh", //客户端使用的语言,有些网页根据这个来放回中文还是英文页面
'Accept-Encoding'=>"gzip, deflate",//支持的压缩方式
'User-Agent'=>"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)", //浏览器名称,有些页面抓不到东西的话考虑改为SE Bot
'Pragma'=>"no-cache", //不缓存
'Cache-Control'=>"no-cache", //不缓存
'Host'=>"", //请求的主机名,所谓的虚拟主机就是根据这个来判断的
'Referer'=>"", //引用页,很多防盗链根据这个来判断
'Cookie'=>"", //Cookie
'Connection'=>"Close" //连接完成后立即关闭
); //默认请求头
protected $Server; // Address of the target web server
protected $Port; // Port of the target web server
protected $ProxyHost; // Proxy server address
protected $ProxyPort; // Proxy server port
protected $ProxyUser; // User name for proxy authentication
protected $ProxyPass; // Password for proxy authentication
protected $Document; // Document (path and query) being requested
public $ErrNo; // Error number of the last failure
public $ErrStr; // Error message of the last failure
/**
 * Constructor.
 *
 * Currently performs no initialisation.
 */
public function __construct(){
}
/**
 * Parse a URL, resolve its host and build the request document.
 *
 * On success this sets $this->Server, $this->Port, $this->SSL,
 * $this->Header['Host'] and $this->Document. Path segments and query
 * keys/values that are not already percent-encoded are encoded, so URLs
 * containing e.g. non-ASCII characters can be requested.
 *
 * @param string $URL    Absolute http:// or https:// URL.
 * @param string $Method Request method; 'POST' selects urlencode(), any
 *                       other value selects rawurlencode().
 * @return string|false  The encoded document ("/path?query"), or false with
 *                       ErrNo/ErrStr set (1: bad scheme, 2: no host,
 *                       3: host could not be resolved).
 */
private function decode_url($URL,$Method='GET'){
    $Info = parse_url($URL);
    $Method = strtoupper($Method);
    // parse_url() returns false on seriously malformed URLs and omits absent parts.
    $Scheme = (is_array($Info) && isset($Info['scheme'])) ? strtolower($Info['scheme']) : '';
    if($Scheme != 'http' && $Scheme != 'https'){
        // Unsupported protocol
        $this->ErrNo = 1;
        $this->ErrStr = "No Allow Protolo";
        return false;
    }
    if(empty($Info['host'])){
        // No host name
        $this->ErrNo = 2;
        $this->ErrStr = "Invalid Hostname";
        return false;
    }
    $Host = $Info['host'];
    if(filter_var($Host,FILTER_VALIDATE_IP) !== false){
        // gethostbyname() returns an IP literal unchanged, which must not be
        // mistaken for a failed lookup.
        $this->Server = $Host;
    }else{
        $this->Server = gethostbyname($Host);
        if($this->Server == $Host){
            // gethostbyname() returns its input unchanged when resolution fails
            $this->ErrNo = 3;
            $this->ErrStr = "Hostname Could't Be Found";
            return false;
        }
    }
    $this->Header['Host'] = $Host;
    if($Scheme == 'https'){
        $this->SSL = true;
        $this->Port = empty($Info['port'])?443:intval($Info['port']);
    }else{
        // Reset the flag so a plain request after an https one does not use TLS.
        $this->SSL = false;
        $this->Port = empty($Info['port'])?80:intval($Info['port']);
    }
    // POST uses urlencode(), everything else rawurlencode().
    $Encode = ($Method == 'POST') ? 'urlencode' : 'rawurlencode';
    $Encoded = '/%[A-F0-9][A-F0-9]/i';

    // Query part
    $Query = '';
    if(isset($Info['query'])){
        // Some pages write "&" as an HTML entity inside links.
        $Raw = str_replace(array('&amp;','&#38;'),'&',$Info['query']);
        if(strlen($Raw) > 2){
            $Pairs = array();
            foreach(explode('&',$Raw) as $Item){
                // Limit 2 keeps "=" characters that belong to the value.
                $Parts = explode('=',$Item,2);
                if(count($Parts) == 1){
                    continue;
                }
                // Components that already contain %XX escapes are passed through
                // untouched so they are not encoded twice.
                $Key = preg_match($Encoded,$Parts[0]) ? $Parts[0] : $Encode($Parts[0]);
                $Var = preg_match($Encoded,$Parts[1]) ? $Parts[1] : $Encode($Parts[1]);
                $Pairs[] = $Key.'='.$Var;
            }
            // Joined by hand: http_build_query() would encode the values again.
            $Query = implode('&',$Pairs);
        }
    }

    // Path part
    $Segments = array();
    if(isset($Info['path']) && $Info['path'] !== ''){
        $Parts = explode('/',$Info['path']);
        array_shift($Parts); // drop the empty element before the leading "/"
        foreach($Parts as $Item){
            $Segments[] = preg_match($Encoded,$Item) ? $Item : $Encode($Item);
        }
    }
    $Path = implode('/',$Segments);

    $this->Document = '/'.$Path;
    if($Query !== ''){
        $this->Document .= '?'.$Query;
    }
    return $this->Document;
}
/**
 * Extract status, redirect target, encodings and cookies from a response header.
 *
 * Fills $this->Response['status'], ['redirect'], ['TransferEncoding'] and
 * ['ContentEncoding'] (empty string when the header is absent) and merges
 * any Set-Cookie name=value pairs into $this->Header['Cookie'] so they are
 * sent with later requests.
 *
 * @param string $Header Raw response header block.
 * @return bool Always true.
 */
protected function decode_head($Header){
    $Tmp = array();
    // HTTP status code; the reason phrase is optional.
    $this->Response['status'] = preg_match("/HTTP\/1\.[01] ([0-9]+)/i",$Header,$Tmp) ? trim($Tmp[1]) : '';

    // Single-valued headers. Patterns are anchored to the start of a line so
    // that e.g. "Content-Location:" is not mistaken for "Location:".
    $Fields = array(
        'redirect' => 'Location',                  // target of a 301/302 redirect
        'TransferEncoding' => 'Transfer-Encoding', // e.g. CHUNKED
        'ContentEncoding' => 'Content-Encoding',   // e.g. GZIP, DEFLATE
    );
    foreach($Fields as $Key => $Name){
        $Value = '';
        if(preg_match('/^'.preg_quote($Name,'/').':([^\n]*)\n/im',$Header,$Tmp)){
            $Value = trim($Tmp[1]);
        }
        $this->Response[$Key] = ($Key == 'redirect') ? $Value : strtoupper($Value);
    }

    // Cookies: keep only the leading name=value pair of each Set-Cookie line.
    $Current = isset($this->Header['Cookie']) ? trim($this->Header['Cookie']) : '';
    if(preg_match_all('/^Set-Cookie:((?:[^=\n]+)=(?:[^\n;]+)).*/im',$Header,$Tmp) && !empty($Tmp[1])){
        // Merge by cookie name so repeated requests replace old values
        // instead of appending duplicates forever.
        $Jar = array();
        $Items = array_merge(explode(';',$Current),$Tmp[1]);
        foreach($Items as $Item){
            $Item = trim($Item);
            if($Item === ''){
                continue;
            }
            $Parts = explode('=',$Item,2);
            $Jar[trim($Parts[0])] = $Item;
        }
        $Current = '';
        foreach($Jar as $Item){
            $Current .= $Item.';';
        }
    }
    $this->Header['Cookie'] = $Current;
    return true;
}
/**
 * Decode a response body according to the headers parsed by decode_head().
 *
 * A chunked body is first reassembled into one string; the result is then
 * decompressed as a whole when Content-Encoding is gzip or deflate. (The
 * compressed stream spans chunk boundaries, so chunks cannot be inflated
 * one by one.)
 *
 * @param string $String Raw body as read from the socket.
 * @param string $EOF    Line terminator used by the chunk framing.
 * @return string|false  Decoded body; false when decompression fails.
 */
protected function decode_body($String,$EOF="\r\n"){
    $Transfer = isset($this->Response['TransferEncoding']) ? strtoupper($this->Response['TransferEncoding']) : '';
    $Content = isset($this->Response['ContentEncoding']) ? strtoupper($this->Response['ContentEncoding']) : '';
    $String = (string)$String;

    if($Transfer == 'CHUNKED'){
        if($EOF === ''){
            $EOF = "\r\n"; // an empty terminator could never advance the cursor
        }
        $EndLength = strlen($EOF);
        $Total = strlen($String);
        $Offset = 0;
        $Joined = '';
        while($Offset < $Total){
            $LineEnd = strpos($String,$EOF,$Offset);
            if($LineEnd === false){
                break; // truncated framing: keep what has been collected
            }
            $SizeLine = substr($String,$Offset,$LineEnd - $Offset);
            $Offset = $LineEnd + $EndLength;
            // Ignore chunk extensions ("1a;name=value").
            $Semi = strpos($SizeLine,';');
            if($Semi !== false){
                $SizeLine = substr($SizeLine,0,$Semi);
            }
            $SizeLine = trim($SizeLine);
            if($SizeLine === ''){
                continue; // the line terminator that follows each chunk's data
            }
            if(!preg_match('/^[0-9a-f]+$/i',$SizeLine)){
                break; // not a chunk-size line
            }
            $Length = hexdec($SizeLine);
            if($Length == 0){
                break; // last chunk; trailers are ignored
            }
            // Chunk data is taken verbatim: it may be binary and may begin
            // or end with whitespace bytes.
            $Joined .= substr($String,$Offset,$Length);
            $Offset += $Length;
        }
        $String = $Joined;
    }

    if($String === ''){
        return $String;
    }
    if($Content == 'GZIP'){
        // gzdecode() parses the full gzip header, including optional fields;
        // skipping a fixed 10 bytes only works for the minimal header.
        return function_exists('gzdecode') ? gzdecode($String) : gzinflate(substr($String,10));
    }
    if($Content == 'DEFLATE'){
        // "deflate" is normally a zlib stream, but some servers send raw
        // deflate data; tell them apart by the two-byte zlib header check.
        $IsZlib = false;
        if(strlen($String) >= 2){
            $First = ord($String[0]);
            $Second = ord($String[1]);
            $IsZlib = (($First & 0x0F) == 8) && (((($First << 8) | $Second) % 31) == 0);
        }
        return $IsZlib ? gzuncompress($String) : gzinflate($String);
    }
    // Neither compressed nor otherwise encoded
    return $String;
}
/**
 * Set (or overwrite) one request header.
 *
 * @param string $Name  Header name, e.g. "Referer".
 * @param mixed  $Value Header value.
 * @return mixed The value that was stored.
 */
function add_head($Name,$Value){
    $this->Header[$Name] = $Value;
    return $Value;
}
/**
 * Build the request line and header block into $this->Request.
 *
 * For GET and HEAD the terminating blank line is appended; for POST it is
 * left to the caller, which appends it together with the body.
 *
 * @param string $Method GET, HEAD or POST (case-insensitive).
 * @return bool True on success; false with ErrNo 5 for any other method.
 */
protected function make_head($Method){
    $Method = strtoupper($Method);
    if(!in_array($Method,array('GET','HEAD','POST'),true)){
        $this->ErrNo = 5;
        $this->ErrStr = 'No Allow HTTP Method';
        return false;
    }
    // Through a proxy the request line carries the absolute URL.
    $UseProxy = !empty($this->ProxyHost) && !empty($this->ProxyPort);
    $Target = $UseProxy ? "http://{$this->Header['Host']}{$this->Document}" : $this->Document;
    $this->Request = "{$Method} {$Target} HTTP/{$this->Version}{$this->CRLF}";
    if($UseProxy && is_string($this->ProxyUser) && $this->ProxyUser !== ''){
        // Proxy credentials; only the Basic scheme is supported.
        $this->Request .= 'Proxy-Authorization: Basic '.base64_encode("{$this->ProxyUser}:{$this->ProxyPass}").$this->CRLF;
    }
    foreach($this->Header as $Key=>$Var){
        // Skip unset headers, but keep legitimate "0" values, which empty()
        // would drop (e.g. Content-Length: 0).
        if($Var === null || $Var === false || (string)$Var === ''){
            continue;
        }
        $this->Request .= "$Key: $Var{$this->CRLF}";
    }
    if($Method == 'GET' || $Method == 'HEAD'){
        $this->Request .= $this->CRLF; // blank line that ends the header block
    }
    return true;
}
/**
 * Open the socket to the proxy or to the target server.
 *
 * The proxy is used whenever both a proxy host and port are set, the same
 * condition make_head() uses for the request line. On failure fsockopen()
 * stores its error in ErrNo/ErrStr.
 *
 * @return resource|false The open socket, or false on failure.
 */
protected function connect(){
    if(!empty($this->ProxyHost) && !empty($this->ProxyPort)){
        // Talk to the proxy instead of the origin server.
        $Target = $this->ProxyHost;
        $Port = $this->ProxyPort;
    }elseif(!empty($this->SSL)){
        // HTTPS: connect by host name when known, so that SNI and
        // certificate name verification see the name rather than the IP.
        $Name = !empty($this->Header['Host']) ? $this->Header['Host'] : $this->Server;
        $Target = 'ssl://'.$Name;
        $Port = $this->Port;
    }else{
        // Plain connection straight to the resolved server.
        $Target = $this->Server;
        $Port = $this->Port;
    }
    // Fall back to PHP's default when no timeout has been configured.
    $Timeout = !empty($this->Timeout) ? $this->Timeout : (float)ini_get('default_socket_timeout');
    $this->Socket = fsockopen($Target,$Port,$this->ErrNo,$this->ErrStr,$Timeout);
    if(!$this->Socket){
        return false;
    }
    return $this->Socket;
}
/**
 * Send the prepared request and read the response.
 *
 * The raw header block is stored in $this->Response[0] and parsed by
 * decode_head(); the remainder of the stream is passed to decode_body().
 * The socket is closed in every case.
 *
 * @return string|false Decoded body, or false with ErrNo/ErrStr set
 *                      (4: no socket, 6: write failed or the connection
 *                      ended before the header was complete).
 */
protected function process(){
    if(!$this->Socket){
        $this->ErrNo = 4;
        $this->ErrStr = "Invalid Resouce,Timeout?";
        return false;
    }
    // fwrite() on a socket may write only part of the buffer.
    $Length = strlen($this->Request);
    $Sent = 0;
    while($Sent < $Length){
        $Written = fwrite($this->Socket,substr($this->Request,$Sent));
        if($Written === false || $Written === 0){
            fclose($this->Socket);
            $this->ErrNo = 6;
            $this->ErrStr = 'Write Failed';
            return false;
        }
        $Sent += $Written;
    }
    // Everything up to the first blank line is the header.
    $Head = '';
    while(strpos($Head,"\r\n\r\n") === false){
        $Line = fgets($this->Socket,512);
        if($Line === false){
            // Connection closed or timed out before the header ended;
            // without this check the loop would never terminate.
            fclose($this->Socket);
            $this->ErrNo = 6;
            $this->ErrStr = 'Incomplete Response Header';
            return false;
        }
        $Head .= $Line;
    }
    $this->Response[0] = $Head;
    $this->decode_head($Head);
    // Everything after the header is the body.
    $Body = '';
    while(!feof($this->Socket)){
        $Chunk = fread($this->Socket,8192);
        if($Chunk === false){
            break;
        }
        if($Chunk === ''){
            $Meta = stream_get_meta_data($this->Socket);
            if(!empty($Meta['timed_out'])){
                break; // a stalled peer must not keep the loop alive
            }
            continue;
        }
        $Body .= $Chunk;
    }
    fclose($this->Socket);
    return $this->decode_body($Body);
}
/**
 * Configure the proxy used for subsequent requests.
 *
 * @param string      $Host Proxy server address.
 * @param int|string  $Port Proxy server port.
 * @param string|null $User User name for proxy authentication.
 * @param string|null $Pass Password for proxy authentication.
 * @return bool Always true.
 */
public function set_proxy($Host,$Port=8080,$User=null,$Pass=null){
    $Settings = array(
        'ProxyHost' => $Host,
        'ProxyPort' => $Port,
        'ProxyUser' => $User,
        'ProxyPass' => $Pass,
    );
    foreach($Settings as $Property => $Value){
        $this->$Property = $Value;
    }
    return true;
}
/**
 * Set the timeout handed to fsockopen() when connecting.
 *
 * @param int|float $Time Timeout value.
 * @return int|float The value that was stored.
 */
public function set_timeout($Time){
    $this->Timeout = $Time;
    return $Time;
}
/**
 * Return the cookies currently stored in the Cookie request header.
 *
 * @return array Map of cookie name => value (null for an entry without "=");
 *               an empty array when no cookie is stored.
 */
public function get_cookie(){
    $Cookie = array();
    if(empty($this->Header['Cookie'])){
        return $Cookie;
    }
    foreach(explode(';',$this->Header['Cookie']) as $Item){
        $Item = trim($Item);
        if($Item === ''){
            continue; // the stored string ends with ";", which yields an empty item
        }
        // Limit 2 keeps "=" characters that are part of the value.
        $Tmp = explode('=',$Item,2);
        $Cookie[$Tmp[0]] = isset($Tmp[1]) ? $Tmp[1] : null;
    }
    return $Cookie;
}
/**
 * Fetch a page with a GET request.
 *
 * @param string $URL Absolute http:// or https:// URL.
 * @return string|false Page content, or false on failure (see ErrNo/ErrStr).
 */
public function get($URL){
    if(!$this->decode_url($URL)){
        return false;
    }
    // Default the Referer to the requested page when none (or one shorter
    // than three characters) is set. isset() on a string offset replaces the
    // curly-brace offset syntax, which PHP 8 no longer parses.
    if(!isset($this->Header['Referer'][2])){
        $Scheme = !empty($this->SSL) ? 'https' : 'http';
        $this->Header['Referer']="{$Scheme}://{$this->Header['Host']}{$this->Document}";
    }
    if(!$this->connect()){
        return false;
    }
    // Clear what a previous request left behind.
    $this->Response = array();
    $this->Header['Content-Type']=null;
    $this->Header['Content-Length']=null;
    if($this->make_head('get') === false){
        fclose($this->Socket);
        return false;
    }
    return $this->process();
}
/**
 * Check with a HEAD request whether a document exists.
 *
 * Only a 200 status counts as "exists"; any other status, including a
 * 301/302 redirect, yields false.
 *
 * @param string $URL Absolute http:// or https:// URL.
 * @return bool True when the server answered 200, otherwise false.
 */
public function head($URL){
    if(!$this->decode_url($URL)){
        return false;
    }
    // isset() on a string offset replaces the curly-brace offset syntax,
    // which PHP 8 no longer parses.
    if(!isset($this->Header['Referer'][2])){
        $Scheme = !empty($this->SSL) ? 'https' : 'http';
        $this->Header['Referer']="{$Scheme}://{$this->Header['Host']}{$this->Document}";
    }
    if(!$this->connect()){
        return false;
    }
    // Clear what a previous request left behind, so a HEAD issued after a
    // POST does not carry that request's entity headers or response data.
    $this->Response = array();
    $this->Header['Content-Type']=null;
    $this->Header['Content-Length']=null;
    if($this->make_head('head') === false){
        fclose($this->Socket);
        return false;
    }
    $this->process();
    return isset($this->Response['status']) && $this->Response['status']==200;
}
/**
 * Submit data, and optionally files, with a POST request.
 *
 * Without files the fields are sent as application/x-www-form-urlencoded;
 * when $File is an array the request is sent as multipart/form-data over
 * HTTP/1.0.
 *
 * @param string     $URL  Absolute http:// or https:// URL.
 * @param array      $Data Form fields as name => value.
 * @param array|null $File Files to upload as field name => local path; every
 *                         path must be readable by this process.
 * @return string|false Page content, or false on failure (ErrNo 5: $Data is
 *                      not an array, 7: a file is missing or unreadable).
 */
public function post($URL,$Data,$File=null){
    // Clear what a previous request left behind.
    $this->Response = array();
    $this->Header['Content-Type']=null;
    $this->Header['Content-Length']=null;
    if(!is_array($Data)){
        $this->ErrNo = 5;
        $this->ErrStr = 'The Data Must Be Array';
        return false;
    }
    if(!$this->decode_url($URL)){
        return false;
    }
    // isset() on a string offset replaces the curly-brace offset syntax,
    // which PHP 8 no longer parses.
    if(!isset($this->Header['Referer'][2])){
        $Scheme = !empty($this->SSL) ? 'https' : 'http';
        $this->Header['Referer']="{$Scheme}://{$this->Header['Host']}{$this->Document}";
    }
    $this->Header['Connection']='Close';

    if(is_array($File)){
        // multipart/form-data: one part per field, then one part per file.
        $Boundary = md5(uniqid('',true));
        $Parts = array();
        foreach($Data as $Key => $Var){
            $Parts[]="--$Boundary{$this->CRLF}Content-Disposition: form-data; name=\"$Key\"{$this->CRLF}{$this->CRLF}$Var";
        }
        foreach($File as $Key => $Var){
            $Content = (is_file($Var) && is_readable($Var)) ? file_get_contents($Var) : false;
            if($Content === false){
                $this->ErrNo = 7;
                $this->ErrStr = 'The File No Exist.';
                return false;
            }
            $Parts[]="--$Boundary{$this->CRLF}Content-Disposition: form-data; name=\"$Key\"; filename=\"".basename($Var)."\"{$this->CRLF}".
                "Content-Type: application/octet-stream{$this->CRLF}".
                "Content-Transfer-Encoding: binary{$this->CRLF}{$this->CRLF}".
                $Content;
        }
        $Body = empty($Parts) ? '' : implode($this->CRLF,$Parts).$this->CRLF;
        $Body .= "--$Boundary--{$this->CRLF}";
        // The boundary is a parameter of the media type, separated by ";".
        $this->Header['Content-Type']='multipart/form-data; boundary='.$Boundary;
        $this->Header['Content-Length']=strlen($Body);
        // Uploads are sent as HTTP/1.0; the configured version is restored
        // afterwards so later requests are not silently downgraded.
        $Version = $this->Version;
        $this->Version = '1.0';
        $Made = $this->make_head('POST');
        $this->Version = $Version;
    }else{
        // Plain form post. http_build_query() encodes keys and values itself;
        // the separator is given explicitly because arg_separator.output may
        // be configured as "&amp;".
        $Body = http_build_query($Data,'','&');
        $this->Header['Content-Type']='application/x-www-form-urlencoded';
        $this->Header['Content-Length']=strlen($Body);
        $Made = $this->make_head('POST');
    }
    if($Made === false){
        return false;
    }
    // make_head() leaves the header block open for POST: add the blank line
    // and then exactly Content-Length bytes of body.
    $this->Request .= $this->CRLF.$Body;
    if(!$this->connect()){
        return false;
    }
    return $this->process();
}
}
?>