标题:不用iconv函数实现UTF-8编码转换GB2312的PHP函数 出处:沧海一粟 时间:Wed, 11 Oct 2006 16:08:27 +0000 作者:jed 地址:http://www.dzhope.com/post/109/ 内容:

如果使用 iconv() 函数转换编码就相对比较简单了,不过很多虚拟主机里并不支持这个组件,我在网上找了半天,才找到一个gb2312转utf-8的方法,但不能逆向转换。

这个函数如下:

/**
 * Convert a GB2312 string to UTF-8 without iconv.
 *
 * The mapping is read once from "gb2312-utf8.table" in this file's directory
 * and cached in the global $CODETABLE. Each table line is read as two
 * 6-character hex fields (columns 0-5 and 7-12, presumably "0xHHHH 0xHHHH"):
 * the first is the GB2312 code minus 0x8080, the second the Unicode code point.
 *
 * Bytes <= 127 are copied through unchanged. A byte > 127 is taken as the
 * first byte of a two-byte GB2312 character; pairs with no table entry
 * (including a dangling lead byte at the end of the input) are dropped.
 *
 * @param string $gbstr GB2312-encoded input.
 * @return string UTF-8 output; the input itself if it is blank or the
 *                mapping table cannot be opened.
 */
function gb2utf8($gbstr)
{
    global $CODETABLE;

    if (trim($gbstr) == "") {
        return $gbstr;
    }
    $gbstr = (string) $gbstr;

    if (empty($CODETABLE)) {
        $filename = dirname(__FILE__) . "/gb2312-utf8.table";
        $fp = fopen($filename, "r");
        if ($fp === false) {
            // fopen() has already raised a warning; nothing can be converted.
            return $gbstr;
        }
        while (($l = fgets($fp, 15)) !== false) {
            $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
        }
        fclose($fp);
    }

    $ret = "";
    $len = strlen($gbstr);
    $i = 0;
    // Walk by index: the previous `while ($gbstr)` test stopped early as soon
    // as the remaining text was the string "0", which PHP treats as false.
    while ($i < $len) {
        if (ord($gbstr[$i]) > 127) {
            $thisW = substr($gbstr, $i, 2);
            $i += 2;
            $key = hexdec(bin2hex($thisW)) - 0x8080;
            if (!isset($CODETABLE[$key])) {
                // No mapping: drop the pair instead of emitting a NUL byte.
                continue;
            }
            // u2utf8() returns decimal byte values; every byte of a
            // multi-byte sequence is >= 128, i.e. exactly three digits.
            $utf8 = u2utf8(hexdec($CODETABLE[$key]));
            for ($j = 0; $j < strlen($utf8); $j += 3) {
                $ret .= chr((int) substr($utf8, $j, 3));
            }
        } else {
            $ret .= $gbstr[$i];
            $i++;
        }
    }
    return $ret;
}

/**
 * Encode a Unicode code point as UTF-8, returned as decimal digits.
 *
 * The result is NOT the raw bytes: it is the decimal value of each UTF-8
 * byte concatenated into one string (e.g. 0x4E2D gives "228184173").
 * gb2utf8() splits this string into groups of three digits and passes each
 * group to chr().
 *
 * @param int $c Unicode code point.
 * @return string Concatenated decimal byte values; "" if $c >= 0x200000.
 */
function u2utf8($c)
{
    // Plain initialisation: the former `for (...; $i < count($c); ...)` loop
    // called count() on an integer, which is a TypeError on PHP 8.
    $str = "";
    if ($c < 0x80) {
        $str .= $c;
    } else if ($c < 0x800) {
        $str .= (0xC0 | ($c >> 6));
        $str .= (0x80 | ($c & 0x3F));
    } else if ($c < 0x10000) {
        $str .= (0xE0 | ($c >> 12));
        $str .= (0x80 | (($c >> 6) & 0x3F));
        $str .= (0x80 | ($c & 0x3F));
    } else if ($c < 0x200000) {
        $str .= (0xF0 | ($c >> 18));
        $str .= (0x80 | (($c >> 12) & 0x3F));
        $str .= (0x80 | (($c >> 6) & 0x3F));
        $str .= (0x80 | ($c & 0x3F));
    }
    return $str;
}

因为gb2312都是双字节的,因此转换为utf-8就相对比较简单,但反之就很麻烦了,我尝试了一下,像这样:

/**
 * Convert a UTF-8 string to GB2312 without iconv (byte-walking version).
 *
 * The reverse mapping (Unicode code point => GB2312 code minus 0x8080) is
 * built once from "gb2312-utf8.table" in this file's directory and cached in
 * the global $UC2GBTABLE.
 *
 * ASCII bytes are copied through. Two- and three-byte UTF-8 sequences are
 * decoded and looked up; characters with no GB2312 mapping are dropped.
 * Bytes that cannot start a decodable sequence (stray continuation bytes,
 * four-byte leads, a sequence cut off at the end of the input) are copied
 * through unchanged. The result is passed through trim(), so leading and
 * trailing whitespace is removed.
 *
 * @param string $utfstr UTF-8 input.
 * @return string GB2312 output; the input itself if it is blank or the
 *                mapping table cannot be opened.
 */
function utf82gb($utfstr)
{
    global $UC2GBTABLE;

    if (trim($utfstr) == "") {
        return $utfstr;
    }
    if (empty($UC2GBTABLE)) {
        $filename = dirname(__FILE__) . "/gb2312-utf8.table";
        $fp = fopen($filename, "r");
        if ($fp === false) {
            // fopen() has already raised a warning; nothing can be converted.
            return $utfstr;
        }
        while (($l = fgets($fp, 15)) !== false) {
            $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
        }
        fclose($fp);
    }

    $okstr = "";
    $ulen = strlen($utfstr);
    for ($i = 0; $i < $ulen; $i++) {
        $lead = ord($utfstr[$i]);
        if ($lead < 0x80) {
            $okstr .= $utfstr[$i];
            continue;
        }

        if ($lead >= 0xE0 && $lead < 0xF0 && $i + 2 < $ulen) {
            $u = utf82u_3(substr($utfstr, $i, 3));
            // Step over the two continuation bytes; without this they were
            // each re-decoded as the start of another character.
            $i += 2;
        } else if ($lead >= 0xC0 && $lead < 0xE0 && $i + 1 < $ulen) {
            $u = (($lead & 0x1f) << 6) | (ord($utfstr[$i + 1]) & 0x3f);
            $i += 1;
        } else {
            $okstr .= $utfstr[$i];
            continue;
        }

        if (isset($UC2GBTABLE[$u])) {
            $gb = $UC2GBTABLE[$u] + 0x8080;
            $okstr .= chr($gb >> 8) . chr($gb & 0xFF);
        }
    }
    $okstr = trim($okstr);
    return $okstr;
}

/**
 * Decode one three-byte UTF-8 sequence into its Unicode code point.
 *
 * @param string $c At least three bytes; only the first three are read.
 * @return int The decoded code point.
 */
function utf82u_3($c)
{
    // A three-byte lead is 1110xxxx, so only its low four bits carry data.
    $n = (ord($c[0]) & 0x0f) << 12;
    $n += (ord($c[1]) & 0x3f) << 6;
    $n += ord($c[2]) & 0x3f;
    return $n;
}

按这种方法,大部分字符也算是能转换成功的了,不过总是有点不妥之处,我把程序改成这样子:

/**
 * Convert a UTF-8 string to GB2312 without iconv (urlencode-based version).
 *
 * The input is first passed through urlencode(), then the "%XX" escapes are
 * walked: escapes below 0x80 become the single byte they encode, three-byte
 * sequences are decoded with url_utf2u() and two-byte sequences inline, and
 * the code point is looked up in the global $UC2GBTABLE (built once from
 * "gb2312-utf8.table" in this file's directory). Characters with no GB2312
 * mapping are dropped; escapes that cannot start a decodable sequence are
 * emitted as the raw byte they encode. "+" (urlencode's space) becomes a
 * space. The result is passed through trim().
 *
 * @param string $utfstr UTF-8 input.
 * @return string GB2312 output; the input itself if it is blank or the
 *                mapping table cannot be opened.
 */
function utf82gb($utfstr)
{
    global $UC2GBTABLE;

    if (trim($utfstr) == "") {
        return $utfstr;
    }
    if (empty($UC2GBTABLE)) {
        $filename = dirname(__FILE__) . "/gb2312-utf8.table";
        $fp = fopen($filename, "r");
        if ($fp === false) {
            // fopen() has already raised a warning; nothing can be converted.
            return $utfstr;
        }
        while (($l = fgets($fp, 15)) !== false) {
            $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
        }
        fclose($fp);
    }

    $okstr = "";
    $utfstr = urlencode($utfstr);
    $ulen = strlen($utfstr);
    for ($i = 0; $i < $ulen; $i++) {
        if ($utfstr[$i] == "%") {
            if ($ulen > $i + 2) {
                $hexnext = hexdec(substr($utfstr, $i + 1, 2));
                if ($hexnext < 0x80) {
                    // Includes %7F, which the old `< 127` test misrouted.
                    $okstr .= chr($hexnext);
                    $i = $i + 2;
                } else if ($hexnext >= 0xE0 && $hexnext < 0xF0 && $ulen >= $i + 9) {
                    // "%E4%B8%AD": hand "E4%B8%AD" to url_utf2u().
                    $u = url_utf2u(substr($utfstr, $i + 1, 8));
                    if (isset($UC2GBTABLE[$u])) {
                        $gb = $UC2GBTABLE[$u] + 0x8080;
                        $okstr .= chr($gb >> 8) . chr($gb & 0xFF);
                    }
                    $i = $i + 8;
                } else if ($hexnext >= 0xC0 && $hexnext < 0xE0
                    && $ulen >= $i + 6 && $utfstr[$i + 3] == "%") {
                    // Two-byte sequence "%C2%B7", decoded inline.
                    $u = (($hexnext & 0x1f) << 6)
                        | (hexdec(substr($utfstr, $i + 4, 2)) & 0x3f);
                    if (isset($UC2GBTABLE[$u])) {
                        $gb = $UC2GBTABLE[$u] + 0x8080;
                        $okstr .= chr($gb >> 8) . chr($gb & 0xFF);
                    }
                    $i = $i + 5;
                } else {
                    // Not decodable: emit the raw byte and consume its
                    // escape so the hex digits do not leak into the output.
                    $okstr .= chr($hexnext);
                    $i = $i + 2;
                }
            } else {
                $okstr .= $utfstr[$i];
            }
        } else if ($utfstr[$i] == "+") {
            $okstr .= " ";
        } else {
            $okstr .= $utfstr[$i];
        }
    }
    $okstr = trim($okstr);
    return $okstr;
}

//三字节的URL编码转成的utf8字符转为unicode编码 function url_utf2u($c) { $utfc = ""; $cs = split("%",$c); for($i=0;$i