<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title><![CDATA[沧海一粟]]></title> 
<link>http://www.dzhope.com/index.php</link> 
<description><![CDATA[Web系统架构与服务器运维、PHP开发]]></description> 
<language>zh-cn</language> 
<copyright><![CDATA[沧海一粟]]></copyright>
<item>
<link>http://www.dzhope.com/post//</link>
<title><![CDATA[不用iconv函数实现UTF-8编码转换GB2312的PHP函数]]></title> 
<author>jed &lt;jed521@163.com&gt;</author>
<category><![CDATA[代码编程]]></category>
<pubDate>Wed, 11 Oct 2006 08:08:27 +0000</pubDate> 
<guid>http://www.dzhope.com/post//</guid> 
<description>
<![CDATA[ 
	如果使用 iconv() 函数转换编码就相对比较简单了，不过很多虚拟主机里并不支持这个组件，我在网上找半天，才找到一个gb2312转utf-8的方法，但不能逆向转换。 <br/><br/>这个函数如下： <br/><br/>/*******************************<br/>//GB转UTF-8编码<br/>*******************************/<br/>function gb2utf8($gbstr) {<br/> global $CODETABLE;<br/> if(trim($gbstr)==&quot;&quot;) return $gbstr;<br/> if(empty($CODETABLE)){<br/> &nbsp;$filename = dirname(__FILE__).&quot;/gb2312-utf8.table&quot;;<br/> &nbsp;$fp = fopen($filename,&quot;r&quot;);<br/> &nbsp;while ($l = fgets($fp,15))<br/> &nbsp;{ $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6); }<br/> &nbsp;fclose($fp);<br/> }<br/> $ret = &quot;&quot;;<br/> $utf8 = &quot;&quot;;<br/> while ($gbstr) {<br/> &nbsp;if (ord(substr($gbstr, 0, 1)) &gt; 127) {<br/> &nbsp; $thisW = substr($gbstr, 0, 2);<br/> &nbsp; $gbstr = substr($gbstr, 2, strlen($gbstr));<br/> &nbsp; $utf8 = &quot;&quot;;<br/> &nbsp; @$utf8 = u2utf8(hexdec($CODETABLE[hexdec(bin2hex($thisW)) - 0x8080]));<br/> &nbsp; if($utf8!=&quot;&quot;){<br/> &nbsp; &nbsp;for ($i = 0;$i &lt; strlen($utf8);$i += 3)<br/> &nbsp; &nbsp; $ret .= chr(substr($utf8, $i, 3));<br/> &nbsp; }<br/> &nbsp;}<br/> &nbsp;else<br/> &nbsp;{<br/> &nbsp; $ret .= substr($gbstr, 0, 1);<br/> &nbsp; $gbstr = substr($gbstr, 1, strlen($gbstr));<br/> &nbsp;}<br/> }<br/> return $ret;<br/>}<br/>//Unicode转utf8<br/>function u2utf8($c) {<br/> for ($i = 0;$i &lt; count($c);$i++)<br/> &nbsp;$str = &quot;&quot;;<br/> if ($c &lt; 0x80) {<br/> &nbsp;$str .= $c;<br/> } else if ($c &lt; 0x800) {<br/> &nbsp;$str .= (0xC0 &#124; $c &gt;&gt; 6);<br/> &nbsp;$str .= (0x80 &#124; $c &amp; 0x3F);<br/> } else if ($c &lt; 0x10000) {<br/> &nbsp;$str .= (0xE0 &#124; $c &gt;&gt; 12);<br/> &nbsp;$str .= (0x80 &#124; $c &gt;&gt; 6 &amp; 0x3F);<br/> &nbsp;$str .= (0x80 &#124; $c &amp; 0x3F);<br/> } else if ($c &lt; 0x200000) {<br/> &nbsp;$str .= (0xF0 &#124; $c 
&gt;&gt; 18);<br/> &nbsp;$str .= (0x80 &#124; $c &gt;&gt; 12 &amp; 0x3F);<br/> &nbsp;$str .= (0x80 &#124; $c &gt;&gt; 6 &amp; 0x3F);<br/> &nbsp;$str .= (0x80 &#124; $c &amp; 0x3F);<br/> }<br/> return $str;<br/>} <br/><br/>因为gb2312都是双字节的，因此转换为utf-8就相对比较简单，但反之就很麻烦了，我尝试了一下： <br/><br/>这样 <br/><br/>function utf82gb($utfstr)<br/>{<br/> global $UC2GBTABLE;<br/> $okstr = &quot;&quot;;<br/> if(trim($utfstr)==&quot;&quot;) return $utfstr;<br/> if(empty($UC2GBTABLE)){<br/> &nbsp;$filename = dirname(__FILE__).&quot;/gb2312-utf8.table&quot;;<br/> &nbsp;$fp = fopen($filename,&quot;r&quot;);<br/> &nbsp;while($l = fgets($fp,15))<br/> &nbsp;{ $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));}<br/> &nbsp;fclose($fp);<br/> }<br/> $ulen = strlen($utfstr);<br/> for($i=0;$i&lt;$ulen;$i++)<br/> {<br/> &nbsp;if(ord($utfstr[$i])&lt;0x81) $okstr .= $utfstr[$i];<br/> &nbsp;else<br/> &nbsp;{<br/> &nbsp; if($ulen&gt;$i+2)<br/> &nbsp; {<br/> &nbsp; &nbsp;$utfc = substr($utfstr,$i,3);<br/> &nbsp; &nbsp;$c = &quot;&quot;;<br/> &nbsp; &nbsp;@$c = dechex($UC2GBTABLE[utf82u_3($utfc)]+0x8080);<br/> &nbsp; &nbsp;if($c!=&quot;&quot;){<br/> &nbsp; &nbsp; &nbsp; $okstr .= chr(hexdec($c[0].$c[1])).chr(hexdec($c[2].$c[3]));<br/> &nbsp; &nbsp;}<br/> &nbsp; }<br/> &nbsp; else<br/> &nbsp; { $okstr .= $utfstr[$i]; }<br/> &nbsp;}<br/> &nbsp;}<br/> &nbsp;$okstr = trim($okstr);<br/> &nbsp;return $okstr;<br/>} <br/><br/>function utf82u_3($c)<br/>{<br/> &nbsp; &nbsp; &nbsp;$n = (ord($c[0]) &amp; 0x1f) &lt;&lt; 12;<br/> &nbsp; &nbsp; &nbsp;$n += (ord($c[1]) &amp; 0x3f) &lt;&lt; 6;<br/> &nbsp; &nbsp; &nbsp;$n += ord($c[2]) &amp; 0x3f;<br/> &nbsp; &nbsp; &nbsp;return $n;<br/>} <br/><br/>按这种方法，大部分字符也算是能转换成功的了，不过总是有点不妥之处，我把程序改成这样子： <br/><br/>function utf82gb($utfstr)<br/>{<br/> global $UC2GBTABLE;<br/> $okstr = 
&quot;&quot;;<br/> if(trim($utfstr)==&quot;&quot;) return $utfstr;<br/> if(empty($UC2GBTABLE)){<br/> &nbsp;$filename = dirname(__FILE__).&quot;/gb2312-utf8.table&quot;;<br/> &nbsp;$fp = fopen($filename,&quot;r&quot;);<br/> &nbsp;while($l = fgets($fp,15))<br/> &nbsp;{ $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));}<br/> &nbsp;fclose($fp);<br/> }<br/> $okstr = &quot;&quot;;<br/> $utfstr = urlencode($utfstr);<br/> $ulen = strlen($utfstr);<br/> for($i=0;$i&lt;$ulen;$i++)<br/> {<br/> &nbsp;if($utfstr[$i]==&quot;%&quot;)<br/> &nbsp;{<br/> &nbsp; if($ulen&gt;$i+2){<br/> &nbsp; &nbsp;$hexnext = hexdec(&quot;0x&quot;.substr($utfstr,$i+1,2));<br/> &nbsp; &nbsp;if($hexnext&lt;127){<br/> &nbsp; &nbsp; $okstr .= chr($hexnext);<br/> &nbsp; &nbsp; $i = $i+2;<br/> &nbsp; &nbsp;}<br/> &nbsp; &nbsp;else{<br/> &nbsp; &nbsp; if($ulen&gt;=$i+9){<br/> &nbsp; &nbsp; &nbsp;$hexnext = substr($utfstr,$i+1,8);<br/> &nbsp; &nbsp; &nbsp;$c = &quot;&quot;;<br/> &nbsp; &nbsp; &nbsp;@$c = dechex($UC2GBTABLE[url_utf2u($hexnext)]+0x8080);<br/> &nbsp; &nbsp; &nbsp;if($c!=&quot;&quot;){<br/> &nbsp; &nbsp; &nbsp; &nbsp;$okstr .= chr(hexdec($c[0].$c[1])).chr(hexdec($c[2].$c[3]));<br/> &nbsp; &nbsp; &nbsp;}<br/> &nbsp; &nbsp; &nbsp;$i = $i+8;<br/> &nbsp; &nbsp; }<br/> &nbsp; &nbsp;}<br/> &nbsp; }<br/> &nbsp; else<br/> &nbsp; { $okstr .= $utfstr[$i]; }<br/> &nbsp;}<br/> &nbsp;else if($utfstr[$i]==&quot;+&quot;)<br/> &nbsp; $okstr .= &quot; &quot;;<br/> &nbsp;else<br/> &nbsp; $okstr .= $utfstr[$i];<br/> }<br/> $okstr = trim($okstr);<br/> return $okstr;<br/>}<br/>//三字节的URL编码转成的utf8字符转为unicode编码<br/>function url_utf2u($c)<br/>{<br/> $utfc = &quot;&quot;;<br/> $cs = split(&quot;%&quot;,$c);<br/> for($i=0;$i&lt;count($cs);$i++){<br/> &nbsp;$utfc .= 
chr(hexdec(&quot;0x&quot;.$cs[$i]));<br/> }<br/> $n = (ord($utfc[0]) &amp; 0x1f) &lt;&lt; 12;<br/> &nbsp;$n += (ord($utfc[1]) &amp; 0x3f) &lt;&lt; 6;<br/> &nbsp;$n += ord($utfc[2]) &amp; 0x3f;<br/> return $n;<br/>} <br/><br/>一测试，发现完全OK，而且速度居然比上一个方法要快，我真是搞不懂这是什么原因了。 <br/><br/>谁要 gb2312-utf8.table 这个文件请加我的QQ 2500875 IT柏拉图 或与 1877000 泡泡 联系。 <br/><br/>Tags - <a href="http://www.dzhope.com/tags/utf-8/" rel="tag">utf-8</a> , <a href="http://www.dzhope.com/tags/gb2312/" rel="tag">gb2312</a> , <a href="http://www.dzhope.com/tags/php%25E5%2587%25BD%25E6%2595%25B0/" rel="tag">php函数</a>
]]>
</description>
</item><item>
<link>http://www.dzhope.com/post//#blogcomment</link>
<title><![CDATA[[评论] 不用iconv函数实现UTF-8编码转换GB2312的PHP函数]]></title> 
<author> &lt;user@domain.com&gt;</author>
<category><![CDATA[评论]]></category>
<pubDate>Thu, 01 Jan 1970 00:00:00 +0000</pubDate> 
<guid>http://www.dzhope.com/post//#blogcomment</guid> 
<description>
<![CDATA[ 
	
]]>
</description>
</item>
</channel>
</rss>