2009/06/08

php探测编码

<?php

/**
 * Guess the character encoding of a byte string.
 *
 * Candidates are tried in the order utf-8, gb2312, gbk, big5, and the first
 * one whose byte pattern consumes the entire input is returned. The gb2312,
 * gbk and big5 byte ranges overlap, so the narrowest range is tried first;
 * a string that fits several candidates is reported as the earliest one.
 * Because every byte pair accepted by the big5 pattern is also accepted by
 * the gbk pattern, such input is reported as 'gbk'.
 *
 * Notes:
 *  - An empty string is reported as 'utf-8' (nothing is left to reject).
 *  - A NUL byte (0x00) is not accepted by any pattern, so input containing
 *    one yields null.
 *  - Truncated multi-byte sequences (e.g. half of a Chinese character) leave
 *    unmatched bytes behind and therefore make that candidate fail.
 *
 * @param string $string Raw bytes to inspect.
 * @return string|null 'utf-8', 'gb2312', 'gbk' or 'big5'; null when no
 *                     candidate pattern covers the whole input.
 */
function get_encoding($string)
{
    // Double-quoted strings so PHP turns the \x escapes into raw bytes;
    // the patterns are matched byte-wise (no /u modifier).
    $re = array(
        // Four-byte lead bytes stop at 0xF4: 0xF5-0xFF never occur in UTF-8.
        'utf-8'  => "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf4][\x80-\xbf]{3}/",
        'gb2312' => "/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/",
        'gbk'    => "/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/",
        // Trail byte is either 0x40-0x7E or 0xA1-0xFE (two separate ranges).
        'big5'   => "/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/",
    );

    foreach ($re as $encoding => $pattern) {
        // Strip every sequence that is legal in this encoding; anything left
        // over is a byte the encoding cannot explain.
        $rstring = preg_replace($pattern, "", $string);

        // Strict comparison: preg_replace() returns null on a PCRE error and
        // null == "" would wrongly count as a successful match.
        if ($rstring === "") {
            return $encoding;
        }
    }

    return null;
}


?>

注意:遇到内有乱码或者错误字符的情况未作测试,例如半个汉字的情形。

 

php探测字符编码

<DIV style="BACKGROUND: #fdfdfd; COLOR: black"><U>PHP语言</U>: <A
href="http://fayaa.com/code/view/1934/">Codee#1934</A></DIV>
<DIV class=source style="COLOR: #000000; FONT-FAMILY:
'[object]','Consolas','Lucida Console','Courier New';
BACKGROUND-COLOR: #f9f7ed" jQuery1244450739390="2"><SPAN style="COLOR:
#008080">&lt;?php</SPAN><BR><BR><SPAN style="COLOR: #008800;
FONT-STYLE: italic">//探测编码。注意:由于gb2312,gbk,big5范围有重合,按照最小范围判断</SPAN><BR><SPAN
style="FONT-WEIGHT: bold; COLOR: #000080">function</SPAN> <SPAN
style="COLOR: #000000">get_encoding</SPAN>(<SPAN style="COLOR:
#000000">$string</SPAN>)<BR><SPAN style="COLOR:
#000000">{</SPAN><BR>&nbsp;&nbsp;&nbsp; <SPAN style="COLOR:
#000000">$re</SPAN><SPAN style="COLOR: #000000">[</SPAN><SPAN
style="COLOR: #0000ff">&#39;utf-8&#39;</SPAN><SPAN style="COLOR:
#000000">]</SPAN> <SPAN style="COLOR: #000000">=</SPAN> <SPAN
style="COLOR: #0000ff">&quot;/[</SPAN><SPAN style="COLOR:
#0000ff">\x01</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\x7f</SPAN><SPAN style="COLOR:
#0000ff">]|[</SPAN><SPAN style="COLOR: #0000ff">\xc2</SPAN><SPAN
style="COLOR: #0000ff">-</SPAN><SPAN style="COLOR:
#0000ff">\xdf</SPAN><SPAN style="COLOR: #0000ff">][</SPAN><SPAN
style="COLOR: #0000ff">\x80</SPAN><SPAN style="COLOR:
#0000ff">-</SPAN><SPAN style="COLOR: #0000ff">\xbf</SPAN><SPAN
style="COLOR: #0000ff">]|[</SPAN><SPAN style="COLOR:
#0000ff">\xe0</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\xef</SPAN><SPAN style="COLOR:
#0000ff">][</SPAN><SPAN style="COLOR: #0000ff">\x80</SPAN><SPAN
style="COLOR: #0000ff">-</SPAN><SPAN style="COLOR:
#0000ff">\xbf</SPAN><SPAN style="COLOR: #0000ff">]{2}|[</SPAN><SPAN
style="COLOR: #0000ff">\xf0</SPAN><SPAN style="COLOR:
#0000ff">-</SPAN><SPAN style="COLOR: #0000ff">\xff</SPAN><SPAN
style="COLOR: #0000ff">][</SPAN><SPAN style="COLOR:
#0000ff">\x80</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\xbf</SPAN><SPAN style="COLOR:
#0000ff">]{3}/&quot;</SPAN>;<BR>&nbsp;&nbsp;&nbsp; <SPAN style="COLOR:
#000000">$re</SPAN><SPAN style="COLOR: #000000">[</SPAN><SPAN
style="COLOR: #0000ff">&#39;gb2312&#39;</SPAN><SPAN style="COLOR:
#000000">]</SPAN> <SPAN style="COLOR: #000000">=</SPAN> <SPAN
style="COLOR: #0000ff">&quot;/[</SPAN><SPAN style="COLOR:
#0000ff">\x01</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\x7f</SPAN><SPAN style="COLOR:
#0000ff">]|[</SPAN><SPAN style="COLOR: #0000ff">\xb0</SPAN><SPAN
style="COLOR: #0000ff">-</SPAN><SPAN style="COLOR:
#0000ff">\xf7</SPAN><SPAN style="COLOR: #0000ff">][</SPAN><SPAN
style="COLOR: #0000ff">\xa0</SPAN><SPAN style="COLOR:
#0000ff">-</SPAN><SPAN style="COLOR: #0000ff">\xfe</SPAN><SPAN
style="COLOR: #0000ff">]/&quot;</SPAN>;<BR>&nbsp;&nbsp;&nbsp; <SPAN
style="COLOR: #000000">$re</SPAN><SPAN style="COLOR:
#000000">[</SPAN><SPAN style="COLOR:
#0000ff">&#39;gbk&#39;</SPAN><SPAN style="COLOR: #000000">]</SPAN>
<SPAN style="COLOR: #000000">=</SPAN> <SPAN style="COLOR:
#0000ff">&quot;/[</SPAN><SPAN style="COLOR: #0000ff">\x01</SPAN><SPAN
style="COLOR: #0000ff">-</SPAN><SPAN style="COLOR:
#0000ff">\x7f</SPAN><SPAN style="COLOR: #0000ff">]|[</SPAN><SPAN
style="COLOR: #0000ff">\x81</SPAN><SPAN style="COLOR:
#0000ff">-</SPAN><SPAN style="COLOR: #0000ff">\xfe</SPAN><SPAN
style="COLOR: #0000ff">][</SPAN><SPAN style="COLOR:
#0000ff">\x40</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\xfe</SPAN><SPAN style="COLOR:
#0000ff">]/&quot;</SPAN>;<BR>&nbsp;&nbsp;&nbsp; <SPAN style="COLOR:
#000000">$re</SPAN><SPAN style="COLOR: #000000">[</SPAN><SPAN
style="COLOR: #0000ff">&#39;big5&#39;</SPAN><SPAN style="COLOR:
#000000">]</SPAN> <SPAN style="COLOR: #000000">=</SPAN> <SPAN
style="COLOR: #0000ff">&quot;/[</SPAN><SPAN style="COLOR:
#0000ff">\x01</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\x7f</SPAN><SPAN style="COLOR:
#0000ff">]|[</SPAN><SPAN style="COLOR: #0000ff">\x81</SPAN><SPAN
style="COLOR: #0000ff">-</SPAN><SPAN style="COLOR:
#0000ff">\xfe</SPAN><SPAN style="COLOR: #0000ff">]([</SPAN><SPAN
style="COLOR: #0000ff">\x40</SPAN><SPAN style="COLOR:
#0000ff">-</SPAN><SPAN style="COLOR: #0000ff">\x7e</SPAN><SPAN
style="COLOR: #0000ff">]|[</SPAN><SPAN style="COLOR:
#0000ff">\xa1</SPAN><SPAN style="COLOR: #0000ff">-</SPAN><SPAN
style="COLOR: #0000ff">\xfe</SPAN><SPAN style="COLOR:
#0000ff">])/&quot;</SPAN>;<BR>&nbsp;&nbsp;&nbsp;
<BR>&nbsp;&nbsp;&nbsp; <SPAN style="FONT-WEIGHT: bold; COLOR:
#000080">foreach</SPAN>( <SPAN style="COLOR: #000000">$re</SPAN> <SPAN
style="FONT-WEIGHT: bold; COLOR: #000080">as</SPAN> <SPAN
style="COLOR: #000000">$encoding</SPAN><SPAN style="COLOR:
#000000">=&gt;</SPAN><SPAN style="COLOR: #000000">$pattern</SPAN>
)<BR>&nbsp;&nbsp;&nbsp; <SPAN style="COLOR:
#000000">{</SPAN><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <SPAN
style="COLOR: #000000">$rstring</SPAN> <SPAN style="COLOR:
#000000">=</SPAN> <SPAN style="COLOR:
#000000">preg_replace</SPAN>(<SPAN style="COLOR:
#000000">$pattern</SPAN><SPAN style="COLOR: #000000">,</SPAN> <SPAN
style="COLOR: #0000ff">&quot;&quot;</SPAN><SPAN style="COLOR:
#000000">,</SPAN> <SPAN style="COLOR:
#000000">$string</SPAN>);<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<SPAN style="FONT-WEIGHT: bold; COLOR: #000080">if</SPAN>( <SPAN
style="COLOR: #000000">$rstring</SPAN> <SPAN style="COLOR:
#000000">==</SPAN> <SPAN style="COLOR:
#0000ff">&quot;&quot;</SPAN>)<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<SPAN style="COLOR:
#000000">{</SPAN><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<SPAN style="FONT-WEIGHT: bold; COLOR: #000080">return</SPAN> <SPAN
style="COLOR: #000000">$encoding</SPAN>;<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<SPAN style="COLOR: #000000">}</SPAN><BR>&nbsp;&nbsp;&nbsp; <SPAN
style="COLOR: #000000">}</SPAN><BR>&nbsp;&nbsp;&nbsp; <SPAN
style="FONT-WEIGHT: bold; COLOR: #000080">return</SPAN> <SPAN
style="FONT-WEIGHT: bold; COLOR: #000080">null</SPAN>;<BR><SPAN
style="COLOR: #000000">}</SPAN><BR><BR><SPAN style="COLOR:
#008080">?&gt;</SPAN><BR></DIV>