<?php
/**
 * Guess the character encoding of a byte string.
 *
 * Candidates are tried in the order utf-8, gb2312, gbk, big5. For each one,
 * every byte sequence that is legal in that encoding is stripped from
 * $string; the first candidate that leaves nothing behind is returned.
 *
 * The GB2312, GBK and Big5 byte ranges overlap, so the narrower range is
 * tried first. Every byte pair accepted by the big5 pattern is also accepted
 * by the gbk pattern, which is tried earlier, so with this ordering 'big5'
 * is never actually returned; the entry is kept (with its character class
 * repaired) so the candidate list stays complete.
 *
 * An empty string is reported as 'utf-8', because nothing is left over after
 * the first candidate. A NUL byte is not accepted by any candidate.
 *
 * @param string $string Raw bytes to inspect.
 *
 * @return string|null 'utf-8', 'gb2312', 'gbk' or 'big5', or null when no
 *                     candidate accounts for every byte.
 */
function get_encoding($string)
{
    // Single-byte range shared by every candidate (NUL deliberately excluded).
    $ascii = '[\x01-\x7f]';

    // Single-quoted so PCRE, not PHP, interprets the \xHH escapes.
    $patterns = [
        // Well-formed UTF-8 only: no overlong forms, no UTF-16 surrogates,
        // nothing above U+10FFFF (lead bytes F5-FF are never valid).
        'utf-8' => '/' . $ascii
            . '|[\xc2-\xdf][\x80-\xbf]'
            . '|\xe0[\xa0-\xbf][\x80-\xbf]'
            . '|[\xe1-\xec\xee\xef][\x80-\xbf]{2}'
            . '|\xed[\x80-\x9f][\x80-\xbf]'
            . '|\xf0[\x90-\xbf][\x80-\xbf]{2}'
            . '|[\xf1-\xf3][\x80-\xbf]{3}'
            . '|\xf4[\x80-\x8f][\x80-\xbf]{2}'
            . '/',
        // GB2312: symbol rows (lead A1-A9) and hanzi rows (lead B0-F7);
        // the trail byte is always A1-FE (A0 belongs to GBK only).
        'gb2312' => '/' . $ascii . '|[\xa1-\xa9\xb0-\xf7][\xa1-\xfe]/',
        'gbk' => '/' . $ascii . '|[\x81-\xfe][\x40-\xfe]/',
        // Trail byte is 40-7E or A1-FE; the second range used to be missing
        // its opening bracket and matched a literal byte sequence instead.
        'big5' => '/' . $ascii . '|[\x81-\xfe](?:[\x40-\x7e]|[\xa1-\xfe])/',
    ];

    foreach ($patterns as $encoding => $pattern) {
        $leftover = preg_replace($pattern, '', $string);

        // Strict comparison: preg_replace() returns null on a PCRE failure,
        // and null == "" would wrongly count that as a successful match.
        if ($leftover === '') {
            return $encoding;
        }
    }

    return null;
}
?>
/**
 * Guess the character encoding of a byte string.
 *
 * Candidates are tried in the order utf-8, gb2312, gbk, big5. For each one,
 * every byte sequence that is legal in that encoding is stripped from
 * $string; the first candidate that leaves nothing behind is returned.
 *
 * The GB2312, GBK and Big5 byte ranges overlap, so the narrower range is
 * tried first. Every byte pair accepted by the big5 pattern is also accepted
 * by the gbk pattern, which is tried earlier, so with this ordering 'big5'
 * is never actually returned; the entry is kept (with its character class
 * repaired) so the candidate list stays complete.
 *
 * An empty string is reported as 'utf-8', because nothing is left over after
 * the first candidate. A NUL byte is not accepted by any candidate.
 *
 * @param string $string Raw bytes to inspect.
 *
 * @return string|null 'utf-8', 'gb2312', 'gbk' or 'big5', or null when no
 *                     candidate accounts for every byte.
 */
function get_encoding($string)
{
    // Single-byte range shared by every candidate (NUL deliberately excluded).
    $ascii = '[\x01-\x7f]';

    // Single-quoted so PCRE, not PHP, interprets the \xHH escapes.
    $patterns = [
        // Well-formed UTF-8 only: no overlong forms, no UTF-16 surrogates,
        // nothing above U+10FFFF (lead bytes F5-FF are never valid).
        'utf-8' => '/' . $ascii
            . '|[\xc2-\xdf][\x80-\xbf]'
            . '|\xe0[\xa0-\xbf][\x80-\xbf]'
            . '|[\xe1-\xec\xee\xef][\x80-\xbf]{2}'
            . '|\xed[\x80-\x9f][\x80-\xbf]'
            . '|\xf0[\x90-\xbf][\x80-\xbf]{2}'
            . '|[\xf1-\xf3][\x80-\xbf]{3}'
            . '|\xf4[\x80-\x8f][\x80-\xbf]{2}'
            . '/',
        // GB2312: symbol rows (lead A1-A9) and hanzi rows (lead B0-F7);
        // the trail byte is always A1-FE (A0 belongs to GBK only).
        'gb2312' => '/' . $ascii . '|[\xa1-\xa9\xb0-\xf7][\xa1-\xfe]/',
        'gbk' => '/' . $ascii . '|[\x81-\xfe][\x40-\xfe]/',
        // Trail byte is 40-7E or A1-FE; the second range used to be missing
        // its opening bracket and matched a literal byte sequence instead.
        'big5' => '/' . $ascii . '|[\x81-\xfe](?:[\x40-\x7e]|[\xa1-\xfe])/',
    ];

    foreach ($patterns as $encoding => $pattern) {
        $leftover = preg_replace($pattern, '', $string);

        // Strict comparison: preg_replace() returns null on a PCRE failure,
        // and null == "" would wrongly count that as a successful match.
        if ($leftover === '') {
            return $encoding;
        }
    }

    return null;
}
?>
注意:遇到内有乱码或者错误字符的情况未作测试,例如半个汉字的情形。
没有评论:
发表评论