Apache 日志中记录的中文是以内码(\xhh 转义的原始字节)形式保存的,如下:
1 |
\xb6\xd4\xb6\xc0\xc1\xa2\xd1\xa7\xd4\xba\xbf\xc9\xb3\xd6\xd0\xf8\xb7\xa2\xd5\xb9\xce\xca\xcc\xe2 |
解码后对应的中文是:
1 |
对独立学院可持续发展问题 |
这种内码形式完全看不懂是什么意思,如何把里面的中文解析出来呢?
php参考如下代码:
1 2 3 4 5 |
<?php
/**
 * Decode the \xHH byte escapes found in an Apache log entry and convert the
 * resulting bytes to UTF-8.
 *
 * The previous version ran the log text through eval() to interpret the
 * escapes. Log content is attacker-controlled, so eval() on it allows
 * arbitrary PHP code execution; a regex substitution does the same job safely.
 *
 * Only \xHH sequences are decoded; any other backslash is left untouched.
 *
 * @param string $escaped     Text as it appears in the log (literal backslashes).
 * @param string $fromCharset Charset of the raw bytes. GBK is a superset of
 *                            GB2312, so it also covers GB2312-encoded logs.
 * @return string|false UTF-8 text, or false when iconv() cannot convert it.
 */
function decode_apache_escapes($escaped, $fromCharset = 'GBK')
{
    // Turn every \xHH into the single raw byte it stands for.
    $bytes = preg_replace_callback(
        '/\\\\x([0-9a-fA-F]{2})/',
        function ($m) {
            return chr(hexdec($m[1]));
        },
        $escaped
    );

    return iconv($fromCharset, 'UTF-8', $bytes);
}

// Single quotes keep the backslashes literal, exactly as they appear in the log file.
$str = '\xb6\xd4\xb6\xc0\xc1\xa2\xd1\xa7\xd4\xba\xbf\xc9\xb3\xd6\xd0\xf8\xb7\xa2\xd5\xb9\xce\xca\xcc\xe2';
echo decode_apache_escapes($str) . "\n";
?>
golang参考如下代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
import ( "fmt" "log" "os" "code.google.com/p/mahonia" ) func main() { var use_logfile bool use_logfile = true f, err := os.OpenFile("testlogfile", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf("error opening file: %v", err) } defer f.Close() if use_logfile { log.SetOutput(f) } s := "\xb6\xd4\xb6\xc0\xc1\xa2\xd1\xa7\xd4\xba\xbf\xc9\xb3\xd6\xd0\xf8\xb7\xa2\xd5\xb9\xce\xca\xcc\xe2" enc := mahonia.NewDecoder("UTF-8") x := enc.ConvertString(s) gbk := mahonia.NewDecoder("gbk") n := gbk.ConvertString(s) log.Printf("file:", x, n) fmt.Println(x, n) } |
java参考如下代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;

/**
 * Demonstrates how to turn the escaped byte sequences that Apache writes to
 * its logs (for example {@code \xb6\xd4}) back into readable text.
 */
public class HelloWorld {

    /**
     * Returns the value of an ASCII hexadecimal digit, or -1 if {@code c} is
     * not one. Only ASCII digits are accepted, unlike {@link Character#digit}.
     */
    private static int hexValue(final char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Maps the character following a backslash to the byte it denotes for the
     * non-hex escapes Apache emits, or -1 if it is not such an escape.
     */
    private static int simpleEscape(final char c) {
        switch (c) {
            case '\\':
                return '\\';
            case '"':
                return '"';
            case 'n':
                return '\n';
            case 'r':
                return '\r';
            case 't':
                return '\t';
            case 'b':
                return '\b';
            case 'v':
                return 0x0B;
            default:
                return -1;
        }
    }

    /**
     * Converts escaped log text into the raw byte sequence it represents.
     *
     * Every byte is collected into one stream and decoded only at the end, so
     * a multi-byte character whose second byte was logged as a printable ASCII
     * character (possible in GBK) is still reassembled correctly.
     */
    private static byte[] unescapeToBytes(final String s, final String charsetName)
            throws UnsupportedEncodingException {
        final int len = s.length();
        final ByteArrayOutputStream out = new ByteArrayOutputStream(len);
        int i = 0;
        while (i < len) {
            final char ch = s.charAt(i);
            if (ch == '\\' && i + 1 < len) {
                final char next = s.charAt(i + 1);
                if ((next == 'x' || next == 'X') && i + 3 < len) {
                    final int hi = hexValue(s.charAt(i + 2));
                    final int lo = hexValue(s.charAt(i + 3));
                    if (hi >= 0 && lo >= 0) {
                        out.write((hi << 4) | lo);
                        i += 4;
                        continue;
                    }
                }
                final int simple = simpleEscape(next);
                if (simple >= 0) {
                    out.write(simple);
                    i += 2;
                    continue;
                }
                // Unknown or truncated escape: fall through and keep the backslash.
            }
            final int cp = s.codePointAt(i);
            if (cp < 0x80) {
                // ASCII is a single identical byte in both GBK and UTF-8.
                out.write(cp);
            } else {
                // Unescaped non-ASCII text: encode it in the target charset so
                // that it survives the final decode step.
                final byte[] encoded = new String(Character.toChars(cp)).getBytes(charsetName);
                out.write(encoded, 0, encoded.length);
            }
            i += Character.charCount(cp);
        }
        return out.toByteArray();
    }

    /**
     * Decodes Apache log escapes in {@code str}, interpreting the resulting
     * bytes in the given charset.
     *
     * Handles {@code \xHH} (either case), {@code \\}, {@code \"} and the
     * C-style whitespace escapes {@code \n \r \t \b \v}. Any other backslash
     * is kept literally. Leading and trailing whitespace is trimmed first.
     *
     * @param str         escaped text as it appears in the log; must not be null
     * @param charsetName charset of the logged bytes, e.g. {@code "GBK"}
     * @return the decoded text
     * @throws UnsupportedEncodingException if {@code charsetName} is not supported
     * @throws NullPointerException         if {@code str} is null
     */
    public static String decodeApacheEscapes(String str, String charsetName)
            throws UnsupportedEncodingException {
        final byte[] raw = unescapeToBytes(str.trim(), charsetName);
        return new String(raw, charsetName);
    }

    public static void main(String[] args) throws UnsupportedEncodingException {
        // UTF-8: rewriting "\x" to "%" and URL-decoding is a shortcut that is
        // fine for this literal. For arbitrary log text prefer
        // decodeApacheEscapes(s, "UTF-8"), because URLDecoder also turns '+'
        // into a space and rejects stray '%' characters.
        String coderStr = "\\x22\\xE5\\x93\\x88\\xE5\\x93\\x88\\x22";
        String str = coderStr.replaceAll("\\\\x", "%");
        str = URLDecoder.decode(str, StandardCharsets.UTF_8.toString());
        System.out.println(str);

        // GBK
        coderStr = "\\xb6\\xd4\\xb6\\xc0\\xc1\\xa2\\xd1\\xa7\\xd4\\xba\\xbf\\xc9\\xb3\\xd6\\xd0\\xf8\\xb7\\xa2\\xd5\\xb9\\xce\\xca\\xcc\\xe2";
        str = getApacheChineseGBK(coderStr);
        System.out.println(str);
    }

    /**
     * Decodes Apache log escapes whose underlying bytes are GBK-encoded.
     *
     * Unlike the earlier implementation, this accepts any mix of escaped bytes
     * and plain text, does not require escapes to come in pairs, and leaves
     * the case of literal characters unchanged.
     *
     * @param str escaped text as it appears in the log; must not be null
     * @return the decoded text
     * @throws UnsupportedEncodingException if the runtime has no GBK charset
     */
    public static String getApacheChineseGBK(String str) throws UnsupportedEncodingException {
        return decodeApacheEscapes(str, "GBK");
    }
}