Golang 实现 GBK、Big5、UTF-8 之间的转换

从编码为 GBK 或 Big5 的网站爬取到的内容,如果直接当作 UTF-8 处理会显示为乱码,需要先做编码转换。Go 的 golang.org/x/text/encoding 库可以很方便地完成这种转换。

package main

import (
	"bytes"
	"fmt"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/encoding/traditionalchinese"
	"golang.org/x/text/transform"
	"io/ioutil"
)

//convert GBK to UTF-8
func DecodeGBK(s []byte) ([]byte, error) {
	I := bytes.NewReader(s)
	O := transform.NewReader(I, simplifiedchinese.GBK.NewDecoder())
	d, e := ioutil.ReadAll(O)
	if e != nil {
		return nil, e
	}
	return d, nil
}

//convert UTF-8 to GBK
func EncodeGBK(s []byte) ([]byte, error) {
	I := bytes.NewReader(s)
	O := transform.NewReader(I, simplifiedchinese.GBK.NewEncoder())
	d, e := ioutil.ReadAll(O)
	if e != nil {
		return nil, e
	}
	return d, nil
}

//convert BIG5 to UTF-8
func DecodeBig5(s []byte) ([]byte, error) {
	I := bytes.NewReader(s)
	O := transform.NewReader(I, traditionalchinese.Big5.NewDecoder())
	d, e := ioutil.ReadAll(O)
	if e != nil {
		return nil, e
	}
	return d, nil
}

//convert UTF-8 to BIG5
func EncodeBig5(s []byte) ([]byte, error) {
	I := bytes.NewReader(s)
	O := transform.NewReader(I, traditionalchinese.Big5.NewEncoder())
	d, e := ioutil.ReadAll(O)
	if e != nil {
		return nil, e
	}
	return d, nil
}

// main round-trips a UTF-8 sample through Big5 and then GBK, printing the
// bytes after every step. The Big5 and GBK results are printed as raw bytes,
// so they are expected to look garbled on a UTF-8 terminal.
//
// Each conversion error is reported and stops the demo, rather than being
// discarded and letting a nil result flow into the next step.
func main() {
	utf8Byte := []byte("UTF-8字符包子")
	fmt.Println(string(utf8Byte))

	// UTF-8 to Big5
	s, err := EncodeBig5(utf8Byte)
	if err != nil {
		fmt.Println("encode Big5:", err)
		return
	}
	fmt.Println(string(s))

	// Big5 to UTF-8
	s, err = DecodeBig5(s)
	if err != nil {
		fmt.Println("decode Big5:", err)
		return
	}
	fmt.Println(string(s))

	// UTF-8 to GBK
	s, err = EncodeGBK(s)
	if err != nil {
		fmt.Println("encode GBK:", err)
		return
	}
	fmt.Println(string(s))

	// GBK to UTF-8
	s, err = DecodeGBK(s)
	if err != nil {
		fmt.Println("decode GBK:", err)
		return
	}
	fmt.Println(string(s))
}

输出(第 2、4 行分别是 Big5、GBK 编码的字节,直接打印时不是合法的 UTF-8,因此显示为乱码):

UTF-8字符包子
UTF-8?r?????l
UTF-8字符包子
UTF-8?ַ?????
UTF-8字符包子
Related Articles