65 lines
1.9 KiB
Go
65 lines
1.9 KiB
Go
// Package charset provides functions to decode and encode charsets.
|
|
//
|
|
// It imports all supported charsets, which adds about 1MiB to binaries size.
|
|
// Importing the package automatically sets message.CharsetReader.
|
|
package charset
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
|
|
"github.com/emersion/go-message"
|
|
"golang.org/x/text/encoding"
|
|
"golang.org/x/text/encoding/charmap"
|
|
"golang.org/x/text/encoding/htmlindex"
|
|
"golang.org/x/text/encoding/ianaindex"
|
|
"golang.org/x/text/encoding/unicode"
|
|
)
|
|
|
|
// Quirks table for charsets not handled by ianaindex
|
|
//
|
|
// A nil entry disables the charset.
|
|
//
|
|
// For aliases, see
|
|
// https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
|
var charsets = map[string]encoding.Encoding{
|
|
"ansi_x3.110-1983": charmap.ISO8859_1, // see RFC 1345 page 62, mostly superset of ISO 8859-1
|
|
"x-utf_8j": unicode.UTF8, // alias for UTF-8, see https://icu4c-demos.unicode.org/icu-bin/convexp?s=ALL
|
|
}
|
|
|
|
func init() {
|
|
message.CharsetReader = Reader
|
|
}
|
|
|
|
// Reader returns an io.Reader that converts the provided charset to UTF-8.
|
|
func Reader(charset string, input io.Reader) (io.Reader, error) {
|
|
var err error
|
|
enc, ok := charsets[strings.ToLower(charset)]
|
|
if ok && enc == nil {
|
|
return nil, fmt.Errorf("charset %q: charset is disabled", charset)
|
|
} else if !ok {
|
|
enc, err = ianaindex.MIME.Encoding(charset)
|
|
}
|
|
if enc == nil {
|
|
enc, err = ianaindex.MIME.Encoding("cs" + charset)
|
|
}
|
|
if enc == nil {
|
|
enc, err = htmlindex.Get(charset)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("charset %q: %v", charset, err)
|
|
}
|
|
// See https://github.com/golang/go/issues/19421
|
|
if enc == nil {
|
|
return nil, fmt.Errorf("charset %q: unsupported charset", charset)
|
|
}
|
|
return enc.NewDecoder().Reader(input), nil
|
|
}
|
|
|
|
// RegisterEncoding registers an encoding. This is intended to be called from
|
|
// the init function in packages that want to support additional charsets.
|
|
func RegisterEncoding(name string, enc encoding.Encoding) {
|
|
charsets[name] = enc
|
|
}
|