blob: cd16f2c6b4987146c6358776d96f3a675aad589b [file] [log] [blame]
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file defines encoding and decoding functions for encoded-words
// as defined in RFC 2047.
package quotedprintable
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"strings"
"sync"
"unicode/utf8"
"gopkg.in/alexcesaro/quotedprintable.v2/internal"
)
// Encoding represents the encoding used in encoded-words. It must be one of the
// two encodings defined in RFC 2047 ("B" or "Q" encoding).
type Encoding string
const (
// Q represents the Q-encoding defined in RFC 2047.
Q Encoding = "Q"
// B represents the Base64 encoding defined in RFC 2045.
B Encoding = "B"
)
// A HeaderEncoder is an RFC 2047 encoded-word encoder.
type HeaderEncoder struct {
charset string
encoding Encoding
splitWords bool
}
const maxEncodedWordLen = 75 // As defined in RFC 2047, section 2
// NewHeaderEncoder returns a new HeaderEncoder to encode strings in the
// specified charset.
func (enc Encoding) NewHeaderEncoder(charset string) *HeaderEncoder {
// We automatically split encoded-words only when the charset is UTF-8
// because since multi-octet character must not be split across adjacent
// encoded-words (see RFC 2047, section 5) there is no way to split words
// without knowing how the charset works.
splitWords := strings.ToUpper(charset) == "UTF-8"
return &HeaderEncoder{charset, enc, splitWords}
}
// Encode encodes a string to be used as a MIME header value.
func (e *HeaderEncoder) Encode(s string) string {
return e.encodeWord(s)
}
// NeedsEncoding returns whether the given header content needs to be encoded
// into an encoded-words.
func NeedsEncoding(s string) bool {
for i := 0; i < len(s); i++ {
b := s[i]
if (b > '~' || b < ' ') && b != '\t' {
return true
}
}
return false
}
type bufPool struct {
sync.Pool
}
func (b *bufPool) GetBuffer() *bytes.Buffer {
return bufFree.Get().(*bytes.Buffer)
}
func (b *bufPool) PutBuffer(buf *bytes.Buffer) {
if buf.Len() > 1024 {
return
}
buf.Reset()
b.Put(buf)
}
var bufFree = &bufPool{
sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
},
}
// encodeWord encodes a string into an encoded-word.
func (e *HeaderEncoder) encodeWord(s string) string {
buf := bufFree.GetBuffer()
defer bufFree.PutBuffer(buf)
e.openWord(buf)
switch {
case e.encoding == B:
e.encodeWordB(buf, s)
default:
e.encodeWordQ(buf, s)
}
e.closeWord(buf)
return buf.String()
}
func (e *HeaderEncoder) encodeWordB(buf *bytes.Buffer, s string) {
maxLen := maxEncodedWordLen - buf.Len() - 2
if !e.splitWords || base64.StdEncoding.EncodedLen(len(s)) <= maxLen {
buf.WriteString(base64.StdEncoding.EncodeToString([]byte(s)))
return
}
v := []byte(s)
var n, last, runeSize int
for i := 0; i < len(s); i += runeSize {
runeSize = getRuneSize(s, i)
if base64.StdEncoding.EncodedLen(n+runeSize) <= maxLen {
n += runeSize
} else {
buf.WriteString(base64.StdEncoding.EncodeToString(v[last:i]))
e.splitWord(buf)
last = i
n = runeSize
}
}
buf.WriteString(base64.StdEncoding.EncodeToString(v[last:]))
}
func (e *HeaderEncoder) encodeWordQ(buf *bytes.Buffer, s string) {
if !e.splitWords {
for i := 0; i < len(s); i++ {
writeQ(buf, s[i])
}
return
}
var runeSize int
n := buf.Len()
for i := 0; i < len(s); i += runeSize {
b := s[i]
var encLen int
if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
encLen, runeSize = 1, 1
} else {
runeSize = getRuneSize(s, i)
encLen = 3 * runeSize
}
// We remove 2 to let spaces for closing chars "?="
if n+encLen > maxEncodedWordLen-2 {
n = e.splitWord(buf)
}
writeQString(buf, s[i:i+runeSize])
n += encLen
}
}
func (e *HeaderEncoder) openWord(buf *bytes.Buffer) int {
n := buf.Len()
buf.WriteString("=?")
buf.WriteString(e.charset)
buf.WriteByte('?')
buf.WriteString(string(e.encoding))
buf.WriteByte('?')
return buf.Len() - n
}
func (e *HeaderEncoder) closeWord(buf *bytes.Buffer) {
buf.WriteString("?=")
}
func (e *HeaderEncoder) splitWord(buf *bytes.Buffer) int {
e.closeWord(buf)
buf.WriteString("\r\n ")
return e.openWord(buf)
}
func getRuneSize(s string, i int) int {
runeSize := 1
for i+runeSize < len(s) && !utf8.RuneStart(s[i+runeSize]) {
runeSize++
}
return runeSize
}
func writeQString(buf *bytes.Buffer, s string) {
for i := 0; i < len(s); i++ {
writeQ(buf, s[i])
}
}
func writeQ(buf *bytes.Buffer, b byte) {
switch {
case b == ' ':
buf.WriteByte('_')
case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
buf.WriteByte(b)
default:
enc := make([]byte, 3)
internal.EncodeByte(enc, b)
buf.Write(enc)
}
}
// DecodeHeader decodes a MIME header by decoding all encoded-words of the
// header. The returned text is encoded in the returned charset. Text is not
// necessarily encoded in UTF-8. Returned charset is always upper case. This
// function does not support decoding headers with multiple encoded-words
// using different charsets.
func DecodeHeader(header string) (text, charset string, err error) {
buf := bufFree.GetBuffer()
defer bufFree.PutBuffer(buf)
for {
i := strings.IndexByte(header, '=')
if i == -1 {
break
}
if i > 0 {
buf.WriteString(header[:i])
header = header[i:]
}
word := getEncodedWord(header)
if word == "" {
buf.WriteByte('=')
header = header[1:]
continue
}
for {
dec, wordCharset, err := decodeWord(word)
if err != nil {
buf.WriteString(word)
header = header[len(word):]
break
}
if charset == "" {
charset = wordCharset
} else if charset != wordCharset {
return "", "", fmt.Errorf("quotedprintable: multiple charsets in header are not supported: %q and %q used", charset, wordCharset)
}
buf.Write(dec)
header = header[len(word):]
// White-space and newline characters separating two encoded-words
// must be deleted.
var j int
for j = 0; j < len(header); j++ {
b := header[j]
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
break
}
}
if j == 0 {
// If there are no white-space characters following the current
// encoded-word there is nothing special to do.
break
}
word = getEncodedWord(header[j:])
if word == "" {
break
}
header = header[j:]
}
}
buf.WriteString(header)
return buf.String(), charset, nil
}
func getEncodedWord(s string) string {
if len(s) < 2 {
return ""
}
if s[0] != '=' {
return ""
}
if s[1] != '?' {
return ""
}
n := 2
for {
if n >= len(s) {
return ""
}
b := s[n]
if (b < '0' || b > '9') &&
(b < 'A' || b > 'Z') &&
(b < 'a' || b > 'z') &&
b != '-' {
break
}
n++
}
if s[n] != '?' {
return ""
}
if n+2 >= len(s) {
return ""
}
b := s[n+1]
if b != 'Q' && b != 'B' && b != 'q' && b != 'b' {
return ""
}
if s[n+2] != '?' {
return ""
}
n = n + 3
for {
if n >= len(s) {
return ""
}
if s[n] < ' ' || s[n] > '~' {
return ""
}
if s[n] == '?' {
n++
break
}
n++
}
if n >= len(s) || s[n] != '=' {
return ""
}
return s[0 : n+1]
}
func decodeWord(s string) (text []byte, charset string, err error) {
fields := strings.Split(s, "?")
if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
return []byte(s), "", nil
}
charset, enc, src := fields[1], fields[2], fields[3]
var dec []byte
switch Encoding(strings.ToUpper(enc)) {
case B:
if dec, err = base64.StdEncoding.DecodeString(src); err != nil {
return dec, charset, err
}
case Q:
if dec, err = qDecode(src); err != nil {
return dec, charset, err
}
default:
return []byte(""), charset, fmt.Errorf("quotedprintable: RFC 2047 encoding not supported: %q", enc)
}
return dec, strings.ToUpper(charset), nil
}
// qDecode decodes a Q encoded string.
func qDecode(s string) ([]byte, error) {
length := len(s)
for i := 0; i < len(s); i++ {
if s[i] == '=' {
length -= 2
i += 2
}
}
dec := make([]byte, length)
n := 0
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case c == '_':
dec[n] = ' '
case c == '=':
if i+2 >= len(s) {
return []byte(""), io.ErrUnexpectedEOF
}
buf, err := internal.ReadHexByte([]byte(s[i+1:]))
if err != nil {
return []byte(""), err
}
dec[n] = buf
i += 2
case (c >= ' ' && c <= '~') || c == '\n' || c == '\r' || c == '\t':
dec[n] = c
default:
return []byte(""), fmt.Errorf("quotedprintable: invalid unescaped byte %#02x in Q encoded string at byte %d", c, i)
}
n++
}
return dec, nil
}