2019-05-21 05:36:23 +08:00
|
|
|
package internal
|
|
|
|
|
|
|
|
|
|
import (
|
2020-05-05 07:26:05 +08:00
|
|
|
"bufio"
|
2019-05-21 05:36:23 +08:00
|
|
|
"bytes"
|
|
|
|
|
"errors"
|
2023-04-14 23:14:55 +08:00
|
|
|
"fmt"
|
2019-05-21 05:36:23 +08:00
|
|
|
"io"
|
2023-06-09 21:28:14 +08:00
|
|
|
|
|
|
|
|
"github.com/klauspost/compress/gzip"
|
|
|
|
|
"github.com/klauspost/compress/zlib"
|
|
|
|
|
"github.com/klauspost/pgzip"
|
2019-05-21 05:36:23 +08:00
|
|
|
)
|
|
|
|
|
|
2023-04-14 23:14:55 +08:00
|
|
|
// DefaultMaxDecompressionSize is the default limit for the size of decoded
// payloads produced by the decoders in this package (500 MB).
const DefaultMaxDecompressionSize = 500 * 1024 * 1024 // 500MB
|
|
|
|
|
|
2023-06-09 21:28:14 +08:00
|
|
|
// EncodingOption is a functional option that adjusts an encoder's
// configuration away from the standard defaults.
type EncodingOption func(*encoderConfig)
|
|
|
|
|
|
|
|
|
|
// encoderConfig holds the tunable settings applied by EncodingOptions.
type encoderConfig struct {
	// level is the compression level handed to the underlying writer.
	level int
}
|
|
|
|
|
|
|
|
|
|
func EncoderCompressionLevel(level int) EncodingOption {
|
|
|
|
|
return func(cfg *encoderConfig) {
|
|
|
|
|
cfg.level = level
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-05 07:26:05 +08:00
|
|
|
// NewStreamContentDecoder returns a reader that will decode the stream
|
|
|
|
|
// according to the encoding type.
|
|
|
|
|
func NewStreamContentDecoder(encoding string, r io.Reader) (io.Reader, error) {
|
|
|
|
|
switch encoding {
|
|
|
|
|
case "gzip":
|
|
|
|
|
return NewGzipReader(r)
|
|
|
|
|
case "identity", "":
|
|
|
|
|
return r, nil
|
|
|
|
|
default:
|
|
|
|
|
return nil, errors.New("invalid value for content_encoding")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// GzipReader is similar to gzip.Reader but reads only a single gzip stream per read.
type GzipReader struct {
	// r is the underlying buffered reader the gzip streams come from.
	r io.Reader
	// z decompresses the current gzip stream.
	z *pgzip.Reader
	// endOfStream is set when the current stream has been fully consumed;
	// the next Read re-arms z onto the following stream.
	endOfStream bool
}
|
|
|
|
|
|
|
|
|
|
func NewGzipReader(r io.Reader) (io.Reader, error) {
|
|
|
|
|
// We need a read that implements ByteReader in order to line up the next
|
|
|
|
|
// stream.
|
|
|
|
|
br := bufio.NewReader(r)
|
|
|
|
|
|
|
|
|
|
// Reads the first gzip stream header.
|
2023-06-09 21:28:14 +08:00
|
|
|
z, err := pgzip.NewReader(br)
|
2020-05-05 07:26:05 +08:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Prevent future calls to Read from reading the following gzip header.
|
|
|
|
|
z.Multistream(false)
|
|
|
|
|
|
|
|
|
|
return &GzipReader{r: br, z: z}, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (r *GzipReader) Read(b []byte) (int, error) {
|
|
|
|
|
if r.endOfStream {
|
|
|
|
|
// Reads the next gzip header and prepares for the next stream.
|
|
|
|
|
err := r.z.Reset(r.r)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return 0, err
|
|
|
|
|
}
|
|
|
|
|
r.z.Multistream(false)
|
|
|
|
|
r.endOfStream = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
n, err := r.z.Read(b)
|
|
|
|
|
|
|
|
|
|
// Since multistream is disabled, io.EOF indicates the end of the gzip
|
|
|
|
|
// sequence. On the next read we must read the next gzip header.
|
2023-02-22 19:57:53 +08:00
|
|
|
if errors.Is(err, io.EOF) {
|
2020-05-05 07:26:05 +08:00
|
|
|
r.endOfStream = true
|
|
|
|
|
return n, nil
|
|
|
|
|
}
|
|
|
|
|
return n, err
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-21 05:36:23 +08:00
|
|
|
// NewContentEncoder returns a ContentEncoder for the encoding type.
|
2023-06-09 21:28:14 +08:00
|
|
|
func NewContentEncoder(encoding string, options ...EncodingOption) (ContentEncoder, error) {
|
2019-05-21 05:36:23 +08:00
|
|
|
switch encoding {
|
|
|
|
|
case "gzip":
|
2023-06-09 21:28:14 +08:00
|
|
|
return NewGzipEncoder(options...)
|
2022-01-08 00:38:19 +08:00
|
|
|
case "zlib":
|
2023-06-09 21:28:14 +08:00
|
|
|
return NewZlibEncoder(options...)
|
2019-05-21 05:36:23 +08:00
|
|
|
case "identity", "":
|
|
|
|
|
return NewIdentityEncoder(), nil
|
|
|
|
|
default:
|
|
|
|
|
return nil, errors.New("invalid value for content_encoding")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-27 03:28:32 +08:00
|
|
|
// AutoDecoder routes payloads to a gzip or identity decoder depending on
// the encoding configured via SetEncoding.
type AutoDecoder struct {
	// encoding is the most recently configured content encoding.
	encoding string
	// gzip handles payloads when encoding is "gzip".
	gzip *GzipDecoder
	// identity passes every other payload through unchanged.
	identity *IdentityDecoder
}
|
|
|
|
|
|
2022-10-13 03:08:03 +08:00
|
|
|
// SetEncoding selects which decoder Decode will use: "gzip" picks the gzip
// decoder, anything else falls back to identity.
func (a *AutoDecoder) SetEncoding(encoding string) {
	a.encoding = encoding
}
|
|
|
|
|
|
2023-04-14 23:14:55 +08:00
|
|
|
func (a *AutoDecoder) Decode(data []byte, maxDecompressionSize int64) ([]byte, error) {
|
2022-09-27 03:28:32 +08:00
|
|
|
if a.encoding == "gzip" {
|
2023-04-14 23:14:55 +08:00
|
|
|
return a.gzip.Decode(data, maxDecompressionSize)
|
2022-09-27 03:28:32 +08:00
|
|
|
}
|
2023-04-14 23:14:55 +08:00
|
|
|
return a.identity.Decode(data, maxDecompressionSize)
|
2022-09-27 03:28:32 +08:00
|
|
|
}
|
|
|
|
|
|
2022-11-09 03:04:12 +08:00
|
|
|
func NewAutoContentDecoder() *AutoDecoder {
|
2022-09-27 03:28:32 +08:00
|
|
|
var a AutoDecoder
|
|
|
|
|
|
|
|
|
|
a.identity = NewIdentityDecoder()
|
2022-11-09 03:04:12 +08:00
|
|
|
a.gzip = NewGzipDecoder()
|
|
|
|
|
return &a
|
2022-09-27 03:28:32 +08:00
|
|
|
}
|
|
|
|
|
|
2019-05-21 05:36:23 +08:00
|
|
|
// NewContentDecoder returns a ContentDecoder for the encoding type.
|
|
|
|
|
func NewContentDecoder(encoding string) (ContentDecoder, error) {
|
|
|
|
|
switch encoding {
|
|
|
|
|
case "gzip":
|
2022-11-09 03:04:12 +08:00
|
|
|
return NewGzipDecoder(), nil
|
2022-01-08 00:38:19 +08:00
|
|
|
case "zlib":
|
2022-11-09 03:04:12 +08:00
|
|
|
return NewZlibDecoder(), nil
|
2019-05-21 05:36:23 +08:00
|
|
|
case "identity", "":
|
|
|
|
|
return NewIdentityDecoder(), nil
|
2022-09-27 03:28:32 +08:00
|
|
|
case "auto":
|
2022-11-09 03:04:12 +08:00
|
|
|
return NewAutoContentDecoder(), nil
|
2019-05-21 05:36:23 +08:00
|
|
|
default:
|
|
|
|
|
return nil, errors.New("invalid value for content_encoding")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ContentEncoder applies a wrapper encoding to byte buffers.
type ContentEncoder interface {
	// Encode returns the encoded form of the given bytes.
	Encode([]byte) ([]byte, error)
}
|
|
|
|
|
|
|
|
|
|
// GzipEncoder compresses the buffer using gzip at the default level.
type GzipEncoder struct {
	// pwriter is the parallel gzip writer used for large payloads.
	pwriter *pgzip.Writer
	// writer is the serial gzip writer used for small payloads.
	writer *gzip.Writer
	// buf is the reusable output buffer shared by both writers.
	buf *bytes.Buffer
}
|
|
|
|
|
|
2023-06-09 21:28:14 +08:00
|
|
|
func NewGzipEncoder(options ...EncodingOption) (*GzipEncoder, error) {
|
|
|
|
|
cfg := encoderConfig{level: pgzip.DefaultCompression}
|
|
|
|
|
for _, o := range options {
|
|
|
|
|
o(&cfg)
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-21 05:36:23 +08:00
|
|
|
var buf bytes.Buffer
|
2023-06-09 21:28:14 +08:00
|
|
|
pw, err := pgzip.NewWriterLevel(&buf, cfg.level)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
2022-11-09 03:04:12 +08:00
|
|
|
}
|
2023-06-09 21:28:14 +08:00
|
|
|
w, err := gzip.NewWriterLevel(&buf, cfg.level)
|
|
|
|
|
return &GzipEncoder{
|
|
|
|
|
pwriter: pw,
|
|
|
|
|
writer: w,
|
|
|
|
|
buf: &buf,
|
|
|
|
|
}, err
|
2019-05-21 05:36:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (e *GzipEncoder) Encode(data []byte) ([]byte, error) {
|
2023-06-09 21:28:14 +08:00
|
|
|
// Parallel Gzip is only faster for larger data chunks. According to the
|
|
|
|
|
// project's documentation the trade-off size is at about 1MB, so we switch
|
|
|
|
|
// to parallel Gzip if the data is larger and run the built-in version
|
|
|
|
|
// otherwise.
|
|
|
|
|
if len(data) > 1024*1024 {
|
|
|
|
|
return e.encodeBig(data)
|
|
|
|
|
}
|
|
|
|
|
return e.encodeSmall(data)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (e *GzipEncoder) encodeSmall(data []byte) ([]byte, error) {
|
2019-05-21 05:36:23 +08:00
|
|
|
e.buf.Reset()
|
|
|
|
|
e.writer.Reset(e.buf)
|
|
|
|
|
|
|
|
|
|
_, err := e.writer.Write(data)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
err = e.writer.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
return e.buf.Bytes(), nil
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-09 21:28:14 +08:00
|
|
|
func (e *GzipEncoder) encodeBig(data []byte) ([]byte, error) {
|
|
|
|
|
e.buf.Reset()
|
|
|
|
|
e.pwriter.Reset(e.buf)
|
|
|
|
|
|
|
|
|
|
_, err := e.pwriter.Write(data)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
err = e.pwriter.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
return e.buf.Bytes(), nil
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-08 00:38:19 +08:00
|
|
|
// ZlibEncoder compresses buffers using the zlib format.
type ZlibEncoder struct {
	// writer is the reusable zlib writer.
	writer *zlib.Writer
	// buf is the reusable output buffer.
	buf *bytes.Buffer
}
|
|
|
|
|
|
2023-06-09 21:28:14 +08:00
|
|
|
func NewZlibEncoder(options ...EncodingOption) (*ZlibEncoder, error) {
|
|
|
|
|
cfg := encoderConfig{level: zlib.DefaultCompression}
|
|
|
|
|
for _, o := range options {
|
|
|
|
|
o(&cfg)
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-08 00:38:19 +08:00
|
|
|
var buf bytes.Buffer
|
2023-06-09 21:28:14 +08:00
|
|
|
w, err := zlib.NewWriterLevel(&buf, cfg.level)
|
2022-01-08 00:38:19 +08:00
|
|
|
return &ZlibEncoder{
|
2023-06-09 21:28:14 +08:00
|
|
|
writer: w,
|
2022-01-08 00:38:19 +08:00
|
|
|
buf: &buf,
|
2023-06-09 21:28:14 +08:00
|
|
|
}, err
|
2022-01-08 00:38:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (e *ZlibEncoder) Encode(data []byte) ([]byte, error) {
|
|
|
|
|
e.buf.Reset()
|
|
|
|
|
e.writer.Reset(e.buf)
|
|
|
|
|
|
|
|
|
|
_, err := e.writer.Write(data)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
err = e.writer.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
return e.buf.Bytes(), nil
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-21 05:36:23 +08:00
|
|
|
// IdentityEncoder is a null encoder that applies no transformation; Encode
// returns its input unchanged.
type IdentityEncoder struct{}
|
|
|
|
|
|
|
|
|
|
// NewIdentityEncoder returns a pass-through encoder.
func NewIdentityEncoder() *IdentityEncoder {
	return &IdentityEncoder{}
}
|
|
|
|
|
|
|
|
|
|
// Encode returns data unchanged and never fails.
func (*IdentityEncoder) Encode(data []byte) ([]byte, error) {
	return data, nil
}
|
|
|
|
|
|
|
|
|
|
// ContentDecoder removes a wrapper encoding from byte buffers.
type ContentDecoder interface {
	// SetEncoding configures the expected encoding; in this file only
	// AutoDecoder acts on it, the other implementations are no-ops.
	SetEncoding(string)
	// Decode returns the decoded bytes, limiting the decoded size to the
	// given maximum.
	Decode([]byte, int64) ([]byte, error)
}
|
|
|
|
|
|
|
|
|
|
// GzipDecoder decompresses buffers with gzip compression.
type GzipDecoder struct {
	// preader is the parallel gzip reader used for large payloads.
	preader *pgzip.Reader
	// reader is the serial gzip reader used for small payloads.
	reader *gzip.Reader
	// buf is the reusable output buffer for decoded data.
	buf *bytes.Buffer
}
|
|
|
|
|
|
2022-11-09 03:04:12 +08:00
|
|
|
func NewGzipDecoder() *GzipDecoder {
|
2019-05-21 05:36:23 +08:00
|
|
|
return &GzipDecoder{
|
2023-06-09 21:28:14 +08:00
|
|
|
preader: new(pgzip.Reader),
|
|
|
|
|
reader: new(gzip.Reader),
|
|
|
|
|
buf: new(bytes.Buffer),
|
2022-11-09 03:04:12 +08:00
|
|
|
}
|
2019-05-21 05:36:23 +08:00
|
|
|
}
|
|
|
|
|
|
2022-10-13 03:08:03 +08:00
|
|
|
// SetEncoding is a no-op; GzipDecoder always expects gzip input.
func (*GzipDecoder) SetEncoding(string) {}
|
2022-09-27 03:28:32 +08:00
|
|
|
|
2023-04-14 23:14:55 +08:00
|
|
|
func (d *GzipDecoder) Decode(data []byte, maxDecompressionSize int64) ([]byte, error) {
|
2023-06-09 21:28:14 +08:00
|
|
|
// Parallel Gzip is only faster for larger data chunks. According to the
|
|
|
|
|
// project's documentation the trade-off size is at about 1MB, so we switch
|
|
|
|
|
// to parallel Gzip if the data is larger and run the built-in version
|
|
|
|
|
// otherwise.
|
|
|
|
|
if len(data) > 1024*1024 {
|
|
|
|
|
return d.decodeBig(data, maxDecompressionSize)
|
|
|
|
|
}
|
|
|
|
|
return d.decodeSmall(data, maxDecompressionSize)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (d *GzipDecoder) decodeSmall(data []byte, maxDecompressionSize int64) ([]byte, error) {
|
2022-10-13 03:08:03 +08:00
|
|
|
err := d.reader.Reset(bytes.NewBuffer(data))
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2019-05-21 05:36:23 +08:00
|
|
|
d.buf.Reset()
|
|
|
|
|
|
2023-04-14 23:14:55 +08:00
|
|
|
n, err := io.CopyN(d.buf, d.reader, maxDecompressionSize)
|
2023-02-22 19:57:53 +08:00
|
|
|
if err != nil && !errors.Is(err, io.EOF) {
|
2019-05-21 05:36:23 +08:00
|
|
|
return nil, err
|
2023-04-14 23:14:55 +08:00
|
|
|
} else if n == maxDecompressionSize {
|
|
|
|
|
return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", maxDecompressionSize)
|
2019-05-21 05:36:23 +08:00
|
|
|
}
|
2023-04-14 23:14:55 +08:00
|
|
|
|
2019-05-21 05:36:23 +08:00
|
|
|
err = d.reader.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
return d.buf.Bytes(), nil
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-09 21:28:14 +08:00
|
|
|
func (d *GzipDecoder) decodeBig(data []byte, maxDecompressionSize int64) ([]byte, error) {
|
|
|
|
|
err := d.preader.Reset(bytes.NewBuffer(data))
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
d.buf.Reset()
|
|
|
|
|
|
|
|
|
|
n, err := io.CopyN(d.buf, d.preader, maxDecompressionSize)
|
|
|
|
|
if err != nil && !errors.Is(err, io.EOF) {
|
|
|
|
|
return nil, err
|
|
|
|
|
} else if n == maxDecompressionSize {
|
|
|
|
|
return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", maxDecompressionSize)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
err = d.preader.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
return d.buf.Bytes(), nil
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-08 00:38:19 +08:00
|
|
|
// ZlibDecoder decompresses buffers with zlib compression.
type ZlibDecoder struct {
	// buf is the reusable output buffer for decoded data.
	buf *bytes.Buffer
}
|
|
|
|
|
|
2022-11-09 03:04:12 +08:00
|
|
|
func NewZlibDecoder() *ZlibDecoder {
|
2022-01-08 00:38:19 +08:00
|
|
|
return &ZlibDecoder{
|
|
|
|
|
buf: new(bytes.Buffer),
|
2022-11-09 03:04:12 +08:00
|
|
|
}
|
2022-01-08 00:38:19 +08:00
|
|
|
}
|
|
|
|
|
|
2022-10-13 03:08:03 +08:00
|
|
|
// SetEncoding is a no-op; ZlibDecoder always expects zlib input.
func (*ZlibDecoder) SetEncoding(string) {}
|
2022-09-27 03:28:32 +08:00
|
|
|
|
2023-04-14 23:14:55 +08:00
|
|
|
func (d *ZlibDecoder) Decode(data []byte, maxDecompressionSize int64) ([]byte, error) {
|
2022-01-08 00:38:19 +08:00
|
|
|
d.buf.Reset()
|
|
|
|
|
|
|
|
|
|
b := bytes.NewBuffer(data)
|
|
|
|
|
r, err := zlib.NewReader(b)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2023-04-14 23:14:55 +08:00
|
|
|
|
|
|
|
|
n, err := io.CopyN(d.buf, r, maxDecompressionSize)
|
2023-02-22 19:57:53 +08:00
|
|
|
if err != nil && !errors.Is(err, io.EOF) {
|
2022-01-08 00:38:19 +08:00
|
|
|
return nil, err
|
2023-04-14 23:14:55 +08:00
|
|
|
} else if n == maxDecompressionSize {
|
|
|
|
|
return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", maxDecompressionSize)
|
2022-01-08 00:38:19 +08:00
|
|
|
}
|
2023-04-14 23:14:55 +08:00
|
|
|
|
2022-01-08 00:38:19 +08:00
|
|
|
err = r.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
return d.buf.Bytes(), nil
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-21 05:36:23 +08:00
|
|
|
// IdentityDecoder is a null decoder that returns the input unchanged,
// subject only to the maximum-size check in Decode.
type IdentityDecoder struct{}
|
|
|
|
|
|
|
|
|
|
// NewIdentityDecoder returns a pass-through decoder.
func NewIdentityDecoder() *IdentityDecoder {
	return &IdentityDecoder{}
}
|
|
|
|
|
|
2022-10-13 03:08:03 +08:00
|
|
|
// SetEncoding is a no-op; identity decoding ignores the encoding name.
func (*IdentityDecoder) SetEncoding(string) {}
|
2022-09-27 03:28:32 +08:00
|
|
|
|
2023-04-14 23:14:55 +08:00
|
|
|
func (*IdentityDecoder) Decode(data []byte, maxDecompressionSize int64) ([]byte, error) {
|
|
|
|
|
size := int64(len(data))
|
|
|
|
|
if size > maxDecompressionSize {
|
|
|
|
|
return nil, fmt.Errorf("size of decoded data: %d exceeds allowed size %d", size, maxDecompressionSize)
|
|
|
|
|
}
|
2019-05-21 05:36:23 +08:00
|
|
|
return data, nil
|
|
|
|
|
}
|