Pooled and buffered gzip implementation (#5722)
* Pooled and buffered gzip implementation * Add test for gzip * Add integration test * Ensure lfs check within transaction The previous code made it possible for a race condition to occur whereby a LFSMetaObject could be checked into the database twice. We should check if the LFSMetaObject is within the database and insert it if not in one transaction. * Try to avoid primary key problem in postgres The integration tests are being affected by https://github.com/go-testfixtures/testfixtures/issues/39 if we set the primary key high enough, keep a count of this and remove at the end of each test we shouldn't be affected by this.tokarchuk/v1.17
parent
075649572d
commit
7d434376f1
@ -0,0 +1,129 @@ |
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package integrations |
||||
|
||||
import ( |
||||
"archive/zip" |
||||
"bytes" |
||||
"crypto/sha256" |
||||
"encoding/hex" |
||||
"io" |
||||
"io/ioutil" |
||||
"net/http" |
||||
"testing" |
||||
|
||||
"code.gitea.io/gitea/models" |
||||
"code.gitea.io/gitea/modules/gzip" |
||||
"code.gitea.io/gitea/modules/lfs" |
||||
"code.gitea.io/gitea/modules/setting" |
||||
"github.com/stretchr/testify/assert" |
||||
|
||||
gzipp "github.com/klauspost/compress/gzip" |
||||
) |
||||
|
||||
func GenerateLFSOid(content io.Reader) (string, error) { |
||||
h := sha256.New() |
||||
if _, err := io.Copy(h, content); err != nil { |
||||
return "", err |
||||
} |
||||
sum := h.Sum(nil) |
||||
return hex.EncodeToString(sum), nil |
||||
} |
||||
|
||||
var lfsID = int64(20000) |
||||
|
||||
func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string { |
||||
oid, err := GenerateLFSOid(bytes.NewReader(*content)) |
||||
assert.NoError(t, err) |
||||
var lfsMetaObject *models.LFSMetaObject |
||||
|
||||
if setting.UsePostgreSQL { |
||||
lfsMetaObject = &models.LFSMetaObject{ID: lfsID, Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID} |
||||
} else { |
||||
lfsMetaObject = &models.LFSMetaObject{Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID} |
||||
} |
||||
|
||||
lfsID = lfsID + 1 |
||||
lfsMetaObject, err = models.NewLFSMetaObject(lfsMetaObject) |
||||
assert.NoError(t, err) |
||||
contentStore := &lfs.ContentStore{BasePath: setting.LFS.ContentPath} |
||||
if !contentStore.Exists(lfsMetaObject) { |
||||
err := contentStore.Put(lfsMetaObject, bytes.NewReader(*content)) |
||||
assert.NoError(t, err) |
||||
} |
||||
return oid |
||||
} |
||||
|
||||
func doLfs(t *testing.T, content *[]byte, expectGzip bool) { |
||||
prepareTestEnv(t) |
||||
repo, err := models.GetRepositoryByOwnerAndName("user2", "repo1") |
||||
assert.NoError(t, err) |
||||
oid := storeObjectInRepo(t, repo.ID, content) |
||||
defer repo.RemoveLFSMetaObjectByOid(oid) |
||||
|
||||
session := loginUser(t, "user2") |
||||
|
||||
// Request OID
|
||||
req := NewRequest(t, "GET", "/user2/repo1.git/info/lfs/objects/"+oid+"/test") |
||||
req.Header.Set("Accept-Encoding", "gzip") |
||||
resp := session.MakeRequest(t, req, http.StatusOK) |
||||
|
||||
contentEncoding := resp.Header().Get("Content-Encoding") |
||||
if !expectGzip || !setting.EnableGzip { |
||||
assert.NotContains(t, contentEncoding, "gzip") |
||||
|
||||
result := resp.Body.Bytes() |
||||
assert.Equal(t, *content, result) |
||||
} else { |
||||
assert.Contains(t, contentEncoding, "gzip") |
||||
gzippReader, err := gzipp.NewReader(resp.Body) |
||||
assert.NoError(t, err) |
||||
result, err := ioutil.ReadAll(gzippReader) |
||||
assert.NoError(t, err) |
||||
assert.Equal(t, *content, result) |
||||
} |
||||
|
||||
} |
||||
|
||||
func TestGetLFSSmall(t *testing.T) { |
||||
content := []byte("A very small file\n") |
||||
doLfs(t, &content, false) |
||||
} |
||||
|
||||
func TestGetLFSLarge(t *testing.T) { |
||||
content := make([]byte, gzip.MinSize*10) |
||||
for i := range content { |
||||
content[i] = byte(i % 256) |
||||
} |
||||
doLfs(t, &content, true) |
||||
} |
||||
|
||||
func TestGetLFSGzip(t *testing.T) { |
||||
b := make([]byte, gzip.MinSize*10) |
||||
for i := range b { |
||||
b[i] = byte(i % 256) |
||||
} |
||||
outputBuffer := bytes.NewBuffer([]byte{}) |
||||
gzippWriter := gzipp.NewWriter(outputBuffer) |
||||
gzippWriter.Write(b) |
||||
gzippWriter.Close() |
||||
content := outputBuffer.Bytes() |
||||
doLfs(t, &content, false) |
||||
} |
||||
|
||||
func TestGetLFSZip(t *testing.T) { |
||||
b := make([]byte, gzip.MinSize*10) |
||||
for i := range b { |
||||
b[i] = byte(i % 256) |
||||
} |
||||
outputBuffer := bytes.NewBuffer([]byte{}) |
||||
zipWriter := zip.NewWriter(outputBuffer) |
||||
fileWriter, err := zipWriter.Create("default") |
||||
assert.NoError(t, err) |
||||
fileWriter.Write(b) |
||||
zipWriter.Close() |
||||
content := outputBuffer.Bytes() |
||||
doLfs(t, &content, false) |
||||
} |
@ -0,0 +1,327 @@ |
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gzip |
||||
|
||||
import ( |
||||
"bufio" |
||||
"fmt" |
||||
"io" |
||||
"net" |
||||
"net/http" |
||||
"regexp" |
||||
"strconv" |
||||
"strings" |
||||
"sync" |
||||
|
||||
"github.com/klauspost/compress/gzip" |
||||
"gopkg.in/macaron.v1" |
||||
) |
||||
|
||||
const ( |
||||
acceptEncodingHeader = "Accept-Encoding" |
||||
contentEncodingHeader = "Content-Encoding" |
||||
contentLengthHeader = "Content-Length" |
||||
contentTypeHeader = "Content-Type" |
||||
rangeHeader = "Range" |
||||
varyHeader = "Vary" |
||||
) |
||||
|
||||
const ( |
||||
// MinSize is the minimum size of content we will compress
|
||||
MinSize = 1400 |
||||
) |
||||
|
||||
// noopClosers are io.Writers with a shim to prevent early closure
|
||||
type noopCloser struct { |
||||
io.Writer |
||||
} |
||||
|
||||
func (noopCloser) Close() error { return nil } |
||||
|
||||
// WriterPool is a gzip writer pool to reduce workload on creation of
|
||||
// gzip writers
|
||||
type WriterPool struct { |
||||
pool sync.Pool |
||||
compressionLevel int |
||||
} |
||||
|
||||
// NewWriterPool creates a new pool
|
||||
func NewWriterPool(compressionLevel int) *WriterPool { |
||||
return &WriterPool{pool: sync.Pool{ |
||||
// New will return nil, we'll manage the creation of new
|
||||
// writers in the middleware
|
||||
New: func() interface{} { return nil }, |
||||
}, |
||||
compressionLevel: compressionLevel} |
||||
} |
||||
|
||||
// Get a writer from the pool - or create one if not available
|
||||
func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer { |
||||
ret := wp.pool.Get() |
||||
if ret == nil { |
||||
ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel) |
||||
} else { |
||||
ret.(*gzip.Writer).Reset(rw) |
||||
} |
||||
return ret.(*gzip.Writer) |
||||
} |
||||
|
||||
// Put returns a writer to the pool
|
||||
func (wp *WriterPool) Put(w *gzip.Writer) { |
||||
wp.pool.Put(w) |
||||
} |
||||
|
||||
var writerPool WriterPool |
||||
var regex regexp.Regexp |
||||
|
||||
// Options represents the configuration for the gzip middleware
|
||||
type Options struct { |
||||
CompressionLevel int |
||||
} |
||||
|
||||
func validateCompressionLevel(level int) bool { |
||||
return level == gzip.DefaultCompression || |
||||
level == gzip.ConstantCompression || |
||||
(level >= gzip.BestSpeed && level <= gzip.BestCompression) |
||||
} |
||||
|
||||
func validate(options []Options) Options { |
||||
// Default to level 4 compression (Best results seem to be between 4 and 6)
|
||||
opt := Options{CompressionLevel: 4} |
||||
if len(options) > 0 { |
||||
opt = options[0] |
||||
} |
||||
if !validateCompressionLevel(opt.CompressionLevel) { |
||||
opt.CompressionLevel = 4 |
||||
} |
||||
return opt |
||||
} |
||||
|
||||
// Middleware creates a macaron.Handler to proxy the response
|
||||
func Middleware(options ...Options) macaron.Handler { |
||||
opt := validate(options) |
||||
writerPool = *NewWriterPool(opt.CompressionLevel) |
||||
regex := regexp.MustCompile(`bytes=(\d+)\-.*`) |
||||
|
||||
return func(ctx *macaron.Context) { |
||||
// If the client won't accept gzip or x-gzip don't compress
|
||||
if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") && |
||||
!strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") { |
||||
return |
||||
} |
||||
|
||||
// If the client is asking for a specific range of bytes - don't compress
|
||||
if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" { |
||||
|
||||
match := regex.FindStringSubmatch(rangeHdr) |
||||
if match != nil && len(match) > 1 { |
||||
return |
||||
} |
||||
} |
||||
|
||||
// OK we should proxy the response writer
|
||||
// We are still not necessarily going to compress...
|
||||
proxyWriter := &ProxyResponseWriter{ |
||||
ResponseWriter: ctx.Resp, |
||||
} |
||||
defer proxyWriter.Close() |
||||
|
||||
ctx.Resp = proxyWriter |
||||
ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil)) |
||||
|
||||
// Check if render middleware has been registered,
|
||||
// if yes, we need to modify ResponseWriter for it as well.
|
||||
if _, ok := ctx.Render.(*macaron.DummyRender); !ok { |
||||
ctx.Render.SetResponseWriter(proxyWriter) |
||||
} |
||||
|
||||
ctx.Next() |
||||
} |
||||
} |
||||
|
||||
// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
|
||||
type ProxyResponseWriter struct { |
||||
writer io.WriteCloser |
||||
macaron.ResponseWriter |
||||
stopped bool |
||||
|
||||
code int |
||||
buf []byte |
||||
} |
||||
|
||||
// Write appends data to the proxied gzip writer.
|
||||
func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) { |
||||
// if writer is initialized, use the writer
|
||||
if proxy.writer != nil { |
||||
return proxy.writer.Write(b) |
||||
} |
||||
|
||||
proxy.buf = append(proxy.buf, b...) |
||||
|
||||
var ( |
||||
contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader)) |
||||
contentType = proxy.Header().Get(contentTypeHeader) |
||||
contentEncoding = proxy.Header().Get(contentEncodingHeader) |
||||
) |
||||
|
||||
// OK if an encoding hasn't been chosen, and content length > 1400
|
||||
// and content type isn't a compressed type
|
||||
if contentEncoding == "" && |
||||
(contentLength == 0 || contentLength >= MinSize) && |
||||
(contentType == "" || !compressedContentType(contentType)) { |
||||
// If current buffer is less than the min size and a Content-Length isn't set, then wait
|
||||
if len(proxy.buf) < MinSize && contentLength == 0 { |
||||
return len(b), nil |
||||
} |
||||
|
||||
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
|
||||
if contentLength >= MinSize || len(proxy.buf) >= MinSize { |
||||
// if we don't know the content type, infer it
|
||||
if contentType == "" { |
||||
contentType = http.DetectContentType(proxy.buf) |
||||
proxy.Header().Set(contentTypeHeader, contentType) |
||||
} |
||||
// If the Content-Type is not compressed - Compress!
|
||||
if !compressedContentType(contentType) { |
||||
if err := proxy.startGzip(); err != nil { |
||||
return 0, err |
||||
} |
||||
return len(b), nil |
||||
} |
||||
} |
||||
} |
||||
// If we got here, we should not GZIP this response.
|
||||
if err := proxy.startPlain(); err != nil { |
||||
return 0, err |
||||
} |
||||
return len(b), nil |
||||
} |
||||
|
||||
func (proxy *ProxyResponseWriter) startGzip() error { |
||||
// Set the content-encoding and vary headers.
|
||||
proxy.Header().Set(contentEncodingHeader, "gzip") |
||||
proxy.Header().Set(varyHeader, acceptEncodingHeader) |
||||
|
||||
// if the Content-Length is already set, then calls to Write on gzip
|
||||
// will fail to set the Content-Length header since its already set
|
||||
// See: https://github.com/golang/go/issues/14975.
|
||||
proxy.Header().Del(contentLengthHeader) |
||||
|
||||
// Write the header to gzip response.
|
||||
if proxy.code != 0 { |
||||
proxy.ResponseWriter.WriteHeader(proxy.code) |
||||
// Ensure that no other WriteHeader's happen
|
||||
proxy.code = 0 |
||||
} |
||||
|
||||
// Initialize and flush the buffer into the gzip response if there are any bytes.
|
||||
// If there aren't any, we shouldn't initialize it yet because on Close it will
|
||||
// write the gzip header even if nothing was ever written.
|
||||
if len(proxy.buf) > 0 { |
||||
// Initialize the GZIP response.
|
||||
proxy.writer = writerPool.Get(proxy.ResponseWriter) |
||||
|
||||
return proxy.writeBuf() |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (proxy *ProxyResponseWriter) startPlain() error { |
||||
if proxy.code != 0 { |
||||
proxy.ResponseWriter.WriteHeader(proxy.code) |
||||
proxy.code = 0 |
||||
} |
||||
proxy.stopped = true |
||||
proxy.writer = noopCloser{proxy.ResponseWriter} |
||||
return proxy.writeBuf() |
||||
} |
||||
|
||||
func (proxy *ProxyResponseWriter) writeBuf() error { |
||||
if proxy.buf == nil { |
||||
return nil |
||||
} |
||||
|
||||
n, err := proxy.writer.Write(proxy.buf) |
||||
|
||||
// This should never happen (per io.Writer docs), but if the write didn't
|
||||
// accept the entire buffer but returned no specific error, we have no clue
|
||||
// what's going on, so abort just to be safe.
|
||||
if err == nil && n < len(proxy.buf) { |
||||
err = io.ErrShortWrite |
||||
} |
||||
proxy.buf = nil |
||||
return err |
||||
} |
||||
|
||||
// WriteHeader will ensure that we have setup the writer before we write the header
|
||||
func (proxy *ProxyResponseWriter) WriteHeader(code int) { |
||||
if proxy.code == 0 { |
||||
proxy.code = code |
||||
} |
||||
} |
||||
|
||||
// Close the writer
|
||||
func (proxy *ProxyResponseWriter) Close() error { |
||||
if proxy.stopped { |
||||
return nil |
||||
} |
||||
|
||||
if proxy.writer == nil { |
||||
err := proxy.startPlain() |
||||
|
||||
if err != nil { |
||||
err = fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error()) |
||||
} |
||||
} |
||||
|
||||
err := proxy.writer.Close() |
||||
|
||||
if poolWriter, ok := proxy.writer.(*gzip.Writer); ok { |
||||
writerPool.Put(poolWriter) |
||||
} |
||||
|
||||
proxy.writer = nil |
||||
proxy.stopped = true |
||||
return err |
||||
} |
||||
|
||||
// Flush the writer
|
||||
func (proxy *ProxyResponseWriter) Flush() { |
||||
if proxy.writer == nil { |
||||
return |
||||
} |
||||
|
||||
if gw, ok := proxy.writer.(*gzip.Writer); ok { |
||||
gw.Flush() |
||||
} |
||||
|
||||
proxy.ResponseWriter.Flush() |
||||
} |
||||
|
||||
// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
|
||||
// Hijacker, its Hijack method is returned. Otherwise an error is returned.
|
||||
func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { |
||||
hijacker, ok := proxy.ResponseWriter.(http.Hijacker) |
||||
if !ok { |
||||
return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface") |
||||
} |
||||
return hijacker.Hijack() |
||||
} |
||||
|
||||
// verify Hijacker interface implementation
|
||||
var _ http.Hijacker = &ProxyResponseWriter{} |
||||
|
||||
func compressedContentType(contentType string) bool { |
||||
switch contentType { |
||||
case "application/zip": |
||||
return true |
||||
case "application/x-gzip": |
||||
return true |
||||
case "application/gzip": |
||||
return true |
||||
default: |
||||
return false |
||||
} |
||||
} |
@ -0,0 +1,131 @@ |
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gzip |
||||
|
||||
import ( |
||||
"archive/zip" |
||||
"bytes" |
||||
"io/ioutil" |
||||
"net/http" |
||||
"net/http/httptest" |
||||
"testing" |
||||
|
||||
gzipp "github.com/klauspost/compress/gzip" |
||||
"github.com/stretchr/testify/assert" |
||||
macaron "gopkg.in/macaron.v1" |
||||
) |
||||
|
||||
func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) { |
||||
m := macaron.New() |
||||
m.Use(Middleware()) |
||||
m.Get("/", func() *[]byte { return &sampleResponse }) |
||||
return m, &sampleResponse |
||||
} |
||||
|
||||
func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) { |
||||
// Request without accept gzip: Should not gzip
|
||||
resp := httptest.NewRecorder() |
||||
req, err := http.NewRequest("GET", "/", nil) |
||||
assert.NoError(t, err) |
||||
m.ServeHTTP(resp, req) |
||||
|
||||
_, ok := resp.HeaderMap[contentEncodingHeader] |
||||
assert.False(t, ok) |
||||
|
||||
contentEncoding := resp.Header().Get(contentEncodingHeader) |
||||
assert.NotContains(t, contentEncoding, "gzip") |
||||
|
||||
result := resp.Body.Bytes() |
||||
assert.Equal(t, *sampleResponse, result) |
||||
} |
||||
|
||||
func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) { |
||||
// Request without accept gzip: Should not gzip
|
||||
resp := httptest.NewRecorder() |
||||
req, err := http.NewRequest("GET", "/", nil) |
||||
assert.NoError(t, err) |
||||
req.Header.Set(acceptEncodingHeader, "gzip") |
||||
m.ServeHTTP(resp, req) |
||||
|
||||
_, ok := resp.HeaderMap[contentEncodingHeader] |
||||
assert.Equal(t, ok, expectGzip) |
||||
|
||||
contentEncoding := resp.Header().Get(contentEncodingHeader) |
||||
if expectGzip { |
||||
assert.Contains(t, contentEncoding, "gzip") |
||||
gzippReader, err := gzipp.NewReader(resp.Body) |
||||
assert.NoError(t, err) |
||||
result, err := ioutil.ReadAll(gzippReader) |
||||
assert.NoError(t, err) |
||||
assert.Equal(t, *sampleResponse, result) |
||||
} else { |
||||
assert.NotContains(t, contentEncoding, "gzip") |
||||
result := resp.Body.Bytes() |
||||
assert.Equal(t, *sampleResponse, result) |
||||
} |
||||
} |
||||
|
||||
func TestMiddlewareSmall(t *testing.T) { |
||||
m, sampleResponse := setup([]byte("Small response")) |
||||
|
||||
reqNoAcceptGzip(t, m, sampleResponse) |
||||
|
||||
reqAcceptGzip(t, m, sampleResponse, false) |
||||
} |
||||
|
||||
func TestMiddlewareLarge(t *testing.T) { |
||||
b := make([]byte, MinSize+1) |
||||
for i := range b { |
||||
b[i] = byte(i % 256) |
||||
} |
||||
m, sampleResponse := setup(b) |
||||
|
||||
reqNoAcceptGzip(t, m, sampleResponse) |
||||
|
||||
// This should be gzipped as we accept gzip
|
||||
reqAcceptGzip(t, m, sampleResponse, true) |
||||
} |
||||
|
||||
func TestMiddlewareGzip(t *testing.T) { |
||||
b := make([]byte, MinSize*10) |
||||
for i := range b { |
||||
b[i] = byte(i % 256) |
||||
} |
||||
outputBuffer := bytes.NewBuffer([]byte{}) |
||||
gzippWriter := gzipp.NewWriter(outputBuffer) |
||||
gzippWriter.Write(b) |
||||
gzippWriter.Flush() |
||||
gzippWriter.Close() |
||||
output := outputBuffer.Bytes() |
||||
|
||||
m, sampleResponse := setup(output) |
||||
|
||||
reqNoAcceptGzip(t, m, sampleResponse) |
||||
|
||||
// This should not be gzipped even though we accept gzip
|
||||
reqAcceptGzip(t, m, sampleResponse, false) |
||||
} |
||||
|
||||
func TestMiddlewareZip(t *testing.T) { |
||||
b := make([]byte, MinSize*10) |
||||
for i := range b { |
||||
b[i] = byte(i % 256) |
||||
} |
||||
outputBuffer := bytes.NewBuffer([]byte{}) |
||||
zipWriter := zip.NewWriter(outputBuffer) |
||||
fileWriter, err := zipWriter.Create("default") |
||||
assert.NoError(t, err) |
||||
fileWriter.Write(b) |
||||
//fileWriter.Close()
|
||||
zipWriter.Close() |
||||
output := outputBuffer.Bytes() |
||||
|
||||
m, sampleResponse := setup(output) |
||||
|
||||
reqNoAcceptGzip(t, m, sampleResponse) |
||||
|
||||
// This should not be gzipped even though we accept gzip
|
||||
reqAcceptGzip(t, m, sampleResponse, false) |
||||
} |
Loading…
Reference in new issue