Implement caching & limit concurrency

This commit is contained in:
Moritz Marquardt 2021-03-19 20:58:53 +01:00
parent 8ead10c82e
commit 203e230905
No known key found for this signature in database
GPG Key ID: D5788327BEE388B6
4 changed files with 62 additions and 49 deletions

1
go.mod
View File

@ -3,6 +3,7 @@ module codeberg.org/codeberg/pages
go 1.16
require (
github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a
github.com/valyala/fasthttp v1.22.0
github.com/valyala/fastjson v1.6.3
)

2
go.sum
View File

@ -1,3 +1,5 @@
github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a h1:Cf4CrDeyrIcuIiJZEZJAH5dapqQ6J3OmP/vHPbDjaFA=
github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a/go.mod h1:ig6eVXkYn/9dz0Vm8UdLf+E0u1bE6kBSn3n2hqk6jas=
github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/klauspost/compress v1.11.8 h1:difgzQsp5mdAz9v8lm3P/I+EpDKMU/6uTMw1y1FObuo=

View File

@ -3,10 +3,10 @@ package main
import (
"bytes"
"fmt"
"github.com/OrlovEvgeny/go-mcache"
"github.com/valyala/fasthttp"
"github.com/valyala/fastjson"
"mime"
"net/url"
"path"
"strconv"
"strings"
@ -69,34 +69,30 @@ func handler(ctx *fasthttp.RequestCtx) {
if repo == "" {
return false
}
fmt.Printf("Trying branch: %s/%s/%s with path %v\n", targetOwner, repo, branch, path)
escapedBranch, _ := url.PathUnescape(branch)
if escapedBranch == "" {
escapedBranch = branch
}
// Check if the branch exists, otherwise treat it as a file path
targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, repo, branch)
fmt.Printf("Branch %s has timestamp %v\n", targetBranch, targetOptions.BranchTimestamp)
if targetOptions.BranchTimestamp != (time.Time{}) {
// Branch exists, use it
targetRepo = repo
targetPath = strings.Trim(strings.Join(path, "/"), "/")
if canonicalLink != "" {
// Hide from search engines & add canonical link
ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex")
ctx.Response.Header.Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
return true
} else {
branchTimestampResult := getBranchTimestamp(targetOwner, repo, branch)
if branchTimestampResult == nil {
// branch doesn't exist
return false
}
// Branch exists, use it
targetRepo = repo
targetPath = strings.Trim(strings.Join(path, "/"), "/")
targetBranch = branchTimestampResult.branch
targetOptions.BranchTimestamp = branchTimestampResult.timestamp
if canonicalLink != "" {
// Hide from search engines & add canonical link
ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex")
ctx.Response.Header.Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
return true
}
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
@ -209,36 +205,49 @@ func returnErrorPage(ctx *fasthttp.RequestCtx, code int) {
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+fasthttp.StatusMessage(code))))
}
// branchTimestamp is the value returned (by pointer) from getBranchTimestamp and stored in branchTimestampCache.
type branchTimestamp struct {
// branch is the branch name that was requested, or the repository's default_branch when the caller passed "".
branch string
// timestamp is parsed (RFC 3339) from the "commit" -> "timestamp" field of the Gitea branch response.
timestamp time.Time
}
// branchTimestampCache holds *branchTimestamp values under keys of the form owner + "/" + repo + "/" + branch;
// it is read and written by getBranchTimestamp.
var branchTimestampCache = mcache.New()
// getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
// (or an empty time.Time if the branch doesn't exist)
// TODO: cache responses for ~15 minutes if a branch exists
//
// NOTE(review): the lines below contain two signatures and paired variants of several statements; this looks like
// the old and new revisions of the function interleaved without +/- markers - confirm against the repository.
// The two-value variant returns ("", time.Time{}) when an API request fails; the *branchTimestamp variant returns
// nil in that case and a populated struct otherwise.
func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string, t time.Time) {
branchWithFallback = branch
func getBranchTimestamp(owner, repo, branch string) *branchTimestamp {
// Return the cached entry, if any, for the key built from the caller-supplied owner, repo and branch.
if result, ok := branchTimestampCache.Get(owner + "/" + repo + "/" + branch); ok {
return result.(*branchTimestamp)
}
result := &branchTimestamp{}
result.branch = branch
// No branch given: ask the Gitea repository endpoint for its default_branch and continue with that name.
if branch == "" {
var body = make([]byte, 0)
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo), 10*time.Second)
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo, 10*time.Second)
if err != nil || status != 200 {
fmt.Printf("Default branch request to Gitea API failed with status code %d and error %s\n", status, err)
branchWithFallback = ""
return
return nil
}
branch = fastjson.GetString(body, "default_branch")
branchWithFallback = branch
result.branch = branch
}
// Fetch the branch info; any transport error or non-200 status is treated as "branch not available".
var body = make([]byte, 0)
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo)+"/branches/"+url.PathEscape(branch), 10*time.Second)
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch, 10*time.Second)
if err != nil || status != 200 {
fmt.Printf("Branch info request to Gitea API failed with status code %d and error %s\n", status, err)
branchWithFallback = ""
return
return nil
}
// The time.Parse error is discarded, so a missing or malformed timestamp leaves the zero time.Time.
t, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp"))
return
result.timestamp, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp"))
// NOTE(review): `branch` may have been replaced by the default branch above, so a call with branch == "" looks up
// owner/repo/ at the top but stores under owner/repo/<default branch> here - confirm whether the default-branch
// case is meant to be cached under the original key as well.
// NOTE(review): the duration passed is 15 seconds (presumably the entry TTL), while the TODO above mentions
// ~15 minutes; the error returned by Set is ignored.
_ = branchTimestampCache.Set(owner + "/" + repo + "/" + branch, result, 15 * time.Second)
return result
}
// upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.

// upstreamClient is the fasthttp.Client through which the upstream function sends its request
// (upstreamClient.Do). The fields below set its timeouts and connection limits; see the fasthttp.Client
// field documentation for their exact semantics.
var upstreamClient = fasthttp.Client{
ReadTimeout: 10 * time.Second,
MaxConnDuration: 60 * time.Second,
MaxConnWaitTimeout: 1000 * time.Millisecond,
MaxConnsPerHost: 1024 * 16, // TODO: adjust bottlenecks for best performance with Gitea!
}
// upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, targetBranch string, targetPath string, options *upstreamOptions) (success bool) {
if options.ForbiddenMimeTypes == nil {
options.ForbiddenMimeTypes = map[string]struct{}{}
@ -246,15 +255,14 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
// Check if the branch exists and when it was modified
if options.BranchTimestamp == (time.Time{}) {
targetBranch, options.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, targetBranch)
}
branch := getBranchTimestamp(targetOwner, targetRepo, targetBranch)
// Handle repositories with no/broken pages setup
if options.BranchTimestamp == (time.Time{}) || targetBranch == "" {
ctx.Response.SetStatusCode(fasthttp.StatusFailedDependency)
ctx.Response.Header.SetContentType("text/html; charset=utf-8")
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte("pages not set up for this repo")))
return true
if branch == nil {
returnErrorPage(ctx, fasthttp.StatusFailedDependency)
return true
}
targetBranch = branch.branch
options.BranchTimestamp = branch.timestamp
}
if targetOwner == "" || targetRepo == "" || targetBranch == "" {
@ -272,9 +280,9 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
// Make a GET request to the upstream URL
req := fasthttp.AcquireRequest()
req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + url.PathEscape(targetOwner) + "/" + url.PathEscape(targetRepo) + "/raw/" + url.PathEscape(targetBranch) + "/" + url.PathEscape(targetPath))
req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath)
res := fasthttp.AcquireResponse()
err := fasthttp.DoTimeout(req, res, 10*time.Second)
err := upstreamClient.Do(req, res)
// Handle errors
if res.StatusCode() == fasthttp.StatusNotFound {

View File

@ -99,6 +99,8 @@ func main() {
NoDefaultServerHeader: true,
NoDefaultDate: true,
ReadTimeout: 10 * time.Second,
Concurrency: 1024 * 32, // TODO: adjust bottlenecks for best performance with Gitea!
MaxConnsPerIP: 100,
}).Serve(listener)
if err != nil {
fmt.Printf("Couldn't start server: %s\n", err)