Optimize `PrevEventIDs` when getting thousands of backwards extremities (#3308)

Changes how many `PrevEventIDs` we send to other servers when
backfilling: the list is now capped at 100 event IDs.

Unsure about how representative this benchmark is..
```
goos: linux
goarch: amd64
pkg: github.com/matrix-org/dendrite/roomserver/api
cpu: Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
                            │    old.txt     │               new.txt               │
                            │     sec/op     │   sec/op     vs base                │
PrevEventIDs/Original1-8         264.9n ± 5%   237.4n ± 7%  -10.36% (p=0.000 n=10)
PrevEventIDs/Original10-8        3.101µ ± 4%   1.590µ ± 2%  -48.72% (p=0.000 n=10)
PrevEventIDs/Original100-8       44.32µ ± 2%   12.80µ ± 4%  -71.11% (p=0.000 n=10)
PrevEventIDs/Original500-8     263.835µ ± 4%   7.907µ ± 4%  -97.00% (p=0.000 n=10)
PrevEventIDs/Original1000-8    578.798µ ± 2%   7.620µ ± 2%  -98.68% (p=0.000 n=10)
PrevEventIDs/Original2000-8   1272.039µ ± 2%   8.241µ ± 9%  -99.35% (p=0.000 n=10)
geomean                          43.81µ        3.659µ       -91.65%

                            │    old.txt     │               new.txt                │
                            │      B/op      │     B/op      vs base                │
PrevEventIDs/Original1-8          72.00 ± 0%     48.00 ± 0%  -33.33% (p=0.000 n=10)
PrevEventIDs/Original10-8        1512.0 ± 0%     500.0 ± 0%  -66.93% (p=0.000 n=10)
PrevEventIDs/Original100-8     11.977Ki ± 0%   7.023Ki ± 0%  -41.36% (p=0.000 n=10)
PrevEventIDs/Original500-8     67.227Ki ± 0%   7.023Ki ± 0%  -89.55% (p=0.000 n=10)
PrevEventIDs/Original1000-8   163.227Ki ± 0%   7.023Ki ± 0%  -95.70% (p=0.000 n=10)
PrevEventIDs/Original2000-8   347.227Ki ± 0%   7.023Ki ± 0%  -97.98% (p=0.000 n=10)
geomean                         12.96Ki        1.954Ki       -84.92%

                            │   old.txt   │              new.txt               │
                            │  allocs/op  │ allocs/op   vs base                │
PrevEventIDs/Original1-8       2.000 ± 0%   1.000 ± 0%  -50.00% (p=0.000 n=10)
PrevEventIDs/Original10-8      6.000 ± 0%   2.000 ± 0%  -66.67% (p=0.000 n=10)
PrevEventIDs/Original100-8     9.000 ± 0%   3.000 ± 0%  -66.67% (p=0.000 n=10)
PrevEventIDs/Original500-8    12.000 ± 0%   3.000 ± 0%  -75.00% (p=0.000 n=10)
PrevEventIDs/Original1000-8   14.000 ± 0%   3.000 ± 0%  -78.57% (p=0.000 n=10)
PrevEventIDs/Original2000-8   16.000 ± 0%   3.000 ± 0%  -81.25% (p=0.000 n=10)
geomean                        8.137        2.335       -71.31%
```
This commit is contained in:
Till 2024-01-20 22:26:57 +01:00 committed by GitHub
parent d357615452
commit 8e4dc6b4ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 123 additions and 7 deletions

View File

@ -95,6 +95,12 @@ func Backfill(
}
}
// Enforce a limit of 100 events, so as not to hit the DB too hard.
// Synapse has a hard limit of 100 events as well.
if req.Limit > 100 {
req.Limit = 100
}
// Query the Roomserver.
if err = rsAPI.PerformBackfill(httpReq.Context(), &req, &res); err != nil {
util.GetLogger(httpReq.Context()).WithError(err).Error("query.PerformBackfill failed")

View File

@ -8,7 +8,6 @@ import (
"github.com/matrix-org/dendrite/roomserver/types"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/gomatrixserverlib/spec"
"github.com/matrix-org/util"
)
type PerformCreateRoomRequest struct {
@ -91,14 +90,44 @@ type PerformBackfillRequest struct {
VirtualHost spec.ServerName `json:"virtual_host"`
}
// PrevEventIDs returns the prev_event IDs of all backwards extremities, de-duplicated in a lexicographically sorted order.
// limitPrevEventIDs is the maximum number of event IDs
// returned by PrevEventIDs.
const limitPrevEventIDs = 100
// PrevEventIDs returns the prev_event IDs of either 100 backwards extremities or
// len(r.BackwardsExtremities). Limited to 100, due to Synapse/Dendrite stopping after reaching
// this limit. (which sounds sane)
func (r *PerformBackfillRequest) PrevEventIDs() []string {
var prevEventIDs []string
for _, pes := range r.BackwardsExtremities {
prevEventIDs = append(prevEventIDs, pes...)
var uniqueIDs map[string]struct{}
// Create a unique eventID map of either 100 or len(r.BackwardsExtremities).
// 100 since Synapse/Dendrite stops after reaching 100 events.
if len(r.BackwardsExtremities) > limitPrevEventIDs {
uniqueIDs = make(map[string]struct{}, limitPrevEventIDs)
} else {
uniqueIDs = make(map[string]struct{}, len(r.BackwardsExtremities))
}
prevEventIDs = util.UniqueStrings(prevEventIDs)
return prevEventIDs
outerLoop:
for _, pes := range r.BackwardsExtremities {
for _, evID := range pes {
uniqueIDs[evID] = struct{}{}
// We found enough unique eventIDs.
if len(uniqueIDs) >= limitPrevEventIDs {
break outerLoop
}
}
}
// map -> []string
result := make([]string, len(uniqueIDs))
i := 0
for evID := range uniqueIDs {
result[i] = evID
i++
}
return result
}
// PerformBackfillResponse is a response to PerformBackfill.

View File

@ -0,0 +1,81 @@
package api
import (
"fmt"
"math/rand"
"testing"
"github.com/stretchr/testify/assert"
)
// BenchmarkPrevEventIDs benchmarks PrevEventIDs for several numbers of
// backwards extremities, delegating each size to benchPrevEventIDs.
func BenchmarkPrevEventIDs(b *testing.B) {
	counts := [...]int{1, 10, 100, 500, 1000, 2000}
	for i := range counts {
		benchPrevEventIDs(b, counts[i])
	}
}
func benchPrevEventIDs(b *testing.B, count int) {
bwExtrems := generateBackwardsExtremities(b, count)
backfiller := PerformBackfillRequest{
BackwardsExtremities: bwExtrems,
}
b.Run(fmt.Sprintf("Original%d", count), func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
prevIDs := backfiller.PrevEventIDs()
_ = prevIDs
}
})
}
// testLike is the subset of the testing API needed by the generators below,
// so that both *testing.T and *testing.B can be passed in.
type testLike interface {
	Helper()
}

const (
	// randomIDCharsCount is the length of every generated event ID.
	randomIDCharsCount = 10
	// alphanumerics is the alphabet generated event IDs are drawn from.
	alphanumerics = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
)

// generateBackwardsExtremities builds count map entries. The entry for
// index i is keyed by the event ID for seed i and holds the three event IDs
// for seeds i+1, i+2 and i+3, so neighbouring entries share prev event IDs.
func generateBackwardsExtremities(t testLike, count int) map[string][]string {
	t.Helper()

	extremities := make(map[string][]string, count)
	for seed := int64(0); seed < int64(count); seed++ {
		prevIDs := make([]string, 0, 3)
		for offset := int64(1); offset <= 3; offset++ {
			prevIDs = append(prevIDs, randomEventId(seed+offset))
		}
		extremities[randomEventId(seed)] = prevIDs
	}
	return extremities
}

// randomEventId returns a string of randomIDCharsCount characters taken from
// alphanumerics, chosen by a math/rand source seeded with src. The same src
// therefore always produces the same ID.
func randomEventId(src int64) string {
	const alphabetSize = int64(len(alphanumerics))

	source := rand.NewSource(src)
	id := make([]byte, 0, randomIDCharsCount)
	for len(id) < randomIDCharsCount {
		id = append(id, alphanumerics[source.Int63()%alphabetSize])
	}
	return string(id)
}
func TestPrevEventIDs(t *testing.T) {
// generate 10 backwards extremities
bwExtrems := generateBackwardsExtremities(t, 10)
backfiller := PerformBackfillRequest{
BackwardsExtremities: bwExtrems,
}
prevIDs := backfiller.PrevEventIDs()
// Given how "generateBackwardsExtremities" works, this
// generates 12 unique event IDs
assert.Equal(t, 12, len(prevIDs))
// generate 200 backwards extremities
backfiller.BackwardsExtremities = generateBackwardsExtremities(t, 200)
prevIDs = backfiller.PrevEventIDs()
// PrevEventIDs returns at max 100 event IDs
assert.Equal(t, 100, len(prevIDs))
}