From 25a8814be39c7c5ca83294ff553018ca6d0a3c67 Mon Sep 17 00:00:00 2001 From: Li Date: Fri, 10 Jan 2025 16:14:48 +0800 Subject: [PATCH] Use Intersect to Narrow Iterate Range and Reduce Memory Allocation --- posting/list.go | 56 ++++++++++++++++++++++++++++++------------------- x/x.go | 32 ++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 21 deletions(-) diff --git a/posting/list.go b/posting/list.go index 17a415050b3..e4038b2b848 100644 --- a/posting/list.go +++ b/posting/list.go @@ -1672,10 +1672,8 @@ func (l *List) Uids(opt ListOptions) (*pb.List, error) { if opt.First == 0 { opt.First = math.MaxInt32 } - // Pre-assign length to make it faster. l.RLock() - // Use approximate length for initial capacity. - res := make([]uint64, 0, l.mutationMap.len()+codec.ApproxLen(l.plist.Pack)) + out := &pb.List{} if l.mutationMap.len() == 0 && opt.Intersect != nil && len(l.plist.Splits) == 0 { if opt.ReadTs < l.minTs { @@ -1687,29 +1685,45 @@ func (l *List) Uids(opt ListOptions) (*pb.List, error) { return out, nil } - var uidMin, uidMax uint64 = 0, 0 - if opt.Intersect != nil && len(opt.Intersect.Uids) > 0 { - uidMin = opt.Intersect.Uids[0] - uidMax = opt.Intersect.Uids[len(opt.Intersect.Uids)-1] + // Pre-assign length to make it faster. + res := make([]uint64, 0, x.MinInt(opt.First, len(opt.Intersect.Uids), l.mutationMap.len()+codec.ApproxLen(l.plist.Pack))) + + checkLimit := func() bool { + // We need the last N. + // TODO: This could be optimized by only considering some of the last UidBlocks. + if opt.First < 0 { + if len(res) > -opt.First { + res = res[1:] + } + } else if len(res) > opt.First { + return true + } + return false } - err := l.iterate(opt.ReadTs, opt.AfterUid, func(p *pb.Posting) error { - if p.PostingType == pb.Posting_REF { - if p.Uid < uidMin { - return nil + if opt.Intersect != nil && len(opt.Intersect.Uids) < l.mutationMap.len()+codec.ApproxLen(l.plist.Pack) { + for _, uid := range opt.Intersect.Uids { + found, _, err := l.findPosting(opt.ReadTs, uid) + if err != nil { + l.RUnlock() + return out, errors.Wrapf(err, "While find posting for UIDs") } - if p.Uid > uidMax && uidMax > 0 { - return ErrStopIteration + if found { + res = append(res, uid) + if checkLimit() { + break + } } - res = append(res, p.Uid) + } + out.Uids = res + l.RUnlock() + return out, nil + } - if opt.First < 0 { - // We need the last N. - // TODO: This could be optimized by only considering some of the last UidBlocks. - if len(res) > -opt.First { - res = res[1:] - } - } else if len(res) > opt.First { + err := l.iterate(opt.ReadTs, opt.AfterUid, func(p *pb.Posting) error { + if p.PostingType == pb.Posting_REF { + res = append(res, p.Uid) + if checkLimit() { return ErrStopIteration } } diff --git a/x/x.go b/x/x.go index f0e7aa5287e..82282acffce 100644 --- a/x/x.go +++ b/x/x.go @@ -619,6 +619,38 @@ func Max(a, b uint64) uint64 { return b } +// MinInt returns the smallest integer among the given numbers. +// The first two arguments are mandatory, additional numbers are optional. +func MinInt(a, b int, nums ...int) int { + min := a + if b < min { + min = b + } + + for _, num := range nums { + if num < min { + min = num + } + } + return min +} + +// MaxInt returns the largest integer among the given numbers. +// The first two arguments are mandatory, additional numbers are optional. +func MaxInt(a, b int, nums ...int) int { + max := a + if b > max { + max = b + } + + for _, num := range nums { + if num > max { + max = num + } + } + return max +} + // ExponentialRetry runs the given function until it succeeds or can no longer be retried. func ExponentialRetry(maxRetries int, waitAfterFailure time.Duration, f func() error) error {