Skip to content

Commit

Permalink
Ristretto-based memory-limited cache (#13)
Browse files Browse the repository at this point in the history
* try out ristretto with a bounded cache

* fixed the test

* --cacheUseRistretto and v0.0.22

* code inspection fix
  • Loading branch information
d-led authored Feb 19, 2021
1 parent bac331f commit c241f73
Show file tree
Hide file tree
Showing 9 changed files with 239 additions and 17 deletions.
11 changes: 11 additions & 0 deletions .link-checker-service.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,18 @@ corsOrigins = [
maxConcurrentHTTPRequests = 200

cacheExpirationInterval = "24h"

# this option is valid only for the default cache
cacheCleanupInterval = "48h"

# use a github.com/dgraph-io/ristretto based cache that can limit the cache memory
# this cache is expected to be slightly slower due to the size calculations
cacheUseRistretto = true
cacheMaxSize = 1000_000_000 # approx. max cache size in bytes
cacheNumCounters = 10_000_000 # number of 4-bit access counters. Set at approx 10x max unique expected URLs

# failures can happen for any reason
# failing links will be retried in a subsequent check after that period
retryFailedAfter = "2m"

# rate-limit requests by IP. Empty string for no limits
Expand Down
8 changes: 8 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

Notable changes will be documented here

## 0.0.22

- link-checker-service:
  - a new optional memory-limited cache based on [github.com/dgraph-io/ristretto](https://github.com/dgraph-io/ristretto)
    - run the service with `--cacheUseRistretto`
    - see the options in [.link-checker-service.toml](.link-checker-service.toml)

## 0.0.21

- link-checker-service:
Expand Down
12 changes: 12 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ const (
maxConcurrentHTTPRequestsKey = "maxConcurrentHTTPRequests"
cacheExpirationIntervalKey = "cacheExpirationInterval"
cacheCleanupIntervalKey = "cacheCleanupInterval"
cacheUseRistrettoKey = "cacheUseRistretto"
cacheMaxSizeKey = "cacheMaxSize"
cacheNumCountersKey = "cacheNumCounters"
retryFailedAfterKey = "retryFailedAfter"
maxURLsInRequestKey = "maxURLsInRequest"
requestsPerSecondPerDomainKey = "requestsPerSecondPerDomain"
Expand Down Expand Up @@ -101,10 +104,19 @@ func init() {
// service
rootCmd.PersistentFlags().UintP(maxConcurrentHTTPRequestsKey, "c", 256, "maximum number of total concurrent HTTP requests")
_ = viper.BindPFlag(maxConcurrentHTTPRequestsKey, rootCmd.PersistentFlags().Lookup(maxConcurrentHTTPRequestsKey))

// cache
rootCmd.PersistentFlags().String(cacheExpirationIntervalKey, "24h", "Expire each URL check result after <interval> (in ns/us/ms/s/m/h)")
_ = viper.BindPFlag(cacheExpirationIntervalKey, rootCmd.PersistentFlags().Lookup(cacheExpirationIntervalKey))
rootCmd.PersistentFlags().String(cacheCleanupIntervalKey, "48h", "Interval between cache cleanups (in ns/us/ms/s/m/h)")
_ = viper.BindPFlag(cacheCleanupIntervalKey, rootCmd.PersistentFlags().Lookup(cacheCleanupIntervalKey))
rootCmd.PersistentFlags().Bool(cacheUseRistrettoKey, false, "Use a memory-bound cache (see the cacheMaxSize option)")
_ = viper.BindPFlag(cacheUseRistrettoKey, rootCmd.PersistentFlags().Lookup(cacheUseRistrettoKey))
rootCmd.PersistentFlags().Int64(cacheMaxSizeKey, 1000_000_000, "Approximate maximum cache size in bytes (when cacheUseRistretto enabled)")
_ = viper.BindPFlag(cacheMaxSizeKey, rootCmd.PersistentFlags().Lookup(cacheMaxSizeKey))
rootCmd.PersistentFlags().Int64(cacheNumCountersKey, 10_000_000, "Number of 4-bit access counters. Set at approx 10x max unique expected URLs (when cacheUseRistretto enabled)")
_ = viper.BindPFlag(cacheNumCountersKey, rootCmd.PersistentFlags().Lookup(cacheNumCountersKey))

rootCmd.PersistentFlags().String(retryFailedAfterKey, "30s", "If a URL check failed, e.g. intermittently, re-run it after <interval> (in ns/us/ms/s/m/h)")
_ = viper.BindPFlag(retryFailedAfterKey, rootCmd.PersistentFlags().Lookup(retryFailedAfterKey))
rootCmd.PersistentFlags().UintP(maxURLsInRequestKey, "m", 0, "Maximum number URLs allowed per request")
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ module github.com/siemens/link-checker-service
go 1.15

require (
github.com/alvaroloes/enumer v1.1.2 // indirect
github.com/appleboy/gin-jwt/v2 v2.6.4
github.com/darren/gpac v0.0.0-20200702020854-d9398608e64a
github.com/dgraph-io/ristretto v0.0.3
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/gin-contrib/cors v1.3.1
Expand Down
12 changes: 12 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DataDog/datadog-go v0.0.0-20180330214955-e67964b4021a/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alvaroloes/enumer v1.1.2 h1:5khqHB33TZy1GWCO/lZwcroBFh7u+0j40T83VUbfAMY=
github.com/alvaroloes/enumer v1.1.2/go.mod h1:FxrjvuXoDAx9isTJrv4c+T410zFi0DtXIT0m65DJ+Wo=
github.com/appleboy/gin-jwt/v2 v2.6.4 h1:4YlMh3AjCFnuIRiL27b7TXns7nLx8tU/TiSgh40RRUI=
github.com/appleboy/gin-jwt/v2 v2.6.4/go.mod h1:CZpq1cRw+kqi0+yD2CwVw7VGXrrx4AqBdeZnwxVmoAs=
github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4=
Expand All @@ -31,6 +34,7 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
Expand All @@ -47,8 +51,12 @@ github.com/darren/gpac v0.0.0-20200702020854-d9398608e64a/go.mod h1:1Id6bMaG5dQY
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/ristretto v0.0.3 h1:jh22xisGBjrEVnRZ1DVTpBVQm0Xndu8sMl0CWDzSIBI=
github.com/dgraph-io/ristretto v0.0.3/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dlclark/regexp2 v1.2.0 h1:8sAhBGEM0dRWogWqWyQeIJnxjWO6oIjl8FKqREDsGfk=
github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
Expand Down Expand Up @@ -227,6 +235,8 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1 h1:/I3lTljEEDNYLho3/FUB7iD/oc2cEFgVmbHzV+O0PtU=
github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
Expand Down Expand Up @@ -266,6 +276,7 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
Expand Down Expand Up @@ -431,6 +442,7 @@ golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190524210228-3d17549cdc6b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
Expand Down
89 changes: 89 additions & 0 deletions infrastructure/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package infrastructure

import (
"encoding/json"
"github.com/dgraph-io/ristretto"
"github.com/patrickmn/go-cache"
"time"
)

// resultCache abstracts the storage of URL check results keyed by URL, so that
// the cache backend (go-cache or ristretto) can be chosen at construction time.
type resultCache interface {
// Get returns the result stored for url and whether an entry was found.
Get(url string) (*URLCheckResult, bool)
// Set stores res under url.
Set(url string, res *URLCheckResult)
}

// ristrettoCache is a resultCache backed by a github.com/dgraph-io/ristretto cache.
type ristrettoCache struct {
// cache is the underlying ristretto cache instance.
cache *ristretto.Cache
// defaultExpiration is the TTL handed to the underlying cache on every Set.
defaultExpiration time.Duration
}

// Set stores res under url in the ristretto cache. The entry's cost is the
// approximate size of the key plus the JSON-encoded result, and it is stored
// with the cache's configured TTL. The boolean returned by SetWithTTL is not
// inspected.
func (c ristrettoCache) Set(url string, res *URLCheckResult) {
	cost := approxSizeOf(url, res)
	ttl := c.defaultExpiration
	c.cache.SetWithTTL(url, res, cost, ttl)
}

// Get looks up url in the ristretto cache. It returns the stored result and
// true on a hit, or nil and false on a miss.
func (c ristrettoCache) Get(url string) (*URLCheckResult, bool) {
	cached, ok := c.cache.Get(url)
	if !ok {
		return nil, false
	}

	// Set in this file only ever stores *URLCheckResult values.
	return cached.(*URLCheckResult), true
}

// defaultCache is a resultCache backed by a github.com/patrickmn/go-cache cache.
type defaultCache struct {
// cache is the underlying go-cache instance.
cache *cache.Cache
}

// Set stores res under url, using the expiration the underlying go-cache
// instance was created with (cache.DefaultExpiration).
func (c defaultCache) Set(url string, res *URLCheckResult) {
	const expiration = cache.DefaultExpiration
	c.cache.Set(url, res, expiration)
}

// Get looks up url in the go-cache instance. It returns the stored result and
// true on a hit, or nil and false on a miss.
func (c defaultCache) Get(url string) (*URLCheckResult, bool) {
	cached, ok := c.cache.Get(url)
	if !ok {
		return nil, false
	}

	// Set in this file only ever stores *URLCheckResult values.
	return cached.(*URLCheckResult), true
}

// newCache creates the result cache selected by settings: the memory-bounded
// ristretto-backed cache when settings.cacheUseRistretto is set, and the
// go-cache-backed default cache otherwise.
func newCache(settings cacheSettings) resultCache {
	if settings.cacheUseRistretto {
		return newRistrettoCache(settings)
	}
	return newDefaultCache(settings)
}

// newRistrettoCache builds a ristretto-backed result cache from settings.
// Entries expire after settings.cacheExpirationInterval. It panics if the
// underlying ristretto cache cannot be created.
func newRistrettoCache(settings cacheSettings) *ristrettoCache {
	// https://github.com/dgraph-io/ristretto#Config
	config := ristretto.Config{
		// number of keys to track frequency of (~10x max links)
		NumCounters: settings.cacheNumCounters,
		// maximum cost of cache (in bytes)
		MaxCost: settings.cacheMaxSize,
		// number of keys per Get buffer: as recommended
		BufferItems: 64,
	}

	backend, err := ristretto.NewCache(&config)
	if err != nil {
		panic(err)
	}

	wrapped := new(ristrettoCache)
	wrapped.cache = backend
	wrapped.defaultExpiration = settings.cacheExpirationInterval
	return wrapped
}

// newDefaultCache builds a go-cache-backed result cache using the expiration
// and cleanup intervals from settings.
func newDefaultCache(settings cacheSettings) *defaultCache {
	expiration := settings.cacheExpirationInterval
	cleanup := settings.cacheCleanupInterval

	backend := cache.New(expiration, cleanup)
	return &defaultCache{cache: backend}
}

// approxSizeOf estimates the memory cost of a cache entry, in bytes, as the
// length of key plus the length of res encoded as JSON. The estimate is used
// as the entry cost for the memory-bounded cache; it does not need to be exact.
//
// If res cannot be marshalled, a fixed fallback of 512 bytes plus the key
// length is returned.
func approxSizeOf(key string, res *URLCheckResult) int64 {
	encoded, err := json.Marshal(res)
	if err != nil {
		return 512 + int64(len(key)) // any number should suffice - approximate calculation
	}
	// Measure the byte slice directly; converting it to a string first would
	// only add a needless copy.
	return int64(len(encoded) + len(key))
}
50 changes: 36 additions & 14 deletions infrastructure/cached_url_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,37 @@ import (
"time"

"github.com/spf13/viper"

"github.com/patrickmn/go-cache"
)

// defaultCacheExpirationInterval is the fallback lifetime of a cached URL check result.
const defaultCacheExpirationInterval = 24 * time.Hour
// defaultCacheCleanupInterval is the fallback interval between cleanups of the default cache.
const defaultCacheCleanupInterval = 48 * time.Hour
// defaultRetryFailedAfter is the fallback delay after which a failed URL check is retried.
const defaultRetryFailedAfter = 30 * time.Second
// defaultCacheMaxSize is the fallback approximate maximum size, in bytes, of the ristretto-backed cache.
const defaultCacheMaxSize int64 = 1e9
// defaultCacheNumCounters is the fallback number of access counters for the ristretto-backed cache.
const defaultCacheNumCounters int64 = 10_000_000

// CachedURLChecker wraps a concurrency-limited URL checker
type CachedURLChecker struct {
cache *cache.Cache
ccLimitedChecker *CCLimitedURLChecker
cache resultCache
retryFailedAfterSeconds int64

ccLimitedChecker *CCLimitedURLChecker
}

// cacheSettings holds the configuration used to construct the result cache
// and the cached URL checker.
type cacheSettings struct {
// cacheUseRistretto selects the ristretto-backed cache instead of the default one.
cacheUseRistretto bool
// cacheExpirationInterval is how long a cached result is kept before it expires.
cacheExpirationInterval time.Duration
// cacheCleanupInterval is the cleanup interval of the default (go-cache) cache.
cacheCleanupInterval time.Duration
// cacheMaxSize is the approximate maximum cache size in bytes (ristretto MaxCost).
cacheMaxSize int64
// cacheNumCounters is the number of access counters (ristretto NumCounters).
cacheNumCounters int64
// retryFailedAfter is the delay after which a failed URL check is retried.
retryFailedAfter time.Duration
}

// NewCachedURLChecker creates a new cached URL checker instance
func NewCachedURLChecker() *CachedURLChecker {
settings := fetchCachedURLCheckerSettings()

checker := CachedURLChecker{
cache: cache.New(settings.cacheExpirationInterval, settings.cacheCleanupInterval),
cache: newCache(settings),
ccLimitedChecker: NewCCLimitedURLChecker(),
retryFailedAfterSeconds: int64(settings.retryFailedAfter.Seconds()),
}
Expand All @@ -62,6 +67,27 @@ func fetchCachedURLCheckerSettings() cacheSettings {
s.cacheCleanupInterval = d
}

cacheUseRistretto := viper.GetBool("cacheUseRistretto")
log.Printf("cacheUseRistretto: %v", cacheUseRistretto)
s.cacheUseRistretto = cacheUseRistretto

cacheMaxSize := defaultCacheMaxSize
if cms := viper.GetInt64("cacheMaxSize"); cms > 0 {
cacheMaxSize = cms
}
s.cacheMaxSize = cacheMaxSize

cacheNumCounters := defaultCacheNumCounters
if cnc := viper.GetInt64("cacheNumCounters"); cnc > 0 {
cacheNumCounters = cnc
}
s.cacheNumCounters = cacheNumCounters

if cacheUseRistretto {
log.Printf("cacheMaxSize: %v", cacheMaxSize)
log.Printf("cacheNumCounters: %v", cacheNumCounters)
}

retryFailedAfter := viper.GetString("retryFailedAfter")
if d, err := time.ParseDuration(retryFailedAfter); err != nil {
log.Printf("Ignoring retryFailedAfter %v -> %v (%v)", cacheCleanupInterval, defaultRetryFailedAfter, err)
Expand All @@ -74,21 +100,17 @@ func fetchCachedURLCheckerSettings() cacheSettings {

// CheckURL checks the desired URL
func (c *CachedURLChecker) CheckURL(ctx context.Context, url string) *URLCheckResult {
value, found := c.cache.Get(url)

if found {
res := value.(*URLCheckResult)
res, found := c.cache.Get(url)

if found && c.shouldTakeCachedResult(res) {
// failures could have been temporary -> retry a URL after some time
if c.shouldTakeCachedResult(res) {
return res
}
return res
}

// otherwise, do the check & store
res := c.ccLimitedChecker.CheckURL(ctx, url)
res = c.ccLimitedChecker.CheckURL(ctx, url)
if res.Status != Dropped {
c.cache.Set(url, res, cache.DefaultExpiration)
c.cache.Set(url, res)
}
return res
}
Expand Down
5 changes: 2 additions & 3 deletions infrastructure/url_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,11 +383,10 @@ func (c *URLCheckerClient) CheckURL(ctx context.Context, url string) *URLCheckRe

if lastRes != nil {
lastRes.CheckerTrace = checkerTrace
elapsed := time.Since(start)
lastRes.ElapsedMs = int64(elapsed / time.Millisecond)
}

elapsed := time.Since(start)
lastRes.ElapsedMs = int64(elapsed / time.Millisecond)

return lastRes
}

Expand Down
Loading

0 comments on commit c241f73

Please sign in to comment.