Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spire-agent: limit JWT-SVID cache size #5633

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmd/spire-agent/cli/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type agentConfig struct {
AllowedForeignJWTClaims []string `hcl:"allowed_foreign_jwt_claims"`
AvailabilityTarget string `hcl:"availability_target"`
X509SVIDCacheMaxSize int `hcl:"x509_svid_cache_max_size"`
JWTSVIDCacheMaxSize int `hcl:"jwt_svid_cache_max_size"`

AuthorizedDelegates []string `hcl:"authorized_delegates"`

Expand Down Expand Up @@ -501,6 +502,11 @@ func NewAgentConfig(c *Config, logOptions []log.Option, allowUnknownConfig bool)
}
ac.X509SVIDCacheMaxSize = c.Agent.X509SVIDCacheMaxSize

if c.Agent.JWTSVIDCacheMaxSize < 0 {
return nil, errors.New("jwt_svid_cache_max_size should not be negative")
}
ac.JWTSVIDCacheMaxSize = c.Agent.JWTSVIDCacheMaxSize

td, err := common_cli.ParseTrustDomain(c.Agent.TrustDomain, logger)
if err != nil {
return nil, err
Expand Down
3 changes: 2 additions & 1 deletion doc/spire_agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ This may be useful for templating configuration files, for example across differ
| `trust_domain` | The trust domain that this agent belongs to (should be no more than 255 characters) | |
| `workload_x509_svid_key_type` | The workload X509 SVID key type &lt;rsa-2048&vert;ec-p256&gt; | ec-p256 |
| `availability_target` | The minimum amount of time desired to gracefully handle SPIRE Server or Agent downtime. This configurable influences how aggressively X509 SVIDs should be rotated. If set, must be at least 24h. See [Availability Target](#availability-target) | |
| `x509_svid_cache_max_size` | Soft limit of max number of SVIDs that would be stored in LRU cache | 1000 |
| `x509_svid_cache_max_size` | Soft limit of max number of X509-SVIDs that would be stored in LRU cache | 1000 |
| `jwt_svid_cache_max_size` | Hard limit of max number of JWT-SVIDs that would be stored in LRU cache | 1000 |

| experimental | Description | Default |
|:------------------------------|--------------------------------------------------------------------------------------|-------------------------|
Expand Down
3 changes: 2 additions & 1 deletion pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ func (a *Agent) newManager(ctx context.Context, sto storage.Storage, cat catalog
Storage: sto,
SyncInterval: a.c.SyncInterval,
UseSyncAuthorizedEntries: a.c.UseSyncAuthorizedEntries,
SVIDCacheMaxSize: a.c.X509SVIDCacheMaxSize,
X509SVIDCacheMaxSize: a.c.X509SVIDCacheMaxSize,
JWTSVIDCacheMaxSize: a.c.JWTSVIDCacheMaxSize,
SVIDStoreCache: cache,
NodeAttestor: na,
RotationStrategy: rotationutil.NewRotationStrategy(a.c.AvailabilityTarget),
Expand Down
2 changes: 1 addition & 1 deletion pkg/agent/api/delegatedidentity/v1/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ func (m *FakeManager) SubscribeToBundleChanges() *cache.BundleStream {

func newTestCache() *cache.LRUCache {
log, _ := test.NewNullLogger()
return cache.NewLRUCache(log, trustDomain1, bundle1, telemetry.Blackhole{}, cache.DefaultSVIDCacheMaxSize, clock.New())
return cache.NewLRUCache(log, trustDomain1, bundle1, telemetry.Blackhole{}, cache.DefaultSVIDCacheMaxSize, cache.DefaultSVIDCacheMaxSize, clock.New())
}

func generateSubscribeToX509SVIDMetrics() []fakemetrics.MetricItem {
Expand Down
5 changes: 4 additions & 1 deletion pkg/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,12 @@ type Config struct {
// is used to sync entries from the server.
UseSyncAuthorizedEntries bool

// X509SVIDCacheMaxSize is a soft limit of max number of SVIDs that would be stored in cache
// X509SVIDCacheMaxSize is a soft limit of max number of X509-SVIDs that would be stored in cache
X509SVIDCacheMaxSize int

// JWTSVIDCacheMaxSize is a hard limit of max number of JWT-SVIDs that would be stored in cache
JWTSVIDCacheMaxSize int

// Trust domain and associated CA bundle
TrustDomain spiffeid.TrustDomain
TrustBundle []*x509.Certificate
Expand Down
71 changes: 61 additions & 10 deletions pkg/agent/manager/cache/jwt_cache.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cache

import (
"container/list"
"context"
"crypto/sha256"
"encoding/base64"
Expand All @@ -23,18 +24,36 @@ type JWTSVIDCache struct {
log logrus.FieldLogger
metrics telemetry.Metrics
mu sync.RWMutex
svids map[string]*client.JWTSVID

svids map[string]*list.Element
lruList *list.List

// svidCacheMaxSize is a hard limit of max number of SVIDs that would be stored in cache
svidCacheMaxSize int
}

// jwtSvidElement is the value held by each element of the LRU list: a cached
// JWT-SVID paired with the map key it is indexed under.
type jwtSvidElement struct {
// key is the svids map key of this entry; it is read back from the list
// element on eviction so the matching map entry can be deleted.
key string
// svid is the cached JWT-SVID.
svid *client.JWTSVID
}

// CountJWTSVIDs returns the number of JWT-SVIDs currently held in the cache.
func (c *JWTSVIDCache) CountJWTSVIDs() int {
	// Counting only reads the map, so the shared (read) side of the RWMutex is
	// sufficient; the exclusive lock is reserved for operations that mutate
	// the map or the LRU list.
	c.mu.RLock()
	defer c.mu.RUnlock()

	return len(c.svids)
}

func NewJWTSVIDCache(log logrus.FieldLogger, metrics telemetry.Metrics) *JWTSVIDCache {
func NewJWTSVIDCache(log logrus.FieldLogger, metrics telemetry.Metrics, svidCacheMaxSize int) *JWTSVIDCache {
if svidCacheMaxSize <= 0 {
svidCacheMaxSize = DefaultSVIDCacheMaxSize
}
return &JWTSVIDCache{
metrics: metrics,
log: log,
svids: make(map[string]*client.JWTSVID),
metrics: metrics,
log: log,
svids: make(map[string]*list.Element),
lruList: list.New(),
svidCacheMaxSize: svidCacheMaxSize,
}
}

Expand All @@ -43,16 +62,44 @@ func (c *JWTSVIDCache) GetJWTSVID(spiffeID spiffeid.ID, audience []string) (*cli

c.mu.Lock()
defer c.mu.Unlock()
svid, ok := c.svids[key]
return svid, ok

svidElement, ok := c.svids[key]
if !ok {
return nil, ok
}
c.lruList.MoveToFront(svidElement)

return svidElement.Value.(jwtSvidElement).svid, ok
}

// SetJWTSVID stores svid in the cache under the key derived from spiffeID and
// audience and marks it as the most recently used entry.
//
// If the key is already cached, its value is replaced in place and nothing is
// evicted. Otherwise, when the cache already holds svidCacheMaxSize entries,
// the least recently used entry is evicted first so that the number of cached
// JWT-SVIDs never exceeds the limit.
func (c *JWTSVIDCache) SetJWTSVID(spiffeID spiffeid.ID, audience []string, svid *client.JWTSVID) {
	key := jwtSVIDKey(spiffeID, audience)
	entry := jwtSvidElement{
		key:  key,
		svid: svid,
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	// Refreshing an existing key does not grow the cache, so it must be
	// handled before the capacity check; otherwise an unrelated entry (or the
	// entry being refreshed) would be evicted for no reason.
	if element, ok := c.svids[key]; ok {
		element.Value = entry
		c.lruList.MoveToFront(element)
		return
	}

	// Make room for the new entry by dropping the least recently used one.
	if len(c.svids) >= c.svidCacheMaxSize {
		// Back() returns nil on an empty list; guard so the type assertion
		// below cannot panic.
		if oldest := c.lruList.Back(); oldest != nil {
			evicted := oldest.Value.(jwtSvidElement)
			delete(c.svids, evicted.key)
			c.lruList.Remove(oldest)
			// Debug level: under cache pressure this runs on every insert.
			c.log.Debug("Evicted least recently used JWT-SVID from cache")
		}
	}

	c.svids[key] = c.lruList.PushFront(entry)
}

func (c *JWTSVIDCache) TaintJWTSVIDs(ctx context.Context, taintedJWTAuthorities map[string]struct{}) {
Expand All @@ -64,14 +111,18 @@ func (c *JWTSVIDCache) TaintJWTSVIDs(ctx context.Context, taintedJWTAuthorities

removedKeyIDs := make(map[string]int)
totalCount := 0
for key, jwtSVID := range c.svids {
keyID, err := getKeyIDFromSVIDToken(jwtSVID.Token)
for key, element := range c.svids {
jwtSvidElement := element.Value.(jwtSvidElement)
keyID, err := getKeyIDFromSVIDToken(jwtSvidElement.svid.Token)
if err != nil {
c.log.WithError(err).Error("Could not get key ID from cached JWT-SVID")
continue
}

if _, tainted := taintedJWTAuthorities[keyID]; tainted {
delete(c.svids, key)
c.lruList.Remove(element)

removedKeyIDs[keyID]++
totalCount++
}
Expand Down
68 changes: 55 additions & 13 deletions pkg/agent/manager/cache/jwt_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@
now := time.Now()
tok1 := "eyJhbGciOiJFUzI1NiIsImtpZCI6ImRaRGZZaXcxdUd6TXdkTVlITDdGRVl5SzhIT0tLd0xYIiwidHlwIjoiSldUIn0.eyJhdWQiOlsidGVzdC1hdWRpZW5jZSJdLCJleHAiOjE3MjQzNjU3MzEsImlhdCI6MTcyNDI3OTQwNywic3ViIjoic3BpZmZlOi8vZXhhbXBsZS5vcmcvYWdlbnQvZGJ1c2VyIn0.dFr-oWhm5tK0bBuVXt-sGESM5l7hhoY-Gtt5DkuFoJL5Y9d4ZfmicCvUCjL4CqDB3BO_cPqmFfrO7H7pxQbGLg"
tok2 := "eyJhbGciOiJFUzI1NiIsImtpZCI6ImNKMXI5TVY4OTZTWXBMY0RMUjN3Q29QRHprTXpkN25tIiwidHlwIjoiSldUIn0.eyJhdWQiOlsidGVzdC1hdWRpZW5jZSJdLCJleHAiOjE3Mjg1NzEwMjUsImlhdCI6MTcyODU3MDcyNSwic3ViIjoic3BpZmZlOi8vZXhhbXBsZS5vcmcvYWdlbnQvZGJ1c2VyIn0.1YnDj7nknwIHEuNKEN0cNypXKS4SUeILXlNOsOs2XElHzfKhhDcl0sYKYtQc1Itf6cygz9C16VOQ_Yjoos2Qfg"
jwtSVID := &client.JWTSVID{Token: tok1, IssuedAt: now, ExpiresAt: now.Add(time.Second)}
jwtSVID1 := &client.JWTSVID{Token: tok1, IssuedAt: now, ExpiresAt: now.Add(time.Second)}
jwtSVID2 := &client.JWTSVID{Token: tok2, IssuedAt: now, ExpiresAt: now.Add(time.Second)}
// jwtSVID3 := &client.JWTSVID{Token: tok2, IssuedAt: now, ExpiresAt: now.Add(time.Second)}

Check failure on line 26 in pkg/agent/manager/cache/jwt_cache_test.go

View workflow job for this annotation

GitHub Actions / lint (linux)

commentFormatting: put a space between `//` and comment text (gocritic)

Check failure on line 26 in pkg/agent/manager/cache/jwt_cache_test.go

View workflow job for this annotation

GitHub Actions / lint (windows)

commentFormatting: put a space between `//` and comment text (gocritic)

fakeMetrics := fakemetrics.New()
log, logHook := test.NewNullLogger()
log.Level = logrus.DebugLevel
cache := NewJWTSVIDCache(log, fakeMetrics)
cache := NewJWTSVIDCache(log, fakeMetrics, 8)

spiffeID := spiffeid.RequireFromString("spiffe://example.org/blog")

Expand All @@ -37,10 +38,10 @@
assert.Nil(t, actual)

// JWT is cached
cache.SetJWTSVID(spiffeID, []string{"bar"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"bar"}, jwtSVID1)
actual, ok = cache.GetJWTSVID(spiffeID, []string{"bar"})
assert.True(t, ok)
assert.Equal(t, jwtSVID, actual)
assert.Equal(t, jwtSVID1, actual)

// Test tainting of JWT-SVIDs
ctx := context.Background()
Expand All @@ -57,7 +58,7 @@
name: "one authority tainted, one JWT-SVID",
taintedKeyIDs: map[string]struct{}{keyID1: {}},
setJWTSVIDsCached: func(cache *JWTSVIDCache) {
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID1)
},
expectLogs: []spiretest.LogEntry{
{
Expand Down Expand Up @@ -93,8 +94,8 @@
name: "one authority tainted, multiple JWT-SVIDs",
taintedKeyIDs: map[string]struct{}{keyID1: {}},
setJWTSVIDsCached: func(cache *JWTSVIDCache) {
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID1)
cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSVID1)
},
expectLogs: []spiretest.LogEntry{
{
Expand Down Expand Up @@ -130,8 +131,8 @@
name: "multiple authorities tainted, multiple JWT-SVIDs",
taintedKeyIDs: map[string]struct{}{keyID1: {}, keyID2: {}},
setJWTSVIDsCached: func(cache *JWTSVIDCache) {
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID1)
cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSVID1)
cache.SetJWTSVID(spiffeID, []string{"audience-3"}, jwtSVID2)
},
expectLogs: []spiretest.LogEntry{
Expand Down Expand Up @@ -176,8 +177,8 @@
name: "none of the authorities tainted is in cache",
taintedKeyIDs: map[string]struct{}{"not-cached-1": {}, "not-cached-2": {}},
setJWTSVIDsCached: func(cache *JWTSVIDCache) {
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSVID)
cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSVID1)
cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSVID1)
cache.SetJWTSVID(spiffeID, []string{"audience-3"}, jwtSVID2)
},
expectMetrics: []fakemetrics.MetricItem{
Expand All @@ -203,7 +204,7 @@
} {
tt := tt
t.Run(tt.name, func(t *testing.T) {
cache := NewJWTSVIDCache(log, fakeMetrics)
cache := NewJWTSVIDCache(log, fakeMetrics, 8)
if tt.setJWTSVIDsCached != nil {
tt.setJWTSVIDsCached(cache)
}
Expand All @@ -221,6 +222,47 @@
}
}

// TestJWTSVIDCacheSize verifies that the JWT-SVID cache enforces its maximum
// size and evicts entries in least-recently-used order.
func TestJWTSVIDCacheSize(t *testing.T) {
	fakeMetrics := fakemetrics.New()
	log, _ := test.NewNullLogger()
	log.Level = logrus.DebugLevel
	cache := NewJWTSVIDCache(log, fakeMetrics, 2)

	now := time.Now()
	// Each SVID carries a distinct token so that assert.Equal can tell the
	// cached values apart; identical tokens would let a cache that returns the
	// wrong entry pass unnoticed.
	jwtSvid1 := &client.JWTSVID{Token: "1", IssuedAt: now, ExpiresAt: now.Add(time.Second)}
	jwtSvid2 := &client.JWTSVID{Token: "2", IssuedAt: now, ExpiresAt: now.Add(time.Second)}
	jwtSvid3 := &client.JWTSVID{Token: "3", IssuedAt: now, ExpiresAt: now.Add(time.Second)}

	// Insert one more entry than the cache can hold.
	spiffeID := spiffeid.RequireFromString("spiffe://example.org/blog")
	cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSvid1)
	cache.SetJWTSVID(spiffeID, []string{"audience-2"}, jwtSvid2)
	cache.SetJWTSVID(spiffeID, []string{"audience-3"}, jwtSvid3)

	// The limit is enforced on insert.
	assert.Equal(t, 2, cache.CountJWTSVIDs())

	// The oldest entry was evicted to make room for the third one.
	_, ok := cache.GetJWTSVID(spiffeID, []string{"audience-1"})
	assert.False(t, ok)

	actual, ok := cache.GetJWTSVID(spiffeID, []string{"audience-2"})
	assert.True(t, ok)
	assert.Equal(t, jwtSvid2, actual)

	actual, ok = cache.GetJWTSVID(spiffeID, []string{"audience-3"})
	assert.True(t, ok)
	assert.Equal(t, jwtSvid3, actual)

	// Make the second token the most recently used token
	_, _ = cache.GetJWTSVID(spiffeID, []string{"audience-2"})

	// Insert a token; the third token is now the least recently used one and
	// must be the one that gets evicted.
	cache.SetJWTSVID(spiffeID, []string{"audience-1"}, jwtSvid1)
	assert.Equal(t, 2, cache.CountJWTSVIDs())

	actual, ok = cache.GetJWTSVID(spiffeID, []string{"audience-2"})
	assert.True(t, ok)
	assert.Equal(t, jwtSvid2, actual)

	_, ok = cache.GetJWTSVID(spiffeID, []string{"audience-3"})
	assert.False(t, ok)
}

func TestJWTSVIDCacheKeyHashing(t *testing.T) {
spiffeID := spiffeid.RequireFromString("spiffe://example.org/blog")
now := time.Now()
Expand All @@ -229,7 +271,7 @@
fakeMetrics := fakemetrics.New()
log, _ := test.NewNullLogger()
log.Level = logrus.DebugLevel
cache := NewJWTSVIDCache(log, fakeMetrics)
cache := NewJWTSVIDCache(log, fakeMetrics, 8)
cache.SetJWTSVID(spiffeID, []string{"ab", "cd"}, expected)

// JWT is cached
Expand Down
27 changes: 12 additions & 15 deletions pkg/agent/manager/cache/lru_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
)

const (
// DefaultSVIDCacheMaxSize is set when svidCacheMaxSize is not provided
// DefaultSVIDCacheMaxSize is set when the X509-SVID or JWT-SVID cache max size is not provided
DefaultSVIDCacheMaxSize = 1000
// SVIDSyncInterval is the interval at which SVIDs are synced with subscribers
SVIDSyncInterval = 500 * time.Millisecond
Expand Down Expand Up @@ -141,7 +141,7 @@ type LRUCache struct {
svids map[string]*X509SVID

// x509SvidCacheMaxSize is a soft limit of max number of X509-SVIDs that would be stored in cache
svidCacheMaxSize int
x509SvidCacheMaxSize int

subscribeBackoffFn func() backoff.BackOff

Expand All @@ -150,14 +150,14 @@ type LRUCache struct {
taintedBatchProcessedCh chan struct{}
}

func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundle *Bundle, metrics telemetry.Metrics, svidCacheMaxSize int, clk clock.Clock) *LRUCache {
if svidCacheMaxSize <= 0 {
svidCacheMaxSize = DefaultSVIDCacheMaxSize
func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundle *Bundle, metrics telemetry.Metrics, x509SvidCacheMaxSize int, jwtSvidCacheMaxSize int, clk clock.Clock) *LRUCache {
if x509SvidCacheMaxSize <= 0 {
x509SvidCacheMaxSize = DefaultSVIDCacheMaxSize
}

return &LRUCache{
BundleCache: NewBundleCache(trustDomain, bundle),
JWTSVIDCache: NewJWTSVIDCache(log, metrics),
JWTSVIDCache: NewJWTSVIDCache(log, metrics, jwtSvidCacheMaxSize),

log: log,
metrics: metrics,
Expand All @@ -168,9 +168,9 @@ func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundl
bundles: map[spiffeid.TrustDomain]*spiffebundle.Bundle{
trustDomain: bundle,
},
svids: make(map[string]*X509SVID),
svidCacheMaxSize: svidCacheMaxSize,
clk: clk,
svids: make(map[string]*X509SVID),
x509SvidCacheMaxSize: x509SvidCacheMaxSize,
clk: clk,
subscribeBackoffFn: func() backoff.BackOff {
return backoff.NewBackoff(clk, SVIDSyncInterval)
},
Expand Down Expand Up @@ -218,10 +218,7 @@ func (c *LRUCache) CountX509SVIDs() int {
}

func (c *LRUCache) CountJWTSVIDs() int {
c.mu.RLock()
defer c.mu.RUnlock()

return len(c.JWTSVIDCache.svids)
return c.JWTSVIDCache.CountJWTSVIDs()
}

func (c *LRUCache) CountRecords() int {
Expand Down Expand Up @@ -442,7 +439,7 @@ func (c *LRUCache) UpdateEntries(update *UpdateEntries, checkSVID func(*common.R
// entries with active subscribers which are not cached will be put in staleEntries map;
// irrespective of what svid cache size as we cannot deny identity to a subscriber
activeSubsByEntryID, recordsWithLastAccessTime := c.syncSVIDsWithSubscribers()
extraSize := len(c.svids) - c.svidCacheMaxSize
extraSize := len(c.svids) - c.x509SvidCacheMaxSize

// delete svids without subscribers and which have not been accessed since svidCacheExpiryTime
if extraSize > 0 {
Expand Down Expand Up @@ -781,7 +778,7 @@ func (c *LRUCache) syncSVIDsWithSubscribers() (map[string]struct{}, []recordAcce
lastAccessTimestamps = append(lastAccessTimestamps, newRecordAccessEvent(record.lastAccessTimestamp, id))
}

remainderSize := c.svidCacheMaxSize - len(c.svids)
remainderSize := c.x509SvidCacheMaxSize - len(c.svids)
// add records which are not cached for remainder of cache size
for id := range c.records {
if len(c.staleEntries) >= remainderSize {
Expand Down
Loading
Loading