Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Poc force rotation jwt authority it #527

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ require (
github.com/zeebo/errs v1.3.0
golang.org/x/crypto v0.28.0
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
golang.org/x/net v0.29.0
golang.org/x/net v0.30.0
golang.org/x/sync v0.8.0
golang.org/x/sys v0.26.0
golang.org/x/time v0.7.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1662,8 +1662,8 @@ golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo=
golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down
75 changes: 71 additions & 4 deletions pkg/agent/manager/cache/jwt_cache.go
Original file line number Diff line number Diff line change
@@ -1,28 +1,41 @@
package cache

import (
"context"
"crypto/sha256"
"encoding/base64"
"errors"
"fmt"
"io"
"sort"
"strings"
"sync"

"github.com/go-jose/go-jose/v4/jwt"
"github.com/sirupsen/logrus"
"github.com/spiffe/go-spiffe/v2/spiffeid"
"github.com/spiffe/spire/pkg/agent/client"
"github.com/spiffe/spire/pkg/common/jwtsvid"
"github.com/spiffe/spire/pkg/common/telemetry"
"github.com/spiffe/spire/pkg/common/telemetry/agent"
)

type JWTSVIDCache struct {
mu sync.Mutex
svids map[string]*client.JWTSVID
log logrus.FieldLogger
metrics telemetry.Metrics
mu sync.RWMutex
svids map[string]*client.JWTSVID
}

// CountJWTSVIDs returns the number of JWT-SVIDs currently held in the cache.
//
// The count is read while holding c.mu because the svids map is mutated under
// that same mutex by SetJWTSVID and TaintJWTSVIDs; reading a map's length
// concurrently with a write is a data race.
func (c *JWTSVIDCache) CountJWTSVIDs() int {
	c.mu.Lock()
	defer c.mu.Unlock()

	return len(c.svids)
}

func NewJWTSVIDCache() *JWTSVIDCache {
func NewJWTSVIDCache(log logrus.FieldLogger, metrics telemetry.Metrics) *JWTSVIDCache {
return &JWTSVIDCache{
svids: make(map[string]*client.JWTSVID),
metrics: metrics,
log: log,
svids: make(map[string]*client.JWTSVID),
}
}

Expand All @@ -43,6 +56,60 @@ func (c *JWTSVIDCache) SetJWTSVID(spiffeID spiffeid.ID, audience []string, svid
c.svids[key] = svid
}

// TaintJWTSVIDs removes from the cache every JWT-SVID whose token header
// carries a key ID contained in taintedJWTAuthorities.
//
// taintedJWTAuthorities is treated as a set keyed by authority key ID. Cached
// tokens whose key ID cannot be extracted are logged and left in the cache.
//
// ctx is polled after each successfully inspected entry. When it is done the
// sweep stops early with a warning; entries already removed stay removed, but
// neither the summary log line nor the tainted JWT-SVIDs sample is emitted.
//
// On a completed sweep, a single Info entry lists the distinct key IDs that
// caused removals (sorted, comma-separated) together with the number of
// JWT-SVIDs removed, and that number is recorded as the metrics sample.
func (c *JWTSVIDCache) TaintJWTSVIDs(ctx context.Context, taintedJWTAuthorities map[string]struct{}) {
	c.mu.Lock()
	defer c.mu.Unlock()

	counter := telemetry.StartCall(c.metrics, telemetry.CacheManager, agent.CacheTypeWorkload, telemetry.ProcessTaintedJWTSVIDs)
	defer counter.Done(nil)

	// Distinct key IDs that caused at least one removal. A set is used so an
	// authority that signed many cached SVIDs is reported once, not once per
	// removed SVID.
	removedKeyIDs := make(map[string]struct{})
	svidsRemoved := 0
	for key, jwtSVID := range c.svids {
		keyID, err := getKeyIDFromSVIDToken(jwtSVID.Token)
		if err != nil {
			c.log.Error(err)
			continue
		}
		if _, tainted := taintedJWTAuthorities[keyID]; tainted {
			// Deleting the current key while ranging over a map is safe in Go.
			delete(c.svids, key)
			removedKeyIDs[keyID] = struct{}{}
			svidsRemoved++
		}
		select {
		case <-ctx.Done():
			c.log.WithError(ctx.Err()).Warn("Context cancelled, exiting process of tainting JWT-SVIDs in cache")
			return
		default:
		}
	}

	if svidsRemoved > 0 {
		keyIDs := make([]string, 0, len(removedKeyIDs))
		for keyID := range removedKeyIDs {
			keyIDs = append(keyIDs, keyID)
		}
		// Map iteration order is random; sort so the log field is stable.
		sort.Strings(keyIDs)

		c.log.WithField(telemetry.JWTAuthorityKeyIDs, strings.Join(keyIDs, ",")).
			WithField(telemetry.CountJWTSVIDs, svidsRemoved).
			Info("JWT-SVIDs were removed from the JWT cache because they were issued by a tainted authority")
	}
	agent.AddCacheManagerTaintedJWTSVIDsSample(c.metrics, agent.CacheTypeWorkload, float32(svidsRemoved))
}

// getKeyIDFromSVIDToken parses svidToken as a signed JWT, accepting only the
// signature algorithms in jwtsvid.AllowedSignatureAlgorithms, and returns the
// key ID found in its header.
//
// An error is returned when the token cannot be parsed, when it does not
// carry exactly one header, or when that header's key ID is empty.
func getKeyIDFromSVIDToken(svidToken string) (string, error) {
	parsed, parseErr := jwt.ParseSigned(svidToken, jwtsvid.AllowedSignatureAlgorithms)
	if parseErr != nil {
		return "", fmt.Errorf("failed to parse JWT-SVID: %w", parseErr)
	}

	headerCount := len(parsed.Headers)
	if headerCount != 1 {
		return "", fmt.Errorf("malformed JWT-SVID: expected a single token header; got %d", headerCount)
	}

	if kid := parsed.Headers[0].KeyID; kid != "" {
		return kid, nil
	}
	return "", errors.New("missing key ID in token header of minted JWT-SVID")
}

func jwtSVIDKey(spiffeID spiffeid.ID, audience []string) string {
h := sha256.New()

Expand Down
62 changes: 59 additions & 3 deletions pkg/agent/manager/cache/jwt_cache_test.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
package cache

import (
"context"
"testing"
"time"

"github.com/hashicorp/go-metrics"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/spiffe/go-spiffe/v2/spiffeid"
"github.com/spiffe/spire/pkg/agent/client"
"github.com/spiffe/spire/pkg/common/telemetry"
"github.com/spiffe/spire/pkg/common/telemetry/agent"
"github.com/spiffe/spire/test/fakes/fakemetrics"
"github.com/spiffe/spire/test/spiretest"
"github.com/stretchr/testify/assert"
)

func TestJWTSVIDCacheBasic(t *testing.T) {
now := time.Now()
expected := &client.JWTSVID{Token: "X", IssuedAt: now, ExpiresAt: now.Add(time.Second)}
tok := "eyJhbGciOiJFUzI1NiIsImtpZCI6ImRaRGZZaXcxdUd6TXdkTVlITDdGRVl5SzhIT0tLd0xYIiwidHlwIjoiSldUIn0.eyJhdWQiOlsidGVzdC1hdWRpZW5jZSJdLCJleHAiOjE3MjQzNjU3MzEsImlhdCI6MTcyNDI3OTQwNywic3ViIjoic3BpZmZlOi8vZXhhbXBsZS5vcmcvYWdlbnQvZGJ1c2VyIn0.dFr-oWhm5tK0bBuVXt-sGESM5l7hhoY-Gtt5DkuFoJL5Y9d4ZfmicCvUCjL4CqDB3BO_cPqmFfrO7H7pxQbGLg"
keyID := "dZDfYiw1uGzMwdMYHL7FEYyK8HOKKwLX"
expected := &client.JWTSVID{Token: tok, IssuedAt: now, ExpiresAt: now.Add(time.Second)}

cache := NewJWTSVIDCache()
fakeMetrics := fakemetrics.New()
log, hook := test.NewNullLogger()
log.Level = logrus.DebugLevel
cache := NewJWTSVIDCache(log, fakeMetrics)

spiffeID := spiffeid.RequireFromString("spiffe://example.org/blog")

Expand All @@ -27,14 +40,57 @@ func TestJWTSVIDCacheBasic(t *testing.T) {
actual, ok = cache.GetJWTSVID(spiffeID, []string{"bar"})
assert.True(t, ok)
assert.Equal(t, expected, actual)

// Remove tainted authority, should not be cached anymore
cache.TaintJWTSVIDs(context.Background(), map[string]struct{}{keyID: {}})
actual, ok = cache.GetJWTSVID(spiffeID, []string{"bar"})
assert.False(t, ok)
assert.Nil(t, actual)

// Assert logs and metrics
expectLogs := []spiretest.LogEntry{
{
Level: logrus.InfoLevel,
Message: "JWT-SVIDs were removed from the JWT cache because they were issued by a tainted authority",
Data: logrus.Fields{
telemetry.CountJWTSVIDs: "1",
telemetry.JWTAuthorityKeyIDs: keyID,
},
},
}
expectMetrics := []fakemetrics.MetricItem{
{
Type: fakemetrics.AddSampleType,
Key: []string{telemetry.CacheManager, telemetry.CountJWTSVIDs, agent.CacheTypeWorkload},
Val: 1,
},
{
Type: fakemetrics.IncrCounterWithLabelsType,
Key: []string{telemetry.CacheManager, agent.CacheTypeWorkload, telemetry.ProcessTaintedJWTSVIDs},
Val: 1,
Labels: []metrics.Label{{Name: "status", Value: "OK"}},
},
{
Type: fakemetrics.MeasureSinceWithLabelsType,
Key: []string{telemetry.CacheManager, agent.CacheTypeWorkload, telemetry.ProcessTaintedJWTSVIDs, telemetry.ElapsedTime},
Val: 0,
Labels: []metrics.Label{{Name: "status", Value: "OK"}},
},
}

spiretest.AssertLogs(t, hook.AllEntries(), expectLogs)
assert.Equal(t, expectMetrics, fakeMetrics.AllMetrics())
}

func TestJWTSVIDCacheKeyHashing(t *testing.T) {
spiffeID := spiffeid.RequireFromString("spiffe://example.org/blog")
now := time.Now()
expected := &client.JWTSVID{Token: "X", IssuedAt: now, ExpiresAt: now.Add(time.Second)}

cache := NewJWTSVIDCache()
fakeMetrics := fakemetrics.New()
log, _ := test.NewNullLogger()
log.Level = logrus.DebugLevel
cache := NewJWTSVIDCache(log, fakeMetrics)
cache.SetJWTSVID(spiffeID, []string{"ab", "cd"}, expected)

// JWT is cached
Expand Down
11 changes: 6 additions & 5 deletions pkg/agent/manager/cache/lru_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/spiffe/go-spiffe/v2/spiffeid"
"github.com/spiffe/spire/pkg/common/backoff"
"github.com/spiffe/spire/pkg/common/telemetry"
"github.com/spiffe/spire/pkg/common/telemetry/agent"
agentmetrics "github.com/spiffe/spire/pkg/common/telemetry/agent"
"github.com/spiffe/spire/pkg/common/x509util"
"github.com/spiffe/spire/proto/spire/common"
Expand Down Expand Up @@ -42,7 +43,7 @@ type UpdateEntries struct {
TaintedX509Authorities []string

// TaintedJWTAuthorities is a set of all tainted JWT authorities notified by the server.
TaintedJWTAuthorities []string
TaintedJWTAuthorities map[string]struct{}

// RegistrationEntries is a set of all registration entries available to the
// agent, keyed by registration entry id.
Expand Down Expand Up @@ -156,7 +157,7 @@ func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundl

return &LRUCache{
BundleCache: NewBundleCache(trustDomain, bundle),
JWTSVIDCache: NewJWTSVIDCache(),
JWTSVIDCache: NewJWTSVIDCache(log, metrics),

log: log,
metrics: metrics,
Expand Down Expand Up @@ -635,7 +636,7 @@ func (c *LRUCache) notifyTaintedBatchProcessed() {

// processTaintedSVIDs identifies and removes tainted SVIDs from the cache that have been signed by the given tainted authorities.
func (c *LRUCache) processTaintedSVIDs(entryIDs []string, taintedX509Authorities []*x509.Certificate) {
counter := telemetry.StartCall(c.metrics, telemetry.CacheManager, "", telemetry.ProcessTaintedSVIDs)
counter := telemetry.StartCall(c.metrics, telemetry.CacheManager, agent.CacheTypeWorkload, telemetry.ProcessTaintedX509SVIDs)
defer counter.Done(nil)

taintedSVIDs := 0
Expand Down Expand Up @@ -664,8 +665,8 @@ func (c *LRUCache) processTaintedSVIDs(entryIDs []string, taintedX509Authorities
}
}

agentmetrics.AddCacheManagerTaintedSVIDsSample(c.metrics, "", float32(taintedSVIDs))
c.log.WithField(telemetry.TaintedSVIDs, taintedSVIDs).Info("Tainted X.509 SVIDs")
agentmetrics.AddCacheManagerTaintedX509SVIDsSample(c.metrics, agentmetrics.CacheTypeWorkload, float32(taintedSVIDs))
c.log.WithField(telemetry.TaintedX509SVIDs, taintedSVIDs).Info("Tainted X.509 SVIDs")
}

// Notify subscriber of selector set only if all SVIDs for corresponding selector set are cached
Expand Down
17 changes: 9 additions & 8 deletions pkg/agent/manager/cache/lru_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/spiffe/go-spiffe/v2/bundle/spiffebundle"
"github.com/spiffe/go-spiffe/v2/spiffeid"
"github.com/spiffe/spire/pkg/common/telemetry"
"github.com/spiffe/spire/pkg/common/telemetry/agent"
"github.com/spiffe/spire/proto/spire/common"
"github.com/spiffe/spire/test/clock"
"github.com/spiffe/spire/test/fakes/fakemetrics"
Expand Down Expand Up @@ -1033,7 +1034,7 @@ func TestTaintX509SVIDs(t *testing.T) {
expectElapsedTimeMetric := []fakemetrics.MetricItem{
{
Type: fakemetrics.IncrCounterWithLabelsType,
Key: []string{"cache_manager", "", "process_tainted_svids"},
Key: []string{telemetry.CacheManager, agent.CacheTypeWorkload, telemetry.ProcessTaintedX509SVIDs},
Val: 1,
Labels: []telemetry.Label{
{
Expand All @@ -1044,7 +1045,7 @@ func TestTaintX509SVIDs(t *testing.T) {
},
{
Type: fakemetrics.MeasureSinceWithLabelsType,
Key: []string{"cache_manager", "", "process_tainted_svids", "elapsed_time"},
Key: []string{telemetry.CacheManager, agent.CacheTypeWorkload, telemetry.ProcessTaintedX509SVIDs, telemetry.ElapsedTime},
Val: 0,
Labels: []telemetry.Label{
{
Expand All @@ -1070,7 +1071,7 @@ func TestTaintX509SVIDs(t *testing.T) {
{
Level: logrus.InfoLevel,
Message: "Tainted X.509 SVIDs",
Data: logrus.Fields{telemetry.TaintedSVIDs: "3"},
Data: logrus.Fields{telemetry.TaintedX509SVIDs: "3"},
},
{
Level: logrus.InfoLevel,
Expand All @@ -1079,7 +1080,7 @@ func TestTaintX509SVIDs(t *testing.T) {
},
}
expectMetrics := append([]fakemetrics.MetricItem{
{Type: fakemetrics.AddSampleType, Key: []string{telemetry.CacheManager, "", telemetry.TaintedSVIDs}, Val: 3}},
{Type: fakemetrics.AddSampleType, Key: []string{telemetry.CacheManager, telemetry.TaintedX509SVIDs, agent.CacheTypeWorkload}, Val: 3}},
expectElapsedTimeMetric...)
assertBatchProcess(expectLog, expectMetrics, "e3", "e4", "e5", "e6", "e7", "e8", "e9")

Expand All @@ -1091,7 +1092,7 @@ func TestTaintX509SVIDs(t *testing.T) {
{
Level: logrus.InfoLevel,
Message: "Tainted X.509 SVIDs",
Data: logrus.Fields{telemetry.TaintedSVIDs: "3"},
Data: logrus.Fields{telemetry.TaintedX509SVIDs: "3"},
},
{
Level: logrus.InfoLevel,
Expand All @@ -1100,7 +1101,7 @@ func TestTaintX509SVIDs(t *testing.T) {
},
}
expectMetrics = append([]fakemetrics.MetricItem{
{Type: fakemetrics.AddSampleType, Key: []string{telemetry.CacheManager, "", telemetry.TaintedSVIDs}, Val: 3}},
{Type: fakemetrics.AddSampleType, Key: []string{telemetry.CacheManager, telemetry.TaintedX509SVIDs, agent.CacheTypeWorkload}, Val: 3}},
expectElapsedTimeMetric...)
assertBatchProcess(expectLog, expectMetrics, "e3", "e4", "e8", "e9")

Expand All @@ -1112,15 +1113,15 @@ func TestTaintX509SVIDs(t *testing.T) {
{
Level: logrus.InfoLevel,
Message: "Tainted X.509 SVIDs",
Data: logrus.Fields{telemetry.TaintedSVIDs: "2"},
Data: logrus.Fields{telemetry.TaintedX509SVIDs: "2"},
},
{
Level: logrus.InfoLevel,
Message: "Finished processing all tainted entries",
},
}
expectMetrics = append([]fakemetrics.MetricItem{
{Type: fakemetrics.AddSampleType, Key: []string{telemetry.CacheManager, "", telemetry.TaintedSVIDs}, Val: 2}},
{Type: fakemetrics.AddSampleType, Key: []string{telemetry.CacheManager, telemetry.TaintedX509SVIDs, agent.CacheTypeWorkload}, Val: 2}},
expectElapsedTimeMetric...)
assertBatchProcess(expectLog, expectMetrics, "e3", "e4")
}
Expand Down
11 changes: 8 additions & 3 deletions pkg/agent/manager/storecache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/spiffe/go-spiffe/v2/spiffeid"
"github.com/spiffe/spire/pkg/agent/manager/cache"
"github.com/spiffe/spire/pkg/common/telemetry"
"github.com/spiffe/spire/pkg/common/telemetry/agent"
telemetry_agent "github.com/spiffe/spire/pkg/common/telemetry/agent"
"github.com/spiffe/spire/pkg/common/x509util"
"github.com/spiffe/spire/proto/spire/common"
Expand Down Expand Up @@ -228,7 +229,7 @@ func (c *Cache) TaintX509SVIDs(ctx context.Context, taintedX509Authorities []*x5
c.mtx.Lock()
defer c.mtx.Unlock()

counter := telemetry.StartCall(c.c.Metrics, telemetry.CacheManager, "svid_store", telemetry.ProcessTaintedSVIDs)
counter := telemetry.StartCall(c.c.Metrics, telemetry.CacheManager, agent.CacheTypeSVIDStore, telemetry.ProcessTaintedX509SVIDs)
defer counter.Done(nil)

taintedSVIDs := 0
Expand All @@ -252,8 +253,12 @@ func (c *Cache) TaintX509SVIDs(ctx context.Context, taintedX509Authorities []*x5
}
}

telemetry_agent.AddCacheManagerExpiredSVIDsSample(c.c.Metrics, "svid_store", float32(taintedSVIDs))
c.c.Log.WithField(telemetry.TaintedSVIDs, taintedSVIDs).Info("Tainted X.509 SVIDs")
telemetry_agent.AddCacheManagerExpiredSVIDsSample(c.c.Metrics, agent.CacheTypeSVIDStore, float32(taintedSVIDs))
c.c.Log.WithField(telemetry.TaintedX509SVIDs, taintedSVIDs).Info("Tainted X.509 SVIDs")
}

// TaintJWTSVIDs is a no-op for this cache: it takes no action for the given
// tainted JWT authorities and does not consult ctx.
//
// NOTE(review): presumably this exists so Cache exposes the same
// TaintJWTSVIDs signature as the workload cache's JWT-SVID cache — confirm
// against the interface this type is expected to satisfy.
func (c *Cache) TaintJWTSVIDs(ctx context.Context, taintedJWTAuthorities map[string]struct{}) {
// Nothing to do here
}

// GetStaleEntries obtains a list of stale entries, that needs new SVIDs
Expand Down
Loading
Loading