Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v16] Create DiscoverEC2 User Tasks when Auto Discover fails on EC2 instances #47618

Open
wants to merge 6 commits into
base: branch/v16
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 57 additions & 32 deletions api/gen/proto/go/teleport/usertasks/v1/user_tasks.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions api/proto/teleport/usertasks/v1/user_tasks.proto
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ message DiscoverEC2 {
string account_id = 2;
// Region is the AWS Region where Teleport failed to enroll EC2 instances.
string region = 3;
// SSMDocument is the name of the Amazon Systems Manager (SSM) Document that was used to install Teleport on the instance.
// In Amazon console, the document is at:
// https://REGION.console.aws.amazon.com/systems-manager/documents/SSM_DOCUMENT/description
string ssm_document = 4;
// InstallerScript is the Teleport installer script that was used to install teleport on the instance.
string installer_script = 5;
}

// DiscoverEC2Instance contains the result of enrolling an AWS EC2 Instance.
Expand Down
44 changes: 25 additions & 19 deletions api/types/usertasks/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,40 +90,40 @@ const (
// This value is used to populate the UserTasks.Spec.IssueType for Discover EC2 tasks.
// The Web UI will then use those identifiers to show detailed instructions on how to fix the issue.
const (
// AutoDiscoverEC2IssueScriptInstanceNotRegistered is used to identify instances that failed to auto-enroll
// AutoDiscoverEC2IssueSSMInstanceNotRegistered is used to identify instances that failed to auto-enroll
// because they are not present in Amazon Systems Manager.
// This usually means that the Instance does not have the SSM Agent running,
// or that the instance's IAM Profile does not have the managed IAM Policy AmazonSSMManagedInstanceCore assigned to it.
AutoDiscoverEC2IssueScriptInstanceNotRegistered = "ec2-ssm-agent-not-registered"
AutoDiscoverEC2IssueSSMInstanceNotRegistered = "ec2-ssm-agent-not-registered"

// AutoDiscoverEC2IssueScriptInstanceConnectionLost is used to identify instances that failed to auto-enroll
// AutoDiscoverEC2IssueSSMInstanceConnectionLost is used to identify instances that failed to auto-enroll
// because the agent lost connection to Amazon Systems Manager.
// This can happen if the user changed some setting in the instance's network or IAM profile.
AutoDiscoverEC2IssueScriptInstanceConnectionLost = "ec2-ssm-agent-connection-lost"
AutoDiscoverEC2IssueSSMInstanceConnectionLost = "ec2-ssm-agent-connection-lost"

// AutoDiscoverEC2IssueScriptInstanceUnsupportedOS is used to identify instances that failed to auto-enroll
// AutoDiscoverEC2IssueSSMInstanceUnsupportedOS is used to identify instances that failed to auto-enroll
// because their OS is not supported by Teleport.
// This can happen if the instance is running Windows.
AutoDiscoverEC2IssueScriptInstanceUnsupportedOS = "ec2-ssm-unsupported-os"
AutoDiscoverEC2IssueSSMInstanceUnsupportedOS = "ec2-ssm-unsupported-os"

// AutoDiscoverEC2IssueScriptFailure is used to identify instances that failed to auto-enroll
// AutoDiscoverEC2IssueSSMScriptFailure is used to identify instances that failed to auto-enroll
// because the installation script failed.
// The invocation url must be included in the report, so that users can see what was wrong.
AutoDiscoverEC2IssueScriptFailure = "ec2-ssm-script-failure"
AutoDiscoverEC2IssueSSMScriptFailure = "ec2-ssm-script-failure"

// AutoDiscoverEC2IssueInvocationFailure is used to identify instances that failed to auto-enroll
// AutoDiscoverEC2IssueSSMInvocationFailure is used to identify instances that failed to auto-enroll
// because the SSM Script Run (also known as Invocation) failed.
// This happens when there's a failure with permissions or an invalid configuration (e.g., an invalid document name).
AutoDiscoverEC2IssueInvocationFailure = "ec2-ssm-invocation-failure"
AutoDiscoverEC2IssueSSMInvocationFailure = "ec2-ssm-invocation-failure"
)

// discoverEC2IssueTypes is a list of issue types that can occur when trying to auto enroll EC2 instances.
var discoverEC2IssueTypes = []string{
AutoDiscoverEC2IssueScriptInstanceNotRegistered,
AutoDiscoverEC2IssueScriptInstanceConnectionLost,
AutoDiscoverEC2IssueScriptInstanceUnsupportedOS,
AutoDiscoverEC2IssueScriptFailure,
AutoDiscoverEC2IssueInvocationFailure,
AutoDiscoverEC2IssueSSMInstanceNotRegistered,
AutoDiscoverEC2IssueSSMInstanceConnectionLost,
AutoDiscoverEC2IssueSSMInstanceUnsupportedOS,
AutoDiscoverEC2IssueSSMScriptFailure,
AutoDiscoverEC2IssueSSMInvocationFailure,
}

// ValidateUserTask validates the UserTask object without modifying it.
Expand Down Expand Up @@ -215,10 +215,12 @@ func validateDiscoverEC2TaskType(ut *usertasksv1.UserTask) error {
// TaskNameForDiscoverEC2Parts are the fields that deterministically compute a Discover EC2 task name.
// To be used with TaskNameForDiscoverEC2 function.
type TaskNameForDiscoverEC2Parts struct {
Integration string
IssueType string
AccountID string
Region string
Integration string
IssueType string
AccountID string
Region string
SSMDocument string
InstallerScript string
}

// TaskNameForDiscoverEC2 returns a deterministic name for the DiscoverEC2 task type.
Expand All @@ -233,6 +235,10 @@ func TaskNameForDiscoverEC2(parts TaskNameForDiscoverEC2Parts) string {
bs = append(bs, []byte(parts.AccountID)...)
bs = append(bs, binary.LittleEndian.AppendUint64(nil, uint64(len(parts.Region)))...)
bs = append(bs, []byte(parts.Region)...)
bs = append(bs, binary.LittleEndian.AppendUint64(nil, uint64(len(parts.SSMDocument)))...)
bs = append(bs, []byte(parts.SSMDocument)...)
bs = append(bs, binary.LittleEndian.AppendUint64(nil, uint64(len(parts.InstallerScript)))...)
bs = append(bs, []byte(parts.InstallerScript)...)
return uuid.NewSHA1(discoverEC2Namespace, bs).String()
}

Expand Down
2 changes: 1 addition & 1 deletion api/types/usertasks/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ func TestNewDiscoverEC2UserTask(t *testing.T) {
Kind: "user_task",
Version: "v1",
Metadata: &headerv1.Metadata{
Name: "154e1429-da26-5ce2-add2-b0e77a27dd96",
Name: "f36b8798-fdec-59fe-8bd0-33f4890ced05",
Expires: userTaskExpirationTimestamp,
},
Spec: baseEC2DiscoverTaskSpec,
Expand Down
3 changes: 3 additions & 0 deletions lib/auth/authclient/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,9 @@ type ReadDiscoveryAccessPoint interface {

// GetProxies returns a list of registered proxies.
GetProxies() ([]types.Server, error)

// GetUserTask gets a single User Task by its name.
GetUserTask(ctx context.Context, name string) (*usertasksv1.UserTask, error)
}

// DiscoveryAccessPoint is an API interface implemented by a certificate authority (CA) to be
Expand Down
28 changes: 27 additions & 1 deletion lib/srv/discovery/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,18 @@ import (
"github.com/gravitational/trace"
"github.com/jonboulle/clockwork"
"github.com/sirupsen/logrus"
"google.golang.org/protobuf/types/known/timestamppb"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"github.com/gravitational/teleport"
"github.com/gravitational/teleport/api/client/proto"
usertasksv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/usertasks/v1"
usageeventsv1 "github.com/gravitational/teleport/api/gen/proto/go/usageevents/v1"
"github.com/gravitational/teleport/api/types"
"github.com/gravitational/teleport/api/types/discoveryconfig"
apievents "github.com/gravitational/teleport/api/types/events"
"github.com/gravitational/teleport/api/types/usertasks"
"github.com/gravitational/teleport/api/utils/retryutils"
"github.com/gravitational/teleport/lib/auth/authclient"
"github.com/gravitational/teleport/lib/cloud"
Expand Down Expand Up @@ -329,6 +332,7 @@ type Server struct {

awsSyncStatus awsSyncStatus
awsEC2ResourcesStatus awsResourcesStatus
awsEC2Tasks awsEC2Tasks

// caRotationCh receives nodes that need to have their CAs rotated.
caRotationCh chan []types.Server
Expand Down Expand Up @@ -459,7 +463,8 @@ func (s *Server) initAWSWatchers(matchers []types.AWSMatcher) error {
server.WithPollInterval(s.PollInterval),
server.WithTriggerFetchC(s.newDiscoveryConfigChangedSub()),
server.WithPreFetchHookFn(func() {
s.awsEC2ResourcesStatus.iterationStarted()
s.awsEC2ResourcesStatus.reset()
s.awsEC2Tasks.reset()
}),
)
if err != nil {
Expand Down Expand Up @@ -972,6 +977,26 @@ func (s *Server) handleEC2RemoteInstallation(instances *server.EC2Instances) err
discoveryConfig: instances.DiscoveryConfig,
integration: instances.Integration,
}, len(req.Instances))

for _, instance := range req.Instances {
s.awsEC2Tasks.addFailedEnrollment(
awsEC2TaskKey{
accountID: instances.AccountID,
integration: instances.Integration,
issueType: usertasks.AutoDiscoverEC2IssueSSMInvocationFailure,
region: instances.Region,
ssmDocument: req.DocumentName,
installerScript: req.InstallerScriptName(),
},
&usertasksv1.DiscoverEC2Instance{
DiscoveryConfig: instances.DiscoveryConfig,
DiscoveryGroup: s.DiscoveryGroup,
InstanceId: instance.InstanceID,
Name: instance.InstanceName,
SyncTime: timestamppb.New(s.clock.Now()),
},
)
}
return trace.Wrap(err)
}
return nil
Expand Down Expand Up @@ -1084,6 +1109,7 @@ func (s *Server) handleEC2Discovery() {
}

s.updateDiscoveryConfigStatus(instances.EC2.DiscoveryConfig)
s.upsertTasksForAWSEC2FailedEnrollments()
case <-s.ctx.Done():
s.ec2Watcher.Stop()
return
Expand Down
Loading
Loading