From 6b0fa55c429bd1dbcff17ebde50a9e0605c4ee27 Mon Sep 17 00:00:00 2001 From: Jason Harper Date: Mon, 18 Nov 2024 10:33:01 -0800 Subject: [PATCH] Improve error handling when cannot connect to target(s) and when target doesn't meet requirements for metrics command (#95) * handle connection errors * confirm metric targets have same architecture * metrics error msg when target not supported due to failure to read events --- cmd/config/config.go | 11 +++- cmd/metrics/metadata.go | 15 ++--- cmd/metrics/metrics.go | 121 +++++++++++++++++++++++++------------ internal/common/common.go | 25 +++++--- internal/common/targets.go | 53 ++++++++-------- 5 files changed, 144 insertions(+), 81 deletions(-) diff --git a/cmd/config/config.go b/cmd/config/config.go index 48eb5c0..7ae7e93 100644 --- a/cmd/config/config.go +++ b/cmd/config/config.go @@ -207,13 +207,22 @@ func runCmd(cmd *cobra.Command, args []string) error { appContext := cmd.Context().Value(common.AppContext{}).(common.AppContext) localTempDir := appContext.TempDir // get the targets - myTargets, err := common.GetTargets(cmd, true, true, localTempDir) + myTargets, targetErrs, err := common.GetTargets(cmd, true, true, localTempDir) if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) slog.Error(err.Error()) cmd.SilenceUsage = true return err } + // check for errors in target connections + for _, err := range targetErrs { + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + } if len(myTargets) == 0 { err := fmt.Errorf("no targets specified") fmt.Fprintf(os.Stderr, "Error: %v\n", err) diff --git a/cmd/metrics/metadata.go b/cmd/metrics/metadata.go index e41df7c..e58a56a 100644 --- a/cmd/metrics/metadata.go +++ b/cmd/metrics/metadata.go @@ -140,10 +140,10 @@ func LoadMetadata(myTarget target.Target, noRoot bool, perfPath string, localTem var err error var output string if metadata.SupportsFixedCycles, output, err = getSupportsFixedEvent(myTarget, "cpu-cycles", cpu.MicroArchitecture, noRoot, perfPath, localTempDir); err != nil { - err = fmt.Errorf("failed to determine if fixed-counter cpu-cycles is supported: %v", err) + err = fmt.Errorf("failed to determine if fixed-counter 'cpu-cycles' is supported: %v", err) } else { if !metadata.SupportsFixedCycles { - slog.Warn("Fixed-counter cpu-cycles events not supported", slog.String("output", output)) + slog.Warn("Fixed-counter 'cpu-cycles' events not supported", slog.String("output", output)) } } slowFuncChannel <- err @@ -153,10 +153,10 @@ func LoadMetadata(myTarget target.Target, noRoot bool, perfPath string, localTem var err error var output string if metadata.SupportsFixedInstructions, output, err = getSupportsFixedEvent(myTarget, "instructions", cpu.MicroArchitecture, noRoot, perfPath, localTempDir); err != nil { - err = fmt.Errorf("failed to determine if fixed-counter instructions is supported: %v", err) + err = fmt.Errorf("failed to determine if fixed-counter 'instructions' is supported: %v", err) } else { if !metadata.SupportsFixedInstructions { - slog.Warn("Fixed-counter instructions events not supported", slog.String("output", output)) + slog.Warn("Fixed-counter 'instructions' events not supported", slog.String("output", output)) } } slowFuncChannel <- err @@ -170,11 +170,8 @@ func LoadMetadata(myTarget target.Target, noRoot bool, perfPath string, localTem errs = append(errs, <-slowFuncChannel) for _, errInside := range errs { if errInside != nil { - if err == nil { - err = errInside - } else { - err = fmt.Errorf("%v : %v", err, errInside) - } + slog.Error("error loading metadata", slog.String("error", errInside.Error()), slog.String("target", myTarget.GetName())) + err = fmt.Errorf("target not supported, see log for details") } } }() diff --git a/cmd/metrics/metrics.go b/cmd/metrics/metrics.go index 939df72..e55ec14 100644 --- a/cmd/metrics/metrics.go +++ b/cmd/metrics/metrics.go @@ -472,16 +472,17 @@ func validateFlags(cmd *cobra.Command, args []string) error { } type targetContext struct { - target target.Target - err error - perfPath string - tempDir string - metadata Metadata - nmiDisabled bool - perfMuxIntervals map[string]int - groupDefinitions []GroupDefinition - metricDefinitions []MetricDefinition - printedFiles []string + target target.Target + err error + perfPath string + tempDir string + metadata Metadata + nmiDisabled bool + perfMuxIntervalsSet bool + perfMuxIntervals map[string]int + groupDefinitions []GroupDefinition + metricDefinitions []MetricDefinition + printedFiles []string } type targetError struct { @@ -494,7 +495,6 @@ func runCmd(cmd *cobra.Command, args []string) error { appContext := cmd.Context().Value(common.AppContext{}).(common.AppContext) localTempDir := appContext.TempDir localOutputDir := appContext.OutputDir - // handle signals // child processes will exit when the signals are received which will // allow this app to exit normally @@ -505,7 +505,6 @@ func runCmd(cmd *cobra.Command, args []string) error { setSignalReceived() slog.Info("received signal", slog.String("signal", sig.String())) }() - // round up to next perfPrintInterval second (the collection interval used by perf stat) if flagDuration != 0 { qf := float64(flagDuration) / float64(flagPerfPrintInterval) @@ -514,24 +513,15 @@ func runCmd(cmd *cobra.Command, args []string) error { flagDuration = (qi + 1) * flagPerfPrintInterval } } - // get the targets - myTargets, err := common.GetTargets(cmd, !flagNoRoot, !flagNoRoot, localTempDir) + myTargets, targetErrs, err := common.GetTargets(cmd, !flagNoRoot, !flagNoRoot, localTempDir) if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) slog.Error(err.Error()) cmd.SilenceUsage = true return err } - if len(myTargets) == 0 { - err := fmt.Errorf("no targets specified") - fmt.Fprintf(os.Stderr, "Error: %v\n", err) - slog.Error(err.Error()) - cmd.SilenceUsage = true - return err - } - targetTempRoot, _ := cmd.Flags().GetString(common.FlagTargetTempDirName) - + // check for live mode with multiple targets if flagLive && len(myTargets) > 1 { err := fmt.Errorf("live mode is only supported for a single target") fmt.Fprintf(os.Stderr, "Error: %v\n", err) @@ -539,7 +529,6 @@ func runCmd(cmd *cobra.Command, args []string) error { cmd.SilenceUsage = true return err } - // create progress spinner multiSpinner := progress.NewMultiSpinner() for _, myTarget := range myTargets { @@ -553,7 +542,48 @@ func runCmd(cmd *cobra.Command, args []string) error { } multiSpinner.Start() defer multiSpinner.Finish() - + // check for errors in target creation + for i := range targetErrs { + if targetErrs[i] != nil { + _ = multiSpinner.Status(myTargets[i].GetName(), fmt.Sprintf("Error: %v", targetErrs[i])) + // remove target from targets list + myTargets = append(myTargets[:i], myTargets[i+1:]...) + } + } + // check if any targets remain + if len(myTargets) == 0 { + err := fmt.Errorf("no targets specified") + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + // check if all targets have the same architecture + for _, target := range myTargets { + tArch, err := target.GetArchitecture() + if err != nil { + err = fmt.Errorf("failed to get architecture: %w", err) + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + tArch0, err := myTargets[0].GetArchitecture() + if err != nil { + err = fmt.Errorf("failed to get architecture: %w", err) + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + if tArch != tArch0 { + err := fmt.Errorf("all targets must have the same architecture") + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + } // extract perf into local temp directory (assumes all targets have the same architecture) localPerfPath, err := extractPerf(myTargets[0], localTempDir) if err != nil { @@ -562,23 +592,26 @@ func runCmd(cmd *cobra.Command, args []string) error { cmd.SilenceUsage = true return err } - // prepare the targets channelTargetError := make(chan targetError) var targetContexts []targetContext for _, myTarget := range myTargets { targetContexts = append(targetContexts, targetContext{target: myTarget}) } + targetTempRoot, _ := cmd.Flags().GetString(common.FlagTargetTempDirName) for i := range targetContexts { go prepareTarget(&targetContexts[i], targetTempRoot, localTempDir, localPerfPath, channelTargetError, multiSpinner.Status) } + // wait for all targets to be prepared + numPreparedTargets := 0 for range targetContexts { targetError := <-channelTargetError if targetError.err != nil { slog.Error("failed to prepare target", slog.String("target", targetError.target.GetName()), slog.String("error", targetError.err.Error())) + } else { + numPreparedTargets++ } } - // schedule temporary directory cleanup if cmd.Parent().PersistentFlags().Lookup("debug").Value.String() != "true" { // don't remove the directory if we're debugging defer func() { @@ -592,7 +625,6 @@ func runCmd(cmd *cobra.Command, args []string) error { } }() } - // schedule NMI watchdog reset defer func() { for _, targetContext := range targetContexts { @@ -604,28 +636,44 @@ func runCmd(cmd *cobra.Command, args []string) error { } } }() - // schedule mux interval reset defer func() { for _, targetContext := range targetContexts { - err := SetMuxIntervals(targetContext.target, targetContext.perfMuxIntervals, localTempDir) - if err != nil { - slog.Error("failed to reset perf mux intervals", slog.String("target", targetContext.target.GetName()), slog.String("error", err.Error())) + if targetContext.perfMuxIntervalsSet { + err := SetMuxIntervals(targetContext.target, targetContext.perfMuxIntervals, localTempDir) + if err != nil { + slog.Error("failed to reset perf mux intervals", slog.String("target", targetContext.target.GetName()), slog.String("error", err.Error())) + } } } }() - + // check if any targets were successfully prepared + if numPreparedTargets == 0 { + err := fmt.Errorf("no targets were successfully prepared") + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } // prepare the metrics for each target for i := range targetContexts { go prepareMetrics(&targetContexts[i], localTempDir, channelTargetError, multiSpinner.Status) } + // wait for all metrics to be prepared + numTargetsWithPreparedMetrics := 0 for range targetContexts { targetError := <-channelTargetError if targetError.err != nil { slog.Error("failed to prepare metrics", slog.String("target", targetError.target.GetName()), slog.String("error", targetError.err.Error())) + } else { + numTargetsWithPreparedMetrics++ } } - + if numTargetsWithPreparedMetrics == 0 { + err := fmt.Errorf("no targets had metrics prepared") + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } // show metric names and exit, if requested if flagShowMetricNames { // stop the multiSpinner @@ -638,7 +686,6 @@ func runCmd(cmd *cobra.Command, args []string) error { } return nil } - // create the local output directory if !flagLive { err = common.CreateOutputDir(localOutputDir) @@ -649,7 +696,6 @@ func runCmd(cmd *cobra.Command, args []string) error { return err } } - // write metadata to file if flagWriteEventsToFile { for _, targetContext := range targetContexts { @@ -661,7 +707,6 @@ func runCmd(cmd *cobra.Command, args []string) error { } } } - // start the metric collection for i := range targetContexts { if targetContexts[i].err == nil { @@ -796,6 +841,7 @@ func prepareTarget(targetContext *targetContext, targetTempRoot string, localTem channelError <- targetError{target: myTarget, err: err} return } + targetContext.perfMuxIntervalsSet = true } // get the full path to the perf binary if targetContext.perfPath, err = getPerfPath(myTarget, localPerfPath); err != nil { @@ -819,7 +865,6 @@ func prepareMetrics(targetContext *targetContext, localTempDir string, channelEr _ = statusUpdate(myTarget.GetName(), "collecting metadata") var err error if targetContext.metadata, err = LoadMetadata(myTarget, flagNoRoot, targetContext.perfPath, localTempDir); err != nil { - err = fmt.Errorf("failed to load metadata: %w", err) _ = statusUpdate(myTarget.GetName(), fmt.Sprintf("Error: %s", err.Error())) targetContext.err = err channelError <- targetError{target: myTarget, err: err} diff --git a/internal/common/common.go b/internal/common/common.go index 3fc5882..c68f70f 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -148,20 +148,13 @@ func (rc *ReportingCommand) Run() error { } } // get the targets - myTargets, err := GetTargets(rc.Cmd, elevated, false, localTempDir) + myTargets, targetErrs, err := GetTargets(rc.Cmd, elevated, false, localTempDir) if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) slog.Error(err.Error()) rc.Cmd.SilenceUsage = true return err } - if len(myTargets) == 0 { - err := fmt.Errorf("no targets specified") - fmt.Fprintf(os.Stderr, "Error: %v\n", err) - slog.Error(err.Error()) - rc.Cmd.SilenceUsage = true - return err - } // setup and start the progress indicator multiSpinner := progress.NewMultiSpinner() for _, target := range myTargets { @@ -174,6 +167,22 @@ func (rc *ReportingCommand) Run() error { } } multiSpinner.Start() + // check for errors in target creation + for i := range targetErrs { + if targetErrs[i] != nil { + _ = multiSpinner.Status(myTargets[i].GetName(), fmt.Sprintf("Error: %v", targetErrs[i])) + // remove target from targets list + myTargets = append(myTargets[:i], myTargets[i+1:]...) + } + } + // check if we have any targets to run the scripts on + if len(myTargets) == 0 { + err := fmt.Errorf("no targets specified") + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + rc.Cmd.SilenceUsage = true + return err + } // run the scripts on the targets channelTargetScriptOutputs := make(chan TargetScriptOutputs) channelError := make(chan error) diff --git a/internal/common/targets.go b/internal/common/targets.go index ab2970d..b4d8fdf 100644 --- a/internal/common/targets.go +++ b/internal/common/targets.go @@ -70,16 +70,13 @@ func GetTargetFlagGroup() FlagGroup { // If a targets file is specified, it reads the targets from the file. // Otherwise, it retrieves a single target using the getTarget function. // The function returns a slice of target.Target and an error if any. -func GetTargets(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantElevate bool, localTempDir string) ([]target.Target, error) { +func GetTargets(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantElevate bool, localTempDir string) ([]target.Target, []error, error) { flagTargetsFile, _ := cmd.Flags().GetString(flagTargetsFileName) if flagTargetsFile != "" { return getTargetsFromFile(flagTargetsFile, localTempDir) } - myTarget, err := getTarget(cmd, needsElevatedPrivileges, failIfCantElevate, localTempDir) - if err != nil { - return nil, err - } - return []target.Target{myTarget}, nil + myTarget, targetErr, err := getTarget(cmd, needsElevatedPrivileges, failIfCantElevate, localTempDir) + return []target.Target{myTarget}, []error{targetErr}, err } // getTarget returns a target.Target object representing the target host and associated details. @@ -90,8 +87,9 @@ func GetTargets(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantElev // - localTempDir: A string representing the local temporary directory. // The function returns the following values: // - myTarget: A target.Target object representing the target host and associated details. -// - err: An error object indicating any error that occurred during the retrieval process. -func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantElevate bool, localTempDir string) (target.Target, error) { +// - targetError: An error indicating a problem with the target host connection. +// - err: An error object indicating any error that occurred during the function execution. +func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantElevate bool, localTempDir string) (target.Target, error, error) { targetHost, _ := cmd.Flags().GetString(flagTargetHostName) targetPort, _ := cmd.Flags().GetString(flagTargetPortName) targetUser, _ := cmd.Flags().GetString(flagTargetUserName) @@ -103,44 +101,44 @@ func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantEleva if !term.IsTerminal(int(os.Stdin.Fd())) { err := fmt.Errorf("can not prompt for SSH password because STDIN isn't coming from a terminal") slog.Error(err.Error()) - return myTarget, err + return myTarget, nil, err } else { slog.Info("Prompting for SSH password.", slog.String("targetHost", targetHost), slog.String("targetUser", targetUser)) sshPwd, err := getPassword(fmt.Sprintf("%s@%s's password", targetUser, targetHost)) if err != nil { - return nil, err + return myTarget, nil, err } var hostArchitecture string hostArchitecture, err = getHostArchitecture() if err != nil { - return nil, err + return myTarget, nil, err } sshPassPath, err := util.ExtractResource(script.Resources, path.Join("resources", hostArchitecture, "sshpass"), localTempDir) if err != nil { - return nil, err + return myTarget, nil, err } myTarget.SetSshPassPath(sshPassPath) myTarget.SetSshPass(sshPwd) - // if still can't connect, return error + // if still can't connect, return target error if !myTarget.CanConnect() { - err = fmt.Errorf("failed to connect to target host (%s) using provided ssh user (%s) and password (****)", targetHost, targetUser) - return nil, err + err = fmt.Errorf("failed to connect to target host (%s)", myTarget.GetName()) + return myTarget, err, nil } } } else { - err := fmt.Errorf("failed to connect to target (%s) using provided target arguments", targetHost) - return nil, err + err := fmt.Errorf("failed to connect to target host (%s)", myTarget.GetName()) + return myTarget, nil, err } } if needsElevatedPrivileges && !myTarget.CanElevatePrivileges() { if failIfCantElevate { err := fmt.Errorf("failed to elevate privileges on remote target") - return nil, err + return myTarget, err, nil } else { slog.Warn("failed to elevate privileges on remote target, continuing without elevated privileges", slog.String("targetHost", targetHost)) } } - return myTarget, nil + return myTarget, nil, nil } // local target myTarget := target.NewLocalTarget() @@ -149,7 +147,7 @@ func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantEleva slog.Warn("can not prompt for sudo password because STDIN isn't coming from a terminal") if failIfCantElevate { err := fmt.Errorf("failed to elevate privileges on local target") - return nil, err + return myTarget, err, nil } else { slog.Warn("continuing without elevated privileges") } @@ -157,7 +155,7 @@ func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantEleva fmt.Fprintf(os.Stderr, "WARNING: some operations cannot be performed without elevated privileges.\n") currentUser, err := user.Current() if err != nil { - return nil, err + return myTarget, nil, err } fmt.Fprintf(os.Stderr, "For complete functionality, please provide your password at the prompt.\n") slog.Info("prompting for sudo password") @@ -165,13 +163,13 @@ func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantEleva var sudoPwd string sudoPwd, err = getPassword(prompt) if err != nil { - return nil, err + return myTarget, nil, err } myTarget.SetSudo(sudoPwd) if !myTarget.CanElevatePrivileges() { if failIfCantElevate { err := fmt.Errorf("failed to elevate privileges on local target") - return nil, err + return myTarget, nil, err } else { slog.Warn("failed to elevate privileges on local target, continuing without elevated privileges") fmt.Fprintf(os.Stderr, "WARNING: Not able to establish elevated privileges with provided password.\n") @@ -180,7 +178,7 @@ func getTarget(cmd *cobra.Command, needsElevatedPrivileges bool, failIfCantEleva } } } - return myTarget, nil + return myTarget, nil, nil } type targetFromYAML struct { @@ -198,7 +196,7 @@ type targetsFile struct { // getTargetsFromFile reads a targets file and returns a list of target objects. // It takes the path to the targets file and the local temporary directory as input. -func getTargetsFromFile(targetsFilePath string, localTempDir string) (targets []target.Target, err error) { +func getTargetsFromFile(targetsFilePath string, localTempDir string) (targets []target.Target, targetErrs []error, err error) { var targetsFile targetsFile // read the file into a byte array yamlFile, err := os.ReadFile(targetsFilePath) @@ -236,6 +234,11 @@ func getTargetsFromFile(targetsFilePath string, localTempDir string) (targets [] newTarget := target.NewRemoteTarget(t.Name, t.Host, t.Port, t.User, t.Key) newTarget.SetSshPassPath(sshPassPath) newTarget.SetSshPass(t.Pwd) + if !newTarget.CanConnect() { + targetErrs = append(targetErrs, fmt.Errorf("failed to connect to target host (%s)", newTarget.GetName())) + } else { + targetErrs = append(targetErrs, nil) + } targets = append(targets, newTarget) } return