From 240133d7e10f9a9b0893c597b5835103b6d1e3f6 Mon Sep 17 00:00:00 2001 From: Tianyou Li Date: Wed, 11 Dec 2024 05:08:06 +0800 Subject: [PATCH] Add support for kernel lock analysis (#114) * Add support for kernel lock analysis * Fix code format issue by make format * Fix staticcheck issue * address the reviewer's comment, except for the html output * Add html render * Modify README for perf lock --- README.md | 7 +- cmd/lock/lock.go | 123 +++++++++++++++++++++++++++++++ cmd/root.go | 2 + internal/report/html.go | 16 ++++ internal/report/table_defs.go | 24 ++++++ internal/report/table_helpers.go | 10 +++ internal/script/script_defs.go | 60 +++++++++++++++ 7 files changed, 241 insertions(+), 1 deletion(-) create mode 100755 cmd/lock/lock.go diff --git a/README.md b/README.md index 57998c4..b26c4d2 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Svr-info functionality is now included in PerfSpect. The svr-info configuration | ------- | ----------- | | [`perfspect config`](#config-command) | Modify system configuration | | [`perfspect flame`](#flame-command) | Generate flamegraphs | +| [`perfspect lock`](#lock-command) | Collect system wide hotspot, c2c and lock contention information | | [`perfspect metrics`](#metrics-command) | Monitor core and uncore metrics | | [`perfspect report`](#report-command) | Generate configuration report | | [`perfspect telemetry`](#telemetry-command) | Collect system telemetry | @@ -55,6 +56,10 @@ $ ./perfspect config --cores 24 --llc 2.0 --uncoremaxfreq 1.8 ``` #### Flame Command Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flame -h` to capture a system-wide software flamegraph. + +#### Lock Command +As systems contain more and more cores, it is useful to be able to analyze the kernel lock overhead and potential false-sharing that impact the overall system scalability. 
Run `perfspect lock -h` to collect system-wide hotspot, c2c and lock contention information, which helps experienced performance engineers look into the related problems. + #### Metrics Command The `metrics` command provides system performance characterization metrics. The metrics provided are dependent on the platform architecture. @@ -186,4 +191,4 @@ $ ./perfspect report --benchmark speed,memory --targets targets.yaml `builder/build.sh` builds the dependencies and the app in Docker containers that provide the required build environments. Assumes you have Docker installed on your development system. ### Subsequent Builds -`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system. \ No newline at end of file +`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system. diff --git a/cmd/lock/lock.go b/cmd/lock/lock.go new file mode 100755 index 0000000..26fc648 --- /dev/null +++ b/cmd/lock/lock.go @@ -0,0 +1,123 @@ +// Package lock is a subcommand of the root command. It is used to collect kernel lock related perf information from target(s). 
+package lock + +// Copyright (C) 2021-2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause + +import ( + "fmt" + "os" + "perfspect/internal/common" + "perfspect/internal/report" + "strings" + + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +const cmdName = "lock" + +var examples = []string{ + fmt.Sprintf(" Lock inspect from local host: $ %s %s", common.AppName, cmdName), + fmt.Sprintf(" Lock inspect from remote target: $ %s %s --target 192.168.1.1 --user fred --key fred_key", common.AppName, cmdName), + fmt.Sprintf(" Lock inspect from multiple targets: $ %s %s --targets targets.yaml", common.AppName, cmdName), +} + +var Cmd = &cobra.Command{ + Use: cmdName, + Short: "Collect system information for kernel lock analysis from target(s)", + Long: "", + Example: strings.Join(examples, "\n"), + RunE: runCmd, + PreRunE: validateFlags, + GroupID: "primary", + Args: cobra.NoArgs, + SilenceErrors: true, +} + +var ( + flagDuration int + flagFrequency int +) + +const ( + flagDurationName = "duration" + flagFrequencyName = "frequency" +) + +func init() { + Cmd.Flags().StringVar(&common.FlagInput, common.FlagInputName, "", "") + Cmd.Flags().StringSliceVar(&common.FlagFormat, common.FlagFormatName, []string{report.FormatHtml}, "") + Cmd.Flags().IntVar(&flagDuration, flagDurationName, 10, "") + Cmd.Flags().IntVar(&flagFrequency, flagFrequencyName, 11, "") + + common.AddTargetFlags(Cmd) + + Cmd.SetUsageFunc(usageFunc) +} + +func usageFunc(cmd *cobra.Command) error { + cmd.Printf("Usage: %s [flags]\n\n", cmd.CommandPath()) + cmd.Printf("Examples:\n%s\n\n", cmd.Example) + cmd.Println("Flags:") + for _, group := range getFlagGroups() { + cmd.Printf(" %s:\n", group.GroupName) + for _, flag := range group.Flags { + flagDefault := "" + if cmd.Flags().Lookup(flag.Name).DefValue != "" { + flagDefault = fmt.Sprintf(" (default: %s)", cmd.Flags().Lookup(flag.Name).DefValue) + } + cmd.Printf(" --%-20s %s%s\n", flag.Name, flag.Help, flagDefault) + } + } + 
+ cmd.Println("\nGlobal Flags:") + cmd.Parent().PersistentFlags().VisitAll(func(pf *pflag.Flag) { + flagDefault := "" + if pf.DefValue != "" { + flagDefault = fmt.Sprintf(" (default: %s)", pf.DefValue) + } + cmd.Printf(" --%-20s %s%s\n", pf.Name, pf.Usage, flagDefault) + }) + return nil +} + +func getFlagGroups() []common.FlagGroup { + var groups []common.FlagGroup + flags := []common.Flag{ + { + Name: flagDurationName, + Help: "number of seconds to run the collection", + }, + { + Name: flagFrequencyName, + Help: "number of samples taken per second", + }, + } + groups = append(groups, common.FlagGroup{ + GroupName: "Options", + Flags: flags, + }) + groups = append(groups, common.GetTargetFlagGroup()) + + return groups +} + +func validateFlags(cmd *cobra.Command, args []string) error { + if flagDuration <= 0 || flagFrequency <= 0 { + err := fmt.Errorf("duration and frequency must be greater than 0") + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + return err + } + return nil +} + +func runCmd(cmd *cobra.Command, args []string) error { + reportingCommand := common.ReportingCommand{ + Cmd: cmd, + ReportNamePost: "lock", + Frequency: flagFrequency, + Duration: flagDuration, + TableNames: []string{report.KernelLockAnalysisTableName}, + } + return reportingCommand.Run() +} diff --git a/cmd/root.go b/cmd/root.go index f95d5dd..b609ad5 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -22,6 +22,7 @@ import ( "perfspect/cmd/config" "perfspect/cmd/flame" + "perfspect/cmd/lock" "perfspect/cmd/metrics" "perfspect/cmd/report" "perfspect/cmd/telemetry" @@ -111,6 +112,7 @@ Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}} rootCmd.AddCommand(metrics.Cmd) rootCmd.AddCommand(telemetry.Cmd) rootCmd.AddCommand(flame.Cmd) + rootCmd.AddCommand(lock.Cmd) rootCmd.AddCommand(config.Cmd) if onIntelNetwork() { rootCmd.AddGroup([]*cobra.Group{{ID: "other", Title: "Other Commands:"}}...) 
diff --git a/internal/report/html.go b/internal/report/html.go index a90f80a..0d5f0d1 100644 --- a/internal/report/html.go +++ b/internal/report/html.go @@ -1041,3 +1041,19 @@ func codePathFrequencyTableHTMLRenderer(tableValues TableValues, targetName stri out += renderFlameGraph("Java", tableValues, "Java Paths") return out } + +func kernelLockAnalysisHTMLRenderer(tableValues TableValues, targetName string) string { + values := [][]string{} + var tableValueStyles [][]string + for _, field := range tableValues.Fields { + rowValues := []string{} + rowValues = append(rowValues, field.Name) + rowValues = append(rowValues, field.Values[0]) + values = append(values, rowValues) + rowStyles := []string{} + rowStyles = append(rowStyles, "font-weight:bold") + rowStyles = append(rowStyles, "white-space: pre-wrap") + tableValueStyles = append(tableValueStyles, rowStyles) + } + return renderHTMLTable([]string{}, values, "pure-table pure-table-striped", tableValueStyles) +} diff --git a/internal/report/table_defs.go b/internal/report/table_defs.go index fbe58e9..3b5202b 100644 --- a/internal/report/table_defs.go +++ b/internal/report/table_defs.go @@ -114,6 +114,8 @@ const ( ConfigurationTableName = "Configuration" // flamegraph table names CodePathFrequencyTableName = "Code Path Frequency" + // lock table names + KernelLockAnalysisTableName = "Kernel Lock Analysis" ) const ( @@ -606,6 +608,17 @@ var tableDefinitions = map[string]TableDefinition{ }, FieldsFunc: codePathFrequencyTableValues, HTMLTableRendererFunc: codePathFrequencyTableHTMLRenderer}, + // + // kernel lock analysis tables + // + KernelLockAnalysisTableName: { + Name: KernelLockAnalysisTableName, + ScriptNames: []string{ + script.ProfileKernelLockScriptName, + }, + FieldsFunc: kernelLockAnalysisTableValues, + HTMLTableRendererFunc: kernelLockAnalysisHTMLRenderer, + }, } // GetScriptNamesForTable returns the script names required to generate the table with the given name @@ -1889,3 +1902,14 @@ func 
codePathFrequencyTableValues(outputs map[string]script.ScriptOutput) []Fiel } return fields } + +func kernelLockAnalysisTableValues(outputs map[string]script.ScriptOutput) []Field { + fields := []Field{ + {Name: "Hotspot without Callstack", Values: []string{sectionValueFromOutput(outputs, "perf_hotspot_no_children")}}, + {Name: "Hotspot with Callstack", Values: []string{sectionValueFromOutput(outputs, "perf_hotspot_callgraph")}}, + {Name: "Cache2Cache without Callstack", Values: []string{sectionValueFromOutput(outputs, "perf_c2c_no_children")}}, + {Name: "Cache2Cache with CallStack", Values: []string{sectionValueFromOutput(outputs, "perf_c2c_callgraph")}}, + {Name: "Lock Contention", Values: []string{sectionValueFromOutput(outputs, "perf_lock_contention")}}, + } + return fields +} diff --git a/internal/report/table_helpers.go b/internal/report/table_helpers.go index f1687b7..0c7306e 100644 --- a/internal/report/table_helpers.go +++ b/internal/report/table_helpers.go @@ -1741,3 +1741,13 @@ func systemFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } return folded } + +func sectionValueFromOutput(outputs map[string]script.ScriptOutput, sectionName string) string { + sections := getSectionsFromOutput(outputs, script.ProfileKernelLockScriptName) + + value := sections[sectionName] + if value == "" { + slog.Warn("No content for section:", slog.String("section", sectionName)) + } + return value +} diff --git a/internal/script/script_defs.go b/internal/script/script_defs.go index 423d712..eead1c6 100644 --- a/internal/script/script_defs.go +++ b/internal/script/script_defs.go @@ -87,6 +87,7 @@ const ( PMUBusyScriptName = "pmu busy" ProfileJavaScriptName = "profile java" ProfileSystemScriptName = "profile system" + ProfileKernelLockScriptName = "profile kernel lock" GaudiInfoScriptName = "gaudi info" GaudiFirmwareScriptName = "gaudi firmware" GaudiNumaScriptName = "gaudi numa" @@ -958,6 +959,65 @@ fi Superuser: true, Depends: []string{"perf", 
"stackcollapse-perf.pl"}, }, + { + Name: ProfileKernelLockScriptName, + Script: func() string { + return fmt.Sprintf(`# system-wide lock profile collection +# adjust perf_event_paranoid and kptr_restrict +PERF_EVENT_PARANOID=$( cat /proc/sys/kernel/perf_event_paranoid ) +echo -1 >/proc/sys/kernel/perf_event_paranoid +KPTR_RESTRICT=$( cat /proc/sys/kernel/kptr_restrict ) +echo 0 >/proc/sys/kernel/kptr_restrict + +frequency=%d +duration=%d + +# collect hotspot +perf record -F $frequency -a -g --call-graph dwarf -W -d --phys-data --sample-cpu -e cycles:pp,instructions:pp,cpu/mem-loads,ldlat=30/P,cpu/mem-stores/P -o perf_hotspot.data -- sleep $duration & +PERF_HOTSPOT_PID=$! + +# check the availability perf lock -b option +perf lock contention -a -bv --max-stack 20 2>/dev/null -- sleep 0 +PERF_LOCK_CONTENTION_BPF=$? + +# collect lock +if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then + perf lock contention -a -bv --max-stack 20 2>perf_lock_contention.txt -- sleep $duration & + PERF_LOCK_PID=$! 
+fi + +wait ${PERF_HOTSPOT_PID} + +if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then + wait ${PERF_LOCK_PID} +fi + +# restore perf_event_paranoid and kptr_restrict +echo "$PERF_EVENT_PARANOID" > /proc/sys/kernel/perf_event_paranoid +echo "$KPTR_RESTRICT" > /proc/sys/kernel/kptr_restrict + +# collapse perf data +if [ -f "perf_hotspot.data" ]; then + echo "########## perf_hotspot_no_children ##########" + perf report -i perf_hotspot.data --no-children --call-graph none --stdio + echo "########## perf_hotspot_callgraph ##########" + perf report -i perf_hotspot.data --stdio +fi +if [ -f "perf_hotspot.data" ]; then + echo "########## perf_c2c_no_children ##########" + perf c2c report -i perf_hotspot.data --call-graph none --stdio + echo "########## perf_c2c_callgraph ##########" + perf c2c report -i perf_hotspot.data --stdio +fi +if [ -f "perf_lock_contention.txt" ]; then + echo "########## perf_lock_contention ##########" + cat perf_lock_contention.txt +fi +`, frequency, duration) + }(), + Superuser: true, + Depends: []string{"perf"}, + }, } // validate script definitions