Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for kernel lock analysis #114

Merged
merged 6 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Svr-info functionality is now included in PerfSpect. The svr-info configuration
| ------- | ----------- |
| [`perfspect config`](#config-command) | Modify system configuration |
| [`perfspect flame`](#flame-command) | Generate flamegraphs |
| [`perfspect lock`](#lock-command) | Collect system wide hotspot, c2c and lock contention information |
| [`perfspect metrics`](#metrics-command) | Monitor core and uncore metrics |
| [`perfspect report`](#report-command) | Generate configuration report |
| [`perfspect telemetry`](#telemetry-command) | Collect system telemetry |
Expand All @@ -55,6 +56,10 @@ $ ./perfspect config --cores 24 --llc 2.0 --uncoremaxfreq 1.8
```
#### Flame Command
Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flame -h` to capture a system-wide software flamegraph.

#### Lock Command
As systems contain more and more cores, it is useful to be able to analyze kernel lock overhead and potential false sharing that impact overall system scalability. Run `perfspect lock -h` to collect system-wide hotspot, c2c, and lock contention information, which helps experienced performance engineers investigate these problems.

#### Metrics Command
The `metrics` command provides system performance characterization metrics. The metrics provided are dependent on the platform architecture.

Expand Down Expand Up @@ -186,4 +191,4 @@ $ ./perfspect report --benchmark speed,memory --targets targets.yaml
`builder/build.sh` builds the dependencies and the app in Docker containers that provide the required build environments. Assumes you have Docker installed on your development system.

### Subsequent Builds
`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system.
`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system.
123 changes: 123 additions & 0 deletions cmd/lock/lock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Package lock is a subcommand of the root command. It is used to collect kernel lock related perf information from target(s).
package lock

// Copyright (C) 2021-2024 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause

import (
"fmt"
"os"
"perfspect/internal/common"
"perfspect/internal/report"
"strings"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
)

// cmdName is the name of this subcommand; it is used as the cobra "Use" string
// and inside the example command lines below.
const cmdName = "lock"

// examples holds the example invocations shown in the command's usage output.
// The lines are joined with newlines to form Cmd.Example.
var examples = []string{
fmt.Sprintf(" Lock inspect from local host: $ %s %s", common.AppName, cmdName),
fmt.Sprintf(" Lock inspect from remote target: $ %s %s --target 192.168.1.1 --user fred --key fred_key", common.AppName, cmdName),
fmt.Sprintf(" Lock inspect from multiple targets: $ %s %s --targets targets.yaml", common.AppName, cmdName),
}

// Cmd is the cobra command for the "lock" subcommand. It accepts no positional
// arguments, runs validateFlags before runCmd, and belongs to the "primary"
// command group. SilenceErrors is set, so errors returned from the command are
// not printed by cobra itself (validateFlags writes its own message to stderr).
var Cmd = &cobra.Command{
Use: cmdName,
Short: "Collect system information for kernel lock analysis from target(s)",
Long: "",
Example: strings.Join(examples, "\n"),
RunE: runCmd,
PreRunE: validateFlags,
GroupID: "primary",
Args: cobra.NoArgs,
SilenceErrors: true,
}

// Values of the command-specific flags; they are bound to the flag set in init.
var (
flagDuration int // number of seconds to run the collection
flagFrequency int // number of samples taken per second
)

// Names of the command-specific flags as they appear on the command line.
const (
flagDurationName = "duration"
flagFrequencyName = "frequency"
)

// init registers the lock command's flags and installs its custom usage
// function. Usage strings are left empty here because help text is supplied
// through getFlagGroups and printed by usageFunc.
func init() {
	flags := Cmd.Flags()

	// flags shared with other reporting commands
	flags.StringVar(&common.FlagInput, common.FlagInputName, "", "")
	flags.StringSliceVar(&common.FlagFormat, common.FlagFormatName, []string{report.FormatHtml}, "")

	// flags specific to the lock command
	flags.IntVar(&flagDuration, flagDurationName, 10, "")
	flags.IntVar(&flagFrequency, flagFrequencyName, 11, "")

	common.AddTargetFlags(Cmd)

	Cmd.SetUsageFunc(usageFunc)
}

// usageFunc prints the usage text for the lock command: the usage line, the
// examples, the command's flags organized by the groups returned from
// getFlagGroups, and the persistent ("global") flags of the parent command.
// A flag's default value is shown only when it is non-empty.
//
// It always returns nil.
func usageFunc(cmd *cobra.Command) error {
	cmd.Printf("Usage: %s [flags]\n\n", cmd.CommandPath())
	cmd.Printf("Examples:\n%s\n\n", cmd.Example)
	cmd.Println("Flags:")
	for _, group := range getFlagGroups() {
		cmd.Printf(" %s:\n", group.GroupName)
		for _, flag := range group.Flags {
			flagDefault := ""
			// Lookup returns nil for a name that is not registered on this
			// command; treat that as "no default" instead of dereferencing nil.
			if f := cmd.Flags().Lookup(flag.Name); f != nil && f.DefValue != "" {
				flagDefault = fmt.Sprintf(" (default: %s)", f.DefValue)
			}
			cmd.Printf(" --%-20s %s%s\n", flag.Name, flag.Help, flagDefault)
		}
	}

	// Without a parent command there are no global flags to list.
	parent := cmd.Parent()
	if parent == nil {
		return nil
	}
	cmd.Println("\nGlobal Flags:")
	parent.PersistentFlags().VisitAll(func(pf *pflag.Flag) {
		flagDefault := ""
		// Read the default from the visited flag itself rather than looking
		// it up again in a different flag set, which may not contain it.
		if pf.DefValue != "" {
			flagDefault = fmt.Sprintf(" (default: %s)", pf.DefValue)
		}
		cmd.Printf(" --%-20s %s%s\n", pf.Name, pf.Usage, flagDefault)
	})
	return nil
}

// getFlagGroups returns the flag groups shown in the usage output: the
// command-specific "Options" group followed by the common target flag group.
func getFlagGroups() []common.FlagGroup {
	options := common.FlagGroup{
		GroupName: "Options",
		Flags: []common.Flag{
			{Name: flagDurationName, Help: "number of seconds to run the collection"},
			{Name: flagFrequencyName, Help: "number of samples taken per second"},
		},
	}
	return []common.FlagGroup{options, common.GetTargetFlagGroup()}
}

// validateFlags is the command's PreRunE hook. It rejects non-positive values
// for the duration and frequency flags, both of which are passed on to the
// collection script. Because Cmd sets SilenceErrors, the error is also written
// to stderr here so that the user sees it.
func validateFlags(cmd *cobra.Command, args []string) error {
	var err error
	switch {
	case flagDuration <= 0:
		err = fmt.Errorf("duration must be greater than 0")
	case flagFrequency <= 0:
		err = fmt.Errorf("frequency must be greater than 0")
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		return err
	}
	return nil
}

// runCmd is the command's RunE hook. It builds a ReportingCommand for the
// kernel lock analysis table using the validated duration and frequency flags,
// runs it, and returns its result.
func runCmd(cmd *cobra.Command, args []string) error {
	lockReport := common.ReportingCommand{
		Cmd:            cmd,
		ReportNamePost: "lock",
		TableNames:     []string{report.KernelLockAnalysisTableName},
		Duration:       flagDuration,
		Frequency:      flagFrequency,
	}
	return lockReport.Run()
}
2 changes: 2 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"perfspect/cmd/config"
"perfspect/cmd/flame"
"perfspect/cmd/lock"
"perfspect/cmd/metrics"
"perfspect/cmd/report"
"perfspect/cmd/telemetry"
Expand Down Expand Up @@ -111,6 +112,7 @@ Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}}
rootCmd.AddCommand(metrics.Cmd)
rootCmd.AddCommand(telemetry.Cmd)
rootCmd.AddCommand(flame.Cmd)
rootCmd.AddCommand(lock.Cmd)
rootCmd.AddCommand(config.Cmd)
if onIntelNetwork() {
rootCmd.AddGroup([]*cobra.Group{{ID: "other", Title: "Other Commands:"}}...)
Expand Down
16 changes: 16 additions & 0 deletions internal/report/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -1041,3 +1041,19 @@ func codePathFrequencyTableHTMLRenderer(tableValues TableValues, targetName stri
out += renderFlameGraph("Java", tableValues, "Java Paths")
return out
}

// kernelLockAnalysisHTMLRenderer renders the kernel lock analysis table as a
// two-column HTML table with no header row: one row per field, with the field
// name in bold in the first column and the field's first value in the second
// column, styled with "white-space: pre-wrap". A field with no values is
// rendered with an empty second column.
//
// targetName is not used by this renderer.
func kernelLockAnalysisHTMLRenderer(tableValues TableValues, targetName string) string {
	values := make([][]string, 0, len(tableValues.Fields))
	var tableValueStyles [][]string
	for _, field := range tableValues.Fields {
		// Guard against fields without values so indexing cannot panic.
		value := ""
		if len(field.Values) > 0 {
			value = field.Values[0]
		}
		values = append(values, []string{field.Name, value})
		tableValueStyles = append(tableValueStyles, []string{"font-weight:bold", "white-space: pre-wrap"})
	}
	return renderHTMLTable([]string{}, values, "pure-table pure-table-striped", tableValueStyles)
}
24 changes: 24 additions & 0 deletions internal/report/table_defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ const (
ConfigurationTableName = "Configuration"
// flamegraph table names
CodePathFrequencyTableName = "Code Path Frequency"
// lock table names
KernelLockAnalysisTableName = "Kernel Lock Analysis "
)

const (
Expand Down Expand Up @@ -606,6 +608,17 @@ var tableDefinitions = map[string]TableDefinition{
},
FieldsFunc: codePathFrequencyTableValues,
HTMLTableRendererFunc: codePathFrequencyTableHTMLRenderer},
//
// kernel lock analysis tables
//
KernelLockAnalysisTableName: {
Name: KernelLockAnalysisTableName,
ScriptNames: []string{
script.ProfileKernelLockScriptName,
},
FieldsFunc: kernelLockAnalysisTableValues,
HTMLTableRendererFunc: kernelLockAnalysisHTMLRenderer,
},
}

// GetScriptNamesForTable returns the script names required to generate the table with the given name
Expand Down Expand Up @@ -1889,3 +1902,14 @@ func codePathFrequencyTableValues(outputs map[string]script.ScriptOutput) []Fiel
}
return fields
}

// kernelLockAnalysisTableValues builds the fields of the kernel lock analysis
// table. Each field has a display name and a single value, which is the
// content of the correspondingly named section of the script output as
// returned by sectionValueFromOutput.
func kernelLockAnalysisTableValues(outputs map[string]script.ScriptOutput) []Field {
	// display name -> section name, in the order the fields are reported
	layout := []struct {
		name    string
		section string
	}{
		{"Hotspot without Callstack", "perf_hotspot_no_children"},
		{"Hotspot with Callstack", "perf_hotspot_callgraph"},
		{"Cache2Cache without Callstack", "perf_c2c_no_children"},
		{"Cache2Cache with CallStack", "perf_c2c_callgraph"},
		{"Lock Contention", "perf_lock_contention"},
	}
	fields := make([]Field, 0, len(layout))
	for _, entry := range layout {
		fields = append(fields, Field{
			Name:   entry.name,
			Values: []string{sectionValueFromOutput(outputs, entry.section)},
		})
	}
	return fields
}
10 changes: 10 additions & 0 deletions internal/report/table_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -1741,3 +1741,13 @@ func systemFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
}
return folded
}

// sectionValueFromOutput returns the content of the section named sectionName
// from the kernel lock profiling script's output. When the section is missing
// or empty, a warning is logged and the empty string is returned.
func sectionValueFromOutput(outputs map[string]script.ScriptOutput, sectionName string) string {
	content := getSectionsFromOutput(outputs, script.ProfileKernelLockScriptName)[sectionName]
	if content != "" {
		return content
	}
	slog.Warn("No content for section:", slog.String("warning", sectionName))
	return ""
}
60 changes: 60 additions & 0 deletions internal/script/script_defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ const (
PMUBusyScriptName = "pmu busy"
ProfileJavaScriptName = "profile java"
ProfileSystemScriptName = "profile system"
ProfileKernelLockScriptName = "profile kernel lock"
GaudiInfoScriptName = "gaudi info"
GaudiFirmwareScriptName = "gaudi firmware"
GaudiNumaScriptName = "gaudi numa"
Expand Down Expand Up @@ -958,6 +959,65 @@ fi
Superuser: true,
Depends: []string{"perf", "stackcollapse-perf.pl"},
},
{
// Kernel lock profiling script. The script text is produced by formatting
// the frequency and duration values into the shell template below.
//
// The script:
//   1. saves perf_event_paranoid and kptr_restrict, then sets them to -1 and 0;
//   2. starts a system-wide "perf record" in the background for the given
//      duration at the given sampling frequency, writing perf_hotspot.data;
//   3. probes whether "perf lock contention -b" works and, if it does, runs
//      it in the background for the same duration, capturing its stderr in
//      perf_lock_contention.txt;
//   4. waits for the background collections and restores the saved settings;
//   5. prints the perf reports to stdout, each preceded by a
//      "########## <section name> ##########" header line. The section names
//      are the ones kernelLockAnalysisTableValues looks up.
Name: ProfileKernelLockScriptName,
Script: func() string {
return fmt.Sprintf(`# system-wide lock profile collection
# adjust perf_event_paranoid and kptr_restrict
PERF_EVENT_PARANOID=$( cat /proc/sys/kernel/perf_event_paranoid )
echo -1 >/proc/sys/kernel/perf_event_paranoid
KPTR_RESTRICT=$( cat /proc/sys/kernel/kptr_restrict )
echo 0 >/proc/sys/kernel/kptr_restrict

frequency=%d
duration=%d

# collect hotspot
perf record -F $frequency -a -g --call-graph dwarf -W -d --phys-data --sample-cpu -e cycles:pp,instructions:pp,cpu/mem-loads,ldlat=30/P,cpu/mem-stores/P -o perf_hotspot.data -- sleep $duration &
PERF_HOTSPOT_PID=$!

# check the availability perf lock -b option
perf lock contention -a -bv --max-stack 20 2>/dev/null -- sleep 0
PERF_LOCK_CONTENTION_BPF=$?

# collect lock
if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then
perf lock contention -a -bv --max-stack 20 2>perf_lock_contention.txt -- sleep $duration &
PERF_LOCK_PID=$!
fi

wait ${PERF_HOTSPOT_PID}

if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then
wait ${PERF_LOCK_PID}
fi

# restore perf_event_paranoid and kptr_restrict
echo "$PERF_EVENT_PARANOID" > /proc/sys/kernel/perf_event_paranoid
echo "$KPTR_RESTRICT" > /proc/sys/kernel/kptr_restrict

# collapse perf data
if [ -f "perf_hotspot.data" ]; then
echo "########## perf_hotspot_no_children ##########"
perf report -i perf_hotspot.data --no-children --call-graph none --stdio
echo "########## perf_hotspot_callgraph ##########"
perf report -i perf_hotspot.data --stdio
fi
if [ -f "perf_hotspot.data" ]; then
echo "########## perf_c2c_no_children ##########"
perf c2c report -i perf_hotspot.data --call-graph none --stdio
echo "########## perf_c2c_callgraph ##########"
perf c2c report -i perf_hotspot.data --stdio
fi
if [ -f "perf_lock_contention.txt" ]; then
echo "########## perf_lock_contention ##########"
cat perf_lock_contention.txt
fi
`, frequency, duration)
}(),
// The script is marked as requiring superuser and depends on the "perf" tool.
Superuser: true,
Depends: []string{"perf"},
},
}

// validate script definitions
Expand Down
Loading