Skip to content

Commit

Permalink
Add support for kernel lock analysis (#114)
Browse files Browse the repository at this point in the history
* Add support for kernel lock analysis

* Fix code format issue by make format

* Fix staticcheck issue

* address the reviewer's comment, except for the html output

* Add html render

* Modify README for perf lock
  • Loading branch information
TianyouLi authored Dec 10, 2024
1 parent 70d79cd commit 240133d
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 1 deletion.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Svr-info functionality is now included in PerfSpect. The svr-info configuration
| ------- | ----------- |
| [`perfspect config`](#config-command) | Modify system configuration |
| [`perfspect flame`](#flame-command) | Generate flamegraphs |
| [`perfspect lock`](#lock-command) | Collect system-wide hotspot, c2c, and lock contention information |
| [`perfspect metrics`](#metrics-command) | Monitor core and uncore metrics |
| [`perfspect report`](#report-command) | Generate configuration report |
| [`perfspect telemetry`](#telemetry-command) | Collect system telemetry |
Expand All @@ -55,6 +56,10 @@ $ ./perfspect config --cores 24 --llc 2.0 --uncoremaxfreq 1.8
```
#### Flame Command
Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flame -h` to capture a system-wide software flamegraph.

#### Lock Command
As systems contain more and more cores, it is useful to be able to analyze kernel lock overhead and potential false sharing that impact overall system scalability. Run `perfspect lock -h` to collect system-wide hotspot, cache-to-cache (c2c), and lock contention information, which helps experienced performance engineers investigate the related problems.

#### Metrics Command
The `metrics` command provides system performance characterization metrics. The metrics provided are dependent on the platform architecture.

Expand Down Expand Up @@ -186,4 +191,4 @@ $ ./perfspect report --benchmark speed,memory --targets targets.yaml
`builder/build.sh` builds the dependencies and the app in Docker containers that provide the required build environments. Assumes you have Docker installed on your development system.

### Subsequent Builds
`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system.
`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system.
123 changes: 123 additions & 0 deletions cmd/lock/lock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Package lock is a subcommand of the root command. It is used to collect kernel lock related perf information from target(s).
package lock

// Copyright (C) 2021-2024 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause

import (
"fmt"
"os"
"perfspect/internal/common"
"perfspect/internal/report"
"strings"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
)

// cmdName is the name of this subcommand; it is used as the cobra "Use" string
// and in the generated usage examples.
const cmdName = "lock"

// examples holds the example invocations for this command. The entries are
// joined with newlines to form Cmd.Example.
var examples = []string{
fmt.Sprintf(" Lock inspect from local host: $ %s %s", common.AppName, cmdName),
fmt.Sprintf(" Lock inspect from remote target: $ %s %s --target 192.168.1.1 --user fred --key fred_key", common.AppName, cmdName),
fmt.Sprintf(" Lock inspect from multiple targets: $ %s %s --targets targets.yaml", common.AppName, cmdName),
}

// Cmd is the cobra command definition for the "lock" subcommand. It accepts no
// positional arguments, runs validateFlags as its PreRunE hook before runCmd,
// and sets SilenceErrors so cobra does not print returned errors itself.
var Cmd = &cobra.Command{
Use: cmdName,
Short: "Collect system information for kernel lock analysis from target(s)",
Long: "",
Example: strings.Join(examples, "\n"),
RunE: runCmd,
PreRunE: validateFlags,
GroupID: "primary",
Args: cobra.NoArgs,
SilenceErrors: true,
}

// Flag values for this command; they are bound to the command's flag set in init.
var (
// flagDuration is the number of seconds to run the collection.
flagDuration int
// flagFrequency is the number of samples taken per second.
flagFrequency int
)

// Command-line names of the flags defined by this command.
const (
flagDurationName = "duration"
flagFrequencyName = "frequency"
)

// init registers the lock command's flags, adds the shared target flags, and
// installs the custom usage printer. The per-flag usage strings are left empty
// because usageFunc prints the help text supplied by getFlagGroups instead.
func init() {
	flags := Cmd.Flags()
	flags.StringVar(&common.FlagInput, common.FlagInputName, "", "")
	flags.StringSliceVar(&common.FlagFormat, common.FlagFormatName, []string{report.FormatHtml}, "")
	flags.IntVar(&flagDuration, flagDurationName, 10, "")
	flags.IntVar(&flagFrequency, flagFrequencyName, 11, "")

	common.AddTargetFlags(Cmd)
	Cmd.SetUsageFunc(usageFunc)
}

// usageFunc prints the usage text for the lock command: the usage line, the
// examples, the command's flags grouped as returned by getFlagGroups, and the
// persistent ("global") flags of the parent command. It always returns nil.
func usageFunc(cmd *cobra.Command) error {
	cmd.Printf("Usage: %s [flags]\n\n", cmd.CommandPath())
	cmd.Printf("Examples:\n%s\n\n", cmd.Example)
	cmd.Println("Flags:")
	for _, group := range getFlagGroups() {
		cmd.Printf(" %s:\n", group.GroupName)
		for _, flag := range group.Flags {
			flagDefault := ""
			// Lookup returns nil for a name that was never registered; skip the
			// default in that case instead of dereferencing a nil flag.
			if f := cmd.Flags().Lookup(flag.Name); f != nil && f.DefValue != "" {
				flagDefault = fmt.Sprintf(" (default: %s)", f.DefValue)
			}
			cmd.Printf(" --%-20s %s%s\n", flag.Name, flag.Help, flagDefault)
		}
	}
	cmd.Println("\nGlobal Flags:")
	// A command that has not been attached to a parent has no global flags.
	if parent := cmd.Parent(); parent != nil {
		parent.PersistentFlags().VisitAll(func(pf *pflag.Flag) {
			flagDefault := ""
			// Read the default from the flag being visited. Looking the name up
			// in a different flag set can return nil or a different flag.
			if pf.DefValue != "" {
				flagDefault = fmt.Sprintf(" (default: %s)", pf.DefValue)
			}
			cmd.Printf(" --%-20s %s%s\n", pf.Name, pf.Usage, flagDefault)
		})
	}
	return nil
}

// getFlagGroups returns the flag groups shown in the usage output: this
// command's own "Options" group followed by the shared target flag group.
func getFlagGroups() []common.FlagGroup {
	options := common.FlagGroup{
		GroupName: "Options",
		Flags: []common.Flag{
			{Name: flagDurationName, Help: "number of seconds to run the collection"},
			{Name: flagFrequencyName, Help: "number of samples taken per second"},
		},
	}
	return []common.FlagGroup{options, common.GetTargetFlagGroup()}
}

// validateFlags is the PreRunE hook for the lock command. It rejects
// non-positive values for --duration and --frequency, since both are passed
// straight to the collection script. It returns the first violation found, or
// nil when the flags are acceptable.
func validateFlags(cmd *cobra.Command, args []string) error {
	var err error
	switch {
	case flagDuration <= 0:
		err = fmt.Errorf("duration must be greater than 0")
	case flagFrequency <= 0:
		err = fmt.Errorf("frequency must be greater than 0")
	}
	if err != nil {
		// Cmd sets SilenceErrors, so report the problem to the user here.
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		return err
	}
	return nil
}

// runCmd is the RunE handler for the lock command. It fills in a
// common.ReportingCommand with the sampling frequency, the duration and the
// kernel lock analysis table, then returns whatever its Run method returns.
func runCmd(cmd *cobra.Command, args []string) error {
	lockReport := common.ReportingCommand{
		Cmd:            cmd,
		ReportNamePost: "lock",
		TableNames:     []string{report.KernelLockAnalysisTableName},
		Duration:       flagDuration,
		Frequency:      flagFrequency,
	}
	return lockReport.Run()
}
2 changes: 2 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"perfspect/cmd/config"
"perfspect/cmd/flame"
"perfspect/cmd/lock"
"perfspect/cmd/metrics"
"perfspect/cmd/report"
"perfspect/cmd/telemetry"
Expand Down Expand Up @@ -111,6 +112,7 @@ Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}}
rootCmd.AddCommand(metrics.Cmd)
rootCmd.AddCommand(telemetry.Cmd)
rootCmd.AddCommand(flame.Cmd)
rootCmd.AddCommand(lock.Cmd)
rootCmd.AddCommand(config.Cmd)
if onIntelNetwork() {
rootCmd.AddGroup([]*cobra.Group{{ID: "other", Title: "Other Commands:"}}...)
Expand Down
16 changes: 16 additions & 0 deletions internal/report/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -1041,3 +1041,19 @@ func codePathFrequencyTableHTMLRenderer(tableValues TableValues, targetName stri
out += renderFlameGraph("Java", tableValues, "Java Paths")
return out
}

// kernelLockAnalysisHTMLRenderer renders the kernel lock analysis table as a
// two-column HTML table with one row per field: the field name in bold and
// the field's first value with its whitespace preserved. A field that has no
// values is rendered with an empty cell. targetName is not used.
func kernelLockAnalysisHTMLRenderer(tableValues TableValues, targetName string) string {
	values := make([][]string, 0, len(tableValues.Fields))
	var tableValueStyles [][]string
	for _, field := range tableValues.Fields {
		// Guard against fields without values so a missing section cannot
		// cause an index-out-of-range panic while rendering.
		content := ""
		if len(field.Values) > 0 {
			content = field.Values[0]
		}
		values = append(values, []string{field.Name, content})
		tableValueStyles = append(tableValueStyles, []string{"font-weight:bold", "white-space: pre-wrap"})
	}
	return renderHTMLTable([]string{}, values, "pure-table pure-table-striped", tableValueStyles)
}
24 changes: 24 additions & 0 deletions internal/report/table_defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ const (
ConfigurationTableName = "Configuration"
// flamegraph table names
CodePathFrequencyTableName = "Code Path Frequency"
// lock table names
KernelLockAnalysisTableName = "Kernel Lock Analysis "
)

const (
Expand Down Expand Up @@ -606,6 +608,17 @@ var tableDefinitions = map[string]TableDefinition{
},
FieldsFunc: codePathFrequencyTableValues,
HTMLTableRendererFunc: codePathFrequencyTableHTMLRenderer},
//
// kernel lock analysis tables
//
// The table is built by kernelLockAnalysisTableValues from the output of the
// kernel lock profiling script and uses its own HTML renderer.
KernelLockAnalysisTableName: {
Name: KernelLockAnalysisTableName,
ScriptNames: []string{
script.ProfileKernelLockScriptName,
},
FieldsFunc: kernelLockAnalysisTableValues,
HTMLTableRendererFunc: kernelLockAnalysisHTMLRenderer,
},
}

// GetScriptNamesForTable returns the script names required to generate the table with the given name
Expand Down Expand Up @@ -1889,3 +1902,14 @@ func codePathFrequencyTableValues(outputs map[string]script.ScriptOutput) []Fiel
}
return fields
}

// kernelLockAnalysisTableValues builds the fields of the kernel lock analysis
// table. Each field carries a single value: the text of one named section
// taken from the kernel lock profiling script output.
func kernelLockAnalysisTableValues(outputs map[string]script.ScriptOutput) []Field {
	// Display label paired with the script output section that supplies it.
	rows := []struct{ label, section string }{
		{"Hotspot without Callstack", "perf_hotspot_no_children"},
		{"Hotspot with Callstack", "perf_hotspot_callgraph"},
		{"Cache2Cache without Callstack", "perf_c2c_no_children"},
		{"Cache2Cache with CallStack", "perf_c2c_callgraph"},
		{"Lock Contention", "perf_lock_contention"},
	}
	fields := make([]Field, 0, len(rows))
	for _, row := range rows {
		fields = append(fields, Field{
			Name:   row.label,
			Values: []string{sectionValueFromOutput(outputs, row.section)},
		})
	}
	return fields
}
10 changes: 10 additions & 0 deletions internal/report/table_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -1741,3 +1741,13 @@ func systemFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
}
return folded
}

// sectionValueFromOutput returns the content of the named section from the
// kernel lock profiling script output. When the section is missing or empty
// it logs a warning and returns the empty string.
func sectionValueFromOutput(outputs map[string]script.ScriptOutput, sectionName string) string {
	content := getSectionsFromOutput(outputs, script.ProfileKernelLockScriptName)[sectionName]
	if content != "" {
		return content
	}
	slog.Warn("No content for section:", slog.String("warning", sectionName))
	return content
}
60 changes: 60 additions & 0 deletions internal/script/script_defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ const (
PMUBusyScriptName = "pmu busy"
ProfileJavaScriptName = "profile java"
ProfileSystemScriptName = "profile system"
ProfileKernelLockScriptName = "profile kernel lock"
GaudiInfoScriptName = "gaudi info"
GaudiFirmwareScriptName = "gaudi firmware"
GaudiNumaScriptName = "gaudi numa"
Expand Down Expand Up @@ -958,6 +959,65 @@ fi
Superuser: true,
Depends: []string{"perf", "stackcollapse-perf.pl"},
},
{
// System-wide collection for kernel lock analysis. The script saves the
// current perf_event_paranoid and kptr_restrict values, sets them to -1 and
// 0, and starts "perf record" in the background for the requested duration.
// It then probes whether "perf lock contention -b" works on the target and,
// if so, runs it in parallel for the same duration. After both finish, the
// saved kernel settings are written back and each report is printed under a
// "########## <section name> ##########" header.
// NOTE(review): the settings are only restored when the script reaches the
// restore step; an interrupted run presumably leaves them modified — confirm
// whether a trap is needed.
// NOTE(review): perf_hotspot.data and perf_lock_contention.txt are written
// relative to the working directory — confirm the script runs in a
// per-run temporary directory.
Name: ProfileKernelLockScriptName,
Script: func() string {
return fmt.Sprintf(`# system-wide lock profile collection
# adjust perf_event_paranoid and kptr_restrict
PERF_EVENT_PARANOID=$( cat /proc/sys/kernel/perf_event_paranoid )
echo -1 >/proc/sys/kernel/perf_event_paranoid
KPTR_RESTRICT=$( cat /proc/sys/kernel/kptr_restrict )
echo 0 >/proc/sys/kernel/kptr_restrict
frequency=%d
duration=%d
# collect hotspot
perf record -F $frequency -a -g --call-graph dwarf -W -d --phys-data --sample-cpu -e cycles:pp,instructions:pp,cpu/mem-loads,ldlat=30/P,cpu/mem-stores/P -o perf_hotspot.data -- sleep $duration &
PERF_HOTSPOT_PID=$!
# check the availability perf lock -b option
perf lock contention -a -bv --max-stack 20 2>/dev/null -- sleep 0
PERF_LOCK_CONTENTION_BPF=$?
# collect lock
if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then
perf lock contention -a -bv --max-stack 20 2>perf_lock_contention.txt -- sleep $duration &
PERF_LOCK_PID=$!
fi
wait ${PERF_HOTSPOT_PID}
if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then
wait ${PERF_LOCK_PID}
fi
# restore perf_event_paranoid and kptr_restrict
echo "$PERF_EVENT_PARANOID" > /proc/sys/kernel/perf_event_paranoid
echo "$KPTR_RESTRICT" > /proc/sys/kernel/kptr_restrict
# collapse perf data
if [ -f "perf_hotspot.data" ]; then
echo "########## perf_hotspot_no_children ##########"
perf report -i perf_hotspot.data --no-children --call-graph none --stdio
echo "########## perf_hotspot_callgraph ##########"
perf report -i perf_hotspot.data --stdio
fi
if [ -f "perf_hotspot.data" ]; then
echo "########## perf_c2c_no_children ##########"
perf c2c report -i perf_hotspot.data --call-graph none --stdio
echo "########## perf_c2c_callgraph ##########"
perf c2c report -i perf_hotspot.data --stdio
fi
if [ -f "perf_lock_contention.txt" ]; then
echo "########## perf_lock_contention ##########"
cat perf_lock_contention.txt
fi
`, frequency, duration)
}(),
Superuser: true,
Depends: []string{"perf"},
},
}

// validate script definitions
Expand Down

0 comments on commit 240133d

Please sign in to comment.