diff --git a/pkg/internal/checkup/checkup.go b/pkg/internal/checkup/checkup.go index 2ab2436c..fc1f184e 100644 --- a/pkg/internal/checkup/checkup.go +++ b/pkg/internal/checkup/checkup.go @@ -56,6 +56,7 @@ type Checkup struct { vmi *kvcorev1.VirtualMachineInstance results status.Results executor testExecutor + cfg config.Config } const VMINamePrefix = "rt-vmi" @@ -66,6 +67,7 @@ func New(client kubeVirtVMIClient, namespace string, checkupConfig config.Config namespace: namespace, vmi: newRealtimeVMI(checkupConfig), executor: executor, + cfg: checkupConfig, } } @@ -100,6 +102,10 @@ func (c *Checkup) Run(ctx context.Context) error { } c.results.VMUnderTestActualNodeName = c.vmi.Status.NodeName + if c.results.OslatMaxLatency > c.cfg.OslatLatencyThreshold { + return fmt.Errorf("oslat Max Latency measured %s exceeded the given threshold %s", + c.results.OslatMaxLatency.String(), c.cfg.OslatLatencyThreshold.String()) + } return nil } diff --git a/pkg/internal/checkup/executor/executor.go b/pkg/internal/checkup/executor/executor.go index d0ea2198..1ec75daf 100644 --- a/pkg/internal/checkup/executor/executor.go +++ b/pkg/internal/checkup/executor/executor.go @@ -28,6 +28,7 @@ import ( "kubevirt.io/client-go/kubecli" "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/checkup/executor/console" + "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/checkup/executor/oslat" "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/config" "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/status" ) @@ -41,14 +42,16 @@ type Executor struct { namespace string vmiUsername string vmiPassword string + OslatDuration time.Duration } -func New(client vmiSerialConsoleClient, namespace string) Executor { +func New(client vmiSerialConsoleClient, namespace string, cfg config.Config) Executor { return Executor{ vmiSerialClient: client, namespace: namespace, vmiUsername: config.VMIUsername, vmiPassword: config.VMIPassword, + OslatDuration: cfg.OslatDuration, } } @@ -59,5 +62,15 @@ func (e Executor) Execute(ctx context.Context, vmiUnderTestName string) (status. return status.Results{}, fmt.Errorf("failed to login to VMI \"%s/%s\": %w", e.namespace, vmiUnderTestName, err) } - return status.Results{}, nil + oslatClient := oslat.NewClient(vmiUnderTestConsoleExpecter, e.OslatDuration) + log.Printf("Running Oslat test on VMI under test for %s...", e.OslatDuration.String()) + maxLatency, err := oslatClient.Run(ctx) + if err != nil { + return status.Results{}, fmt.Errorf("failed to run Oslat on VMI \"%s/%s\": %w", e.namespace, vmiUnderTestName, err) + } + log.Printf("Max Oslat Latency measured: %s", maxLatency.String()) + + return status.Results{ + OslatMaxLatency: maxLatency, + }, nil } diff --git a/pkg/internal/checkup/executor/oslat/client.go b/pkg/internal/checkup/executor/oslat/client.go new file mode 100644 index 00000000..e5fecd81 --- /dev/null +++ b/pkg/internal/checkup/executor/oslat/client.go @@ -0,0 +1,217 @@ +/* + * This file is part of the kiagnose project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2023 Red Hat, Inc. + * + */ + +package oslat + +import ( + "bufio" + "context" + "fmt" + "log" + "regexp" + "strconv" + "strings" + "time" + + expect "github.com/google/goexpect" + + "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/checkup/executor/console" +) + +type consoleExpecter interface { + SafeExpectBatchWithResponse(expected []expect.Batcher, timeout time.Duration) ([]expect.BatchRes, error) +} + +type Client struct { + consoleExpecter consoleExpecter + testDuration time.Duration +} + +func NewClient(vmiUnderTestConsoleExpecter consoleExpecter, testDuration time.Duration) *Client { + return &Client{ + consoleExpecter: vmiUnderTestConsoleExpecter, + testDuration: testDuration, + } +} + +func (t Client) Run(ctx context.Context) (time.Duration, error) { + type result struct { + stdout string + err error + } + + resultCh := make(chan result) + go func() { + defer close(resultCh) + const testTimeoutGrace = 5 * time.Minute + + oslatCmd := buildOslatCmd(t.testDuration) + + resp, err := t.consoleExpecter.SafeExpectBatchWithResponse([]expect.Batcher{ + &expect.BSnd{S: oslatCmd + "\n"}, + &expect.BExp{R: console.PromptExpression}, + &expect.BSnd{S: "echo $?\n"}, + &expect.BExp{R: console.PromptExpression}, + }, + t.testDuration+testTimeoutGrace, + ) + if err != nil { + resultCh <- result{"", err} + return + } + + exitCode, err := getExitCode(resp[1].Output) + if err != nil { + resultCh <- result{"", fmt.Errorf("oslat test failed to get exit code: %w", err)} + return + } + stdout := resp[0].Output + const successExitCode = 0 + if exitCode != successExitCode { + log.Printf("oslat test returned exit code: %d. stdout: %s", exitCode, stdout) + resultCh <- result{stdout, fmt.Errorf("oslat test failed with exit code: %d. See logs for more information", exitCode)} + return + } + + resultCh <- result{stdout, nil} + }() + + var res result + select { + case res = <-resultCh: + if res.err != nil { + return 0, res.err + } + case <-ctx.Done(): + return 0, fmt.Errorf("oslat test canceled due to context closing: %w", ctx.Err()) + } + + log.Printf("Oslat test completed:\n%v", res.stdout) + return parseMaxLatency(res.stdout) +} + +func getExitCode(returnVal string) (int, error) { + pattern := `\r\n(\d+)\r\n` + re := regexp.MustCompile(pattern) + matches := re.FindStringSubmatch(returnVal) + + const minExpectedMatches = 2 + if len(matches) < minExpectedMatches { + return 0, fmt.Errorf("failed to parse exit value") + } + + exitCode, err := strconv.Atoi(matches[1]) + if err != nil { + return 0, err + } + + return exitCode, nil +} + +func parseMaxLatency(oslatOutput string) (time.Duration, error) { + const maximumKeyword = "Maximum" + + maximumEntryLine, err := getResultEntryByKey(oslatOutput, maximumKeyword) + if err != nil { + return 0, err + } + + maxLatencyValues, units, err := parseMaxEntryLine(maximumEntryLine) + if err != nil { + return 0, err + } + + return getMaxLatencyValue(maxLatencyValues, units) +} + +func getResultEntryByKey(input, entryKey string) (string, error) { + scanner := bufio.NewScanner(strings.NewReader(input)) + for scanner.Scan() { + line := scanner.Text() + if !strings.Contains(line, entryKey) { + continue + } + return line, nil + } + if scanErr := scanner.Err(); scanErr != nil { + return "", scanErr + } + return "", fmt.Errorf("failed parsing maximum latency from oslat results") +} + +func extractUnits(line string) (lineWithoutUnits, units string, err error) { + re := regexp.MustCompile(`\((.+?)\)`) + matches := re.FindStringSubmatch(line) + + const minExpectedMatches = 2 + if len(matches) < minExpectedMatches { + return "", "", fmt.Errorf("units not found in line: %s", line) + } + + units = matches[1] + lineWithoutUnits = strings.Replace(line, matches[0], "", 1) + return lineWithoutUnits, units, nil +} + +func parseMaxEntryLine(maximumEntryLine string) (values []string, units string, err error) { + const keyValDelimiter = ":" + var keyWithValues string + keyWithValues, units, err = extractUnits(maximumEntryLine) + if err != nil { + return nil, "", fmt.Errorf("failed to extract units: %w", err) + } + keyWithValuesSlice := strings.Split(keyWithValues, keyValDelimiter) + return strings.Fields(keyWithValuesSlice[1]), units, nil +} + +func getMaxLatencyValue(values []string, units string) (time.Duration, error) { + var coreMaxLatencyDuration time.Duration + var maxCoresLatencyDuration time.Duration + var err error + for _, coreMaxLatencyStr := range values { + corMaxLatencyWithUnits := coreMaxLatencyStr + units + if coreMaxLatencyDuration, err = time.ParseDuration(corMaxLatencyWithUnits); err != nil { + return 0, fmt.Errorf("failed to parse core maximum latency %s: %w", corMaxLatencyWithUnits, err) + } + if coreMaxLatencyDuration > maxCoresLatencyDuration { + maxCoresLatencyDuration = coreMaxLatencyDuration + } + } + return maxCoresLatencyDuration, nil +} + +func buildOslatCmd(testDuration time.Duration) string { + const ( + cpuList = "1-2" + realtimePriority = "1" + workload = "memmove" + workloadMemory = "4K" + ) + + sb := strings.Builder{} + sb.WriteString(fmt.Sprintf("taskset -c %s ", cpuList)) + sb.WriteString("oslat ") + sb.WriteString(fmt.Sprintf("--cpu-list %s ", cpuList)) + sb.WriteString(fmt.Sprintf("--rtprio %s ", realtimePriority)) + sb.WriteString(fmt.Sprintf("--duration %s ", testDuration.String())) + sb.WriteString(fmt.Sprintf("--workload %s ", workload)) + sb.WriteString(fmt.Sprintf("--workload-mem %s ", workloadMemory)) + + return sb.String() +} diff --git a/pkg/internal/checkup/executor/oslat/client_test.go b/pkg/internal/checkup/executor/oslat/client_test.go new file mode 100644 index 00000000..8f493269 --- /dev/null +++ b/pkg/internal/checkup/executor/oslat/client_test.go @@ -0,0 +1,262 @@ +/* + * This file is part of the kiagnose project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2023 Red Hat, Inc. + * + */ + +package oslat_test + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + expect "github.com/google/goexpect" + assert "github.com/stretchr/testify/require" + + "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/checkup/executor/console" + "github.com/kiagnose/kubevirt-realtime-checkup/pkg/internal/checkup/executor/oslat" +) + +const oslatTestDuration = time.Minute + +func TestRunSuccess(t *testing.T) { + expecter := &expecterStub{ + injectedActualMaxResults: "27 56 (us)", + } + + oslatClient := oslat.NewClient( + expecter, + oslatTestDuration, + ) + + maxLatency, err := oslatClient.Run(context.Background()) + assert.NoError(t, err, "Run returned an error") + expected := 56 * time.Microsecond + assert.Equal(t, expected, maxLatency, "Run returned unexpected result") +} + +func TestRunFailure(t *testing.T) { + t.Run("when console returns batch error", func(t *testing.T) { + expectedBatchErr := errors.New("some error") + expecter := &expecterStub{ + expectBatchFailureErr: expectedBatchErr, + } + oslatClient := oslat.NewClient( + expecter, + oslatTestDuration, + ) + + _, err := oslatClient.Run(context.Background()) + assert.ErrorContains(t, err, expectedBatchErr.Error()) + }) + t.Run("when run command returns non-success return value", func(t *testing.T) { + expectedRunErr := errors.New("oslat test failed with exit code") + expecter := &expecterStub{ + expectRunFailureErr: expectedRunErr, + } + oslatClient := oslat.NewClient( + expecter, + oslatTestDuration, + ) + + _, err := oslatClient.Run(context.Background()) + assert.ErrorContains(t, err, expectedRunErr.Error()) + }) + t.Run("when batch times out", func(t *testing.T) { + expectedTimeoutErr := errors.New("run failed due to timeout") + expecter := &expecterStub{ + batchRunTimeoutErr: expectedTimeoutErr, + } + oslatClient := oslat.NewClient( + expecter, + oslatTestDuration, + ) + + _, err := oslatClient.Run(context.Background()) + assert.ErrorContains(t, err, expectedTimeoutErr.Error()) + }) + t.Run("when oslat returns invalid data", func(t *testing.T) { + expectedInvalidOslatOutputErr := errors.New("failed parsing maximum latency from oslat results") + oslatClient := oslat.NewClient( + &expecterStub{ + expectRunInvalidOutput: true, + }, + oslatTestDuration, + ) + + _, err := oslatClient.Run(context.Background()) + assert.ErrorContains(t, err, expectedInvalidOslatOutputErr.Error()) + }) + t.Run("when checkup context times out", func(t *testing.T) { + expectedCheckupTimeoutErr := errors.New("oslat test canceled due to context closing") + oslatClient := oslat.NewClient( + &expecterStub{}, + oslatTestDuration, + ) + + fakeClock := newFakeClock() + exceededDeadline := fakeClock.Now().Add(-time.Second) + ctx, cancel := context.WithDeadline(context.Background(), exceededDeadline) + defer cancel() + + _, err := oslatClient.Run(ctx) + assert.ErrorContains(t, err, expectedCheckupTimeoutErr.Error()) + }) +} + +// fakeClock is a custom fake clock implementation +type fakeClock struct { + current time.Time +} + +func newFakeClock() fakeClock { + return fakeClock{ + current: time.Now(), + } +} + +// Now returns the current time of the fake clock +func (c fakeClock) Now() time.Time { + return c.current +} + +const ( + oslatRunCmd = "taskset -c 1-2 oslat --cpu-list 1-2 --rtprio 1 --duration 1m0s --workload memmove --workload-mem 4K \n" + oslatRunResultsTemplate = "oslat V 2.60\n" + + "Total runtime: \t\t60 seconds\n" + + "Thread priority: \tSCHED_FIFO:1\n" + + "CPU list: \t\t1-2\n" + + "CPU for main thread: \t0\n" + + "Workload: \t\tmemmove\n" + + "Workload mem: \t\t4 (KiB)\n" + + "Preheat cores: \t\t2\n" + + "\n" + + "Pre-heat for 1 seconds...\n" + + "Test starts...\n" + + "Test completed.\n" + + "\n" + + " Core:\t 1 2\n" + + "Counter Freq:\t 2096 2096 (Mhz)\n" + + " 001 (us):\t 0 0\n" + + " 002 (us):\t 582681699 615399319\n" + + " 003 (us):\t 24 28\n" + + " 004 (us):\t 23 18\n" + + " 005 (us):\t 13 2805\n" + + " 006 (us):\t 10492 21962\n" + + " 007 (us):\t 19863 6356\n" + + " 008 (us):\t 10473 12481\n" + + " 009 (us):\t 11218 11417\n" + + " 010 (us):\t 4928 3433\n" + + " 011 (us):\t 1443 880\n" + + " 012 (us):\t 524 211\n" + + " 013 (us):\t 239 84\n" + + " 014 (us):\t 178 41\n" + + " 015 (us):\t 170 39\n" + + " 016 (us):\t 191 74\n" + + " 017 (us):\t 109 66\n" + + " 018 (us):\t 33 56\n" + + " 019 (us):\t 36 47\n" + + " 020 (us):\t 52 23\n" + + " 021 (us):\t 35 17\n" + + " 022 (us):\t 15 10\n" + + " 023 (us):\t 6 8\n" + + " 024 (us):\t 0 5\n" + + " 025 (us):\t 2 4\n" + + " 026 (us):\t 0 1\n" + + " 027 (us):\t 0 1\n" + + " 028 (us):\t 1 1\n" + + " 029 (us):\t 0 0\n" + + " 030 (us):\t 0 1\n" + + " 031 (us):\t 0 0\n" + + " 032 (us):\t 0 2 (including overflows)\n" + + " Minimum:\t 1 1 (us)\n" + + " Average:\t 2.001 2.001 (us)\n" + + " Maximum:\t %s\n" + + " Max-Min:\t 26 55 (us)\n" + + " Duration:\t 59.970 59.970 (sec)\n" + + "\n" + + "[root@rt-vmi-rw5tr cloud-user]#" + + oslatRunInvalidOutput = "oslat V 2.60\n" + + "Total runtime: \t\t300 seconds\n" + + "Thread priority: \tSCHED_FIFO:1\n" + + "CPU list: \t\t1\n" + + "CPU for main thread: \t0\n" + + "Workload: \t\tmemmove\n" + + "Workload mem: \t\t4 (KiB)\n" + + "Preheat cores: \t\t1\n" + + "\n" + + "Pre-heat for 1 seconds...\n" + + "Test starts...\n" + + "Test completed.\n" + + "\n" +) + +type expecterStub struct { + injectedActualMaxResults string + batchRunTimeoutErr error + expectBatchFailureErr error + expectRunFailureErr error + expectRunInvalidOutput bool +} + +func generateBatchResponseWithRetval(runStdout string, runRetVal int) []expect.BatchRes { + return []expect.BatchRes{ + { + Idx: 1, + Output: runStdout, + }, + { + Idx: 2, + Output: fmt.Sprintf("%s%d%s", console.CRLF, runRetVal, console.CRLF), + }, + } +} + +func (es expecterStub) SafeExpectBatchWithResponse(expected []expect.Batcher, _ time.Duration) ([]expect.BatchRes, error) { + const ( + successExitCode = 0 + failureExitCode = 127 + ) + + if es.batchRunTimeoutErr != nil { + return nil, es.batchRunTimeoutErr + } + if es.expectBatchFailureErr != nil { + return nil, es.expectBatchFailureErr + } + + var batchRes []expect.BatchRes + switch expected[0].Arg() { + case oslatRunCmd: + if es.expectRunFailureErr != nil { + batchRes = generateBatchResponseWithRetval(es.expectRunFailureErr.Error(), failureExitCode) + } else if es.expectRunInvalidOutput { + batchRes = generateBatchResponseWithRetval(oslatRunInvalidOutput, successExitCode) + } else { + oslatOutput := fmt.Sprintf(oslatRunResultsTemplate, es.injectedActualMaxResults) + batchRes = generateBatchResponseWithRetval(oslatOutput, successExitCode) + } + + default: + return nil, fmt.Errorf("command not recognized: %q", expected[0].Arg()) + } + + return batchRes, nil +} diff --git a/pkg/mainflow.go b/pkg/mainflow.go index 6a5cec58..e6b463f3 100644 --- a/pkg/mainflow.go +++ b/pkg/mainflow.go @@ -51,7 +51,7 @@ func Run(rawEnv map[string]string, namespace string) error { printConfig(cfg) - realtimeCheckupExecutor := executor.New(c, namespace) + realtimeCheckupExecutor := executor.New(c, namespace, cfg) l := launcher.New( checkup.New(c, namespace, cfg, realtimeCheckupExecutor), reporter.New(c, baseConfig.ConfigMapNamespace, baseConfig.ConfigMapName), diff --git a/tests/checkup_test.go b/tests/checkup_test.go index 989633bd..1936dee7 100644 --- a/tests/checkup_test.go +++ b/tests/checkup_test.go @@ -106,6 +106,8 @@ var _ = Describe("Checkup execution", func() { Expect(configMap.Data["status.failureReason"]).To(BeEmpty(), fmt.Sprintf("should be empty %+v", configMap.Data)) Expect(configMap.Data["status.result.vmUnderTestActualNodeName"]). ToNot(BeEmpty(), fmt.Sprintf("vmUnderTestActualNodeName should not be empty %+v", configMap.Data)) + Expect(configMap.Data["status.result.oslatMaxLatencyMicroSeconds"]). + ToNot(BeEmpty(), fmt.Sprintf("oslatMaxLatencyMicroSeconds should not be empty %+v", configMap.Data)) }) })