Skip to content

Commit

Permalink
Merge pull request #52 from heyvister1/remove-raw-nv-config
Browse files Browse the repository at this point in the history
Removing 'rawNvConfig' to avoid unstable NIC states in DPU mode
  • Loading branch information
e0ne authored Nov 19, 2024
2 parents cd96e21 + bcf085d commit 3932e77
Show file tree
Hide file tree
Showing 13 changed files with 52 additions and 184 deletions.
13 changes: 3 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ If more than one template match a single device, none will be applied and the er

for more information refer to [api-reference](docs/api-reference.md).

> [!IMPORTANT]
> `ResetToDefault`: in NIC Configuration Operator template v0.1.14, the FW reset flow isn't supported on BF2/BF3 DPUs (not SuperNICs).
#### [Example NICConfigurationTemplate](docs/examples/example-nicconfigurationtemplate-connectx6.yaml):

```yaml
Expand Down Expand Up @@ -89,11 +92,6 @@ spec:
gpuDirectOptimized:
enabled: true
env: Baremetal
rawNvConfig:
- name: THIS_IS_A_SPECIAL_NVCONFIG_PARAM
value: "55"
- name: SOME_ADVANCED_NVCONFIG_PARAM
value: "true"
```
#### Configuration details
Expand Down Expand Up @@ -127,11 +125,9 @@ spec:
* `gpuDirectOptimized`: performs GPU Direct optimizations. Currently, only optimizations for the Baremetal environment are supported. If enabled, performs the following:
* Set nvconfig `ATS_ENABLED=0`
* Can only be enabled when `pciPerformanceOptimized` is enabled
* `rawNvConfig`: a `map[string]string` which contains NVConfig parameters to apply for a NIC on all of its PFs.
* Both the numeric values and their string aliases, supported by NVConfig, are allowed (e.g. `REAL_TIME_CLOCK_ENABLE=False`, `REAL_TIME_CLOCK_ENABLE=0`).
* For per-port parameters (suffix `_P1`, `_P2`), parameters with the `_P2` suffix are ignored if the device is single-port.
* If a configuration is not set in spec, its non-volatile configuration parameters (if any) should be set to device default.
* Parameters in rawNvConfig are regarded as having no default for this flow


### NicDevice
Expand All @@ -158,9 +154,6 @@ spec:
numVfs: 8
pciPerformanceOptimized:
enabled: true
rawNvConfig:
- name: TLS_OPTIMIZE
value: "1"
status:
conditions:
- reason: UpdateSuccessful
Expand Down
9 changes: 0 additions & 9 deletions api/v1alpha1/nicconfigurationtemplate_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,6 @@ type GpuDirectOptimizedSpec struct {
Env string `json:"env"`
}

type NvConfigParam struct {
// Name of the arbitrary nvconfig parameter
Name string `json:"name"`
// Value of the arbitrary nvconfig parameter
Value string `json:"value"`
}

// ConfigurationTemplateSpec is a set of configurations for the NICs
type ConfigurationTemplateSpec struct {
// Number of VFs to be configured
Expand All @@ -92,8 +85,6 @@ type ConfigurationTemplateSpec struct {
RoceOptimized *RoceOptimizedSpec `json:"roceOptimized,omitempty"`
// GPU Direct optimization settings
GpuDirectOptimized *GpuDirectOptimizedSpec `json:"gpuDirectOptimized,omitempty"`
// List of arbitrary nv config parameters
RawNvConfig []NvConfigParam `json:"rawNvConfig,omitempty"`
}

// NicConfigurationTemplateSpec defines the desired state of NicConfigurationTemplate
Expand Down
3 changes: 2 additions & 1 deletion api/v1alpha1/nicdevice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import (

// NicDeviceConfigurationSpec contains desired configuration of the NIC
type NicDeviceConfigurationSpec struct {
// ResetToDefault specifies whether node agent needs to perform a reset flow
// ResetToDefault specifies whether node agent needs to perform a reset flow.
// In NIC Configuration Operator template v0.1.14 BF2/BF3 DPUs (not SuperNics) FW reset flow isn't supported.
// The following operations will be performed:
// * Nvconfig reset of all non-volatile configurations
// - Mstconfig -d <device> reset for each PF
Expand Down
20 changes: 0 additions & 20 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -126,21 +126,6 @@ spec:
required:
- enabled
type: object
rawNvConfig:
description: List of arbitrary nv config parameters
items:
properties:
name:
description: Name of the arbitrary nvconfig parameter
type: string
value:
description: Value of the arbitrary nvconfig parameter
type: string
required:
- name
- value
type: object
type: array
roceOptimized:
description: RoCE optimization settings
properties:
Expand Down
18 changes: 2 additions & 16 deletions config/crd/bases/configuration.net.nvidia.com_nicdevices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ spec:
properties:
resetToDefault:
description: |-
ResetToDefault specifies whether node agent needs to perform a reset flow
ResetToDefault specifies whether node agent needs to perform a reset flow.
In NIC Configuration Operator template v0.1.14 BF2/BF3 DPUs (not SuperNics) FW reset flow isn't supported.
The following operations will be performed:
* Nvconfig reset of all non-volatile configurations
- Mstconfig -d <device> reset for each PF
Expand Down Expand Up @@ -105,21 +106,6 @@ spec:
required:
- enabled
type: object
rawNvConfig:
description: List of arbitrary nv config parameters
items:
properties:
name:
description: Name of the arbitrary nvconfig parameter
type: string
value:
description: Value of the arbitrary nvconfig parameter
type: string
required:
- name
- value
type: object
type: array
roceOptimized:
description: RoCE optimization settings
properties:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,21 +126,6 @@ spec:
required:
- enabled
type: object
rawNvConfig:
description: List of arbitrary nv config parameters
items:
properties:
name:
description: Name of the arbitrary nvconfig parameter
type: string
value:
description: Value of the arbitrary nvconfig parameter
type: string
required:
- name
- value
type: object
type: array
roceOptimized:
description: RoCE optimization settings
properties:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ spec:
properties:
resetToDefault:
description: |-
ResetToDefault specifies whether node agent needs to perform a reset flow
ResetToDefault specifies whether node agent needs to perform a reset flow.
In NIC Configuration Operator template v0.1.14 BF2/BF3 DPUs (not SuperNics) FW reset flow isn't supported.
The following operations will be performed:
* Nvconfig reset of all non-volatile configurations
- Mstconfig -d <device> reset for each PF
Expand Down Expand Up @@ -105,21 +106,6 @@ spec:
required:
- enabled
type: object
rawNvConfig:
description: List of arbitrary nv config parameters
items:
properties:
name:
description: Name of the arbitrary nvconfig parameter
type: string
value:
description: Value of the arbitrary nvconfig parameter
type: string
required:
- name
- value
type: object
type: array
roceOptimized:
description: RoCE optimization settings
properties:
Expand Down
41 changes: 4 additions & 37 deletions docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ ConfigurationTemplateSpec is a set of configurations for the NICs
<em><a href="#GpuDirectOptimizedSpec">GpuDirectOptimizedSpec</a></em></td>
<td><p>GPU Direct optimization settings</p></td>
</tr>
<tr>
<td><code>rawNvConfig</code><br />
<em><a href="#NvConfigParam">[]NvConfigParam</a></em></td>
<td><p>List of arbitrary nv config parameters</p></td>
</tr>
</tbody>
</table>

Expand Down Expand Up @@ -301,8 +296,9 @@ NicDeviceConfigurationSpec contains desired configuration of the NIC
<tr>
<td><code>resetToDefault</code><br />
<em>bool</em></td>
<td><p>ResetToDefault specifies whether node agent needs to perform a reset flow The following operations will be performed: * Nvconfig reset of all non-volatile configurations - Mstconfig -d reset
for each PF - Mstconfig -d set ADVANCED_PCI_SETTINGS=1 * Node reboot - Applies new NIC NV config - Will undo any runtime configuration previously performed for the device/driver</p></td>
<td><p>ResetToDefault specifies whether node agent needs to perform a reset flow. In NIC Configuration Operator template v0.1.14 BF2/BF3 DPUs (not SuperNics) FW reset flow isn’t supported. The
following operations will be performed: * Nvconfig reset of all non-volatile configurations - Mstconfig -d reset for each PF - Mstconfig -d set ADVANCED_PCI_SETTINGS=1 * Node reboot - Applies new NIC
NV config - Will undo any runtime configuration previously performed for the device/driver</p></td>
</tr>
<tr>
<td><code>template</code><br />
Expand Down Expand Up @@ -471,35 +467,6 @@ NicSelectorSpec is a desired configuration for NICs
</tbody>
</table>

### NvConfigParam

(*Appears on:*[ConfigurationTemplateSpec](#ConfigurationTemplateSpec))

<table>
<colgroup>
<col style="width: 50%" />
<col style="width: 50%" />
</colgroup>
<thead>
<tr>
<th>Field</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>name</code><br />
<em>string</em></td>
<td><p>Name of the arbitrary nvconfig parameter</p></td>
</tr>
<tr>
<td><code>value</code><br />
<em>string</em></td>
<td><p>Value of the arbitrary nvconfig parameter</p></td>
</tr>
</tbody>
</table>

### PciPerformanceOptimizedSpec

(*Appears on:*[ConfigurationTemplateSpec](#ConfigurationTemplateSpec))
Expand Down Expand Up @@ -600,4 +567,4 @@ RoceOptimizedSpec specifies RoCE optimization settings

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

*Generated with `gen-crd-api-reference-docs` on git commit `a799e3a`.*
*Generated with `gen-crd-api-reference-docs` on git commit `40efd3e`.*
5 changes: 0 additions & 5 deletions docs/examples/example-nicconfigurationtemplate-connectx6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,3 @@ spec:
gpuDirectOptimized:
enabled: true
env: Baremetal
rawNvConfig:
- name: THIS_IS_A_SPECIAL_NVCONFIG_PARAM
value: "55"
- name: SOME_ADVANCED_NVCONFIG_PARAM
value: "true"
4 changes: 0 additions & 4 deletions internal/controller/nicdevice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,6 @@ var _ = Describe("NicDeviceReconciler", func() {
Enabled: true,
MaxAccOutRead: 9999,
},
RawNvConfig: []v1alpha1.NvConfigParam{{
Name: "CUSTOM_PARAM",
Value: "true",
}},
},
},
},
Expand Down
19 changes: 9 additions & 10 deletions pkg/host/configvalidation.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"reflect"
"slices"
"strconv"
"strings"

v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
Expand Down Expand Up @@ -199,15 +198,15 @@ func (v *configValidationImpl) ConstructNvParamMapFromTemplate(
} else {
applyDefaultNvConfigValueIfExists(consts.AtsEnabledParam, desiredParameters, query)
}

for _, rawParam := range template.RawNvConfig {
// Ignore second port params if device has a single port
if strings.HasSuffix(rawParam.Name, consts.SecondPortPrefix) && !secondPortPresent {
continue
}

desiredParameters[rawParam.Name] = rawParam.Value
}
// TODO: Uncomment once the DPU mode reset procedure is fixed
//for _, rawParam := range template.RawNvConfig {
// // Ignore second port params if device has a single port
// if strings.HasSuffix(rawParam.Name, consts.SecondPortPrefix) && !secondPortPresent {
// continue
// }
//
// desiredParameters[rawParam.Name] = rawParam.Value
//}

return desiredParameters, nil
}
Expand Down
Loading

0 comments on commit 3932e77

Please sign in to comment.