Skip to content

Commit

Permalink
feat: pcirebind controller
Browse files Browse the repository at this point in the history
Add a controller to support rebinding drivers for PCI devices.

Fixes: siderolabs/extensions#488

Signed-off-by: Noel Georgi <[email protected]>
  • Loading branch information
frezbo committed Dec 20, 2024
1 parent 4c32616 commit a5660ed
Show file tree
Hide file tree
Showing 46 changed files with 1,795 additions and 26 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2417,6 +2417,14 @@ jobs:
WITH_CONFIG_INJECTION_METHOD: metal-iso
run: |
sudo -E make e2e-qemu
- name: e2e-iommu-pcidriverrebind
env:
GITHUB_STEP_NAME: ${{ github.job}}-e2e-iommu-pcidriverrebind
IMAGE_REGISTRY: registry.dev.siderolabs.io
SHORT_INTEGRATION_TEST: "yes"
WITH_IOMMU: "yes"
run: |
sudo -E make e2e-qemu
- name: save artifacts
if: always()
uses: actions/upload-artifact@v4
Expand Down
10 changes: 9 additions & 1 deletion .github/workflows/integration-misc-3-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-11-28T13:53:18Z by kres 232fe63.
# Generated on 2024-12-10T11:14:00Z by kres 8183c20.

name: integration-misc-3-cron
concurrency:
Expand Down Expand Up @@ -94,6 +94,14 @@ jobs:
WITH_CONFIG_INJECTION_METHOD: metal-iso
run: |
sudo -E make e2e-qemu
- name: e2e-iommu-pcidriverrebind
env:
GITHUB_STEP_NAME: ${{ github.job}}-e2e-iommu-pcidriverrebind
IMAGE_REGISTRY: registry.dev.siderolabs.io
SHORT_INTEGRATION_TEST: "yes"
WITH_IOMMU: "yes"
run: |
sudo -E make e2e-qemu
- name: save artifacts
if: always()
uses: actions/upload-artifact@v4
Expand Down
1 change: 1 addition & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ linters:
- protogetter # too annoying
- perfsprint
- recvcheck # too annoying
- exportloopref # The linter 'exportloopref' is deprecated (since v1.60.2) due to: Since Go1.22 (loopvar) this linter is no longer relevant. Replaced by copyloopvar.
disable-all: false
fast: false

Expand Down
8 changes: 8 additions & 0 deletions .kres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,14 @@ spec:
SHORT_INTEGRATION_TEST: yes
WITH_CONFIG_INJECTION_METHOD: "metal-iso"
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: e2e-iommu-pcidriverrebind
command: e2e-qemu
withSudo: true
environment:
GITHUB_STEP_NAME: ${{ github.job}}-e2e-iommu-pcidriverrebind
SHORT_INTEGRATION_TEST: yes
WITH_IOMMU: yes
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: save-talos-logs
conditions:
- always
Expand Down
12 changes: 12 additions & 0 deletions api/resource/definitions/hardware/hardware.proto
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ message PCIDeviceSpec {
string product_id = 8;
}

// PCIDriverRebindConfigSpec describes PCI rebind configuration.
message PCIDriverRebindConfigSpec {
// pciid identifies the PCI device to rebind (presumably the sysfs device name, e.g. 0000:00:1f.0 - confirm against the controller).
string pciid = 1;
// target_driver is the name of the driver the device should be bound to.
string target_driver = 2;
}

// PCIDriverRebindStatusSpec describes the status of a PCI device whose driver has been rebound.
message PCIDriverRebindStatusSpec {
// pciid identifies the PCI device that was processed (presumably mirrors the config's pciid - confirm against the controller).
string pciid = 1;
// target_driver is the name of the driver that was requested for the device.
string target_driver = 2;
}

// ProcessorSpec represents a single processor.
message ProcessorSpec {
string socket = 1;
Expand Down
5 changes: 5 additions & 0 deletions cmd/talosctl/cmd/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/siderolabs/talos/pkg/machinery/config/encoder"
"github.com/siderolabs/talos/pkg/machinery/config/types/block"
"github.com/siderolabs/talos/pkg/machinery/config/types/hardware"
"github.com/siderolabs/talos/pkg/machinery/config/types/network"
"github.com/siderolabs/talos/pkg/machinery/config/types/runtime"
"github.com/siderolabs/talos/pkg/machinery/config/types/runtime/extensions"
Expand Down Expand Up @@ -135,6 +136,10 @@ var docsCmd = &cobra.Command{
name: "block",
fileDoc: block.GetFileDoc(),
},
{
name: "hardware",
fileDoc: hardware.GetFileDoc(),
},
} {
path := filepath.Join(dir, pkg.name)

Expand Down
4 changes: 4 additions & 0 deletions cmd/talosctl/cmd/mgmt/cluster/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ const (
firewallFlag = "with-firewall"
tpm2EnabledFlag = "with-tpm2"
withDebugShellFlag = "with-debug-shell"
withIOMMUFlag = "with-iommu"

// The following flags are the gen options - the options that are only used in machine configuration (i.e., not during the qemu/docker provisioning).
// They are not applicable when no machine configuration is generated, hence mutually exclusive with the --input-dir flag.
Expand Down Expand Up @@ -193,6 +194,7 @@ var (
withSiderolinkAgent agentFlag
withJSONLogs bool
debugShellEnabled bool
withIOMMU bool
configInjectionMethodFlag string
mountOpts opts.MountOpt
)
Expand Down Expand Up @@ -480,6 +482,7 @@ func create(ctx context.Context) error {
provision.WithUEFI(uefiEnabled),
provision.WithTPM2(tpm2Enabled),
provision.WithDebugShell(debugShellEnabled),
provision.WithIOMMU(withIOMMU),
provision.WithExtraUEFISearchPaths(extraUEFISearchPaths),
provision.WithTargetArch(targetArch),
provision.WithSiderolinkAgent(withSiderolinkAgent.IsEnabled()),
Expand Down Expand Up @@ -1253,6 +1256,7 @@ func init() {
createCmd.Flags().BoolVar(&uefiEnabled, "with-uefi", true, "enable UEFI on x86_64 architecture")
createCmd.Flags().BoolVar(&tpm2Enabled, tpm2EnabledFlag, false, "enable TPM2 emulation support using swtpm")
createCmd.Flags().BoolVar(&debugShellEnabled, withDebugShellFlag, false, "drop talos into a maintenance shell on boot, this is for advanced debugging for developers only")
createCmd.Flags().BoolVar(&withIOMMU, withIOMMUFlag, false, "enable IOMMU support, this also add a new PCI root port and an interface attached to it (qemu only)")
createCmd.Flags().MarkHidden("with-debug-shell") //nolint:errcheck
createCmd.Flags().StringSliceVar(&extraUEFISearchPaths, "extra-uefi-search-paths", []string{}, "additional search paths for UEFI firmware (only applies when UEFI is enabled)")
createCmd.Flags().StringSliceVar(&registryMirrors, registryMirrorFlag, []string{}, "list of registry mirrors to use in format: <registry host>=<mirror URL>")
Expand Down
6 changes: 6 additions & 0 deletions hack/release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ preface = """
* Linux: 6.12.5
Talos is built with Go 1.23.4.
"""
[notes.driver-rebind]
title = "Driver Rebind"
description = """\
Talos 1.10 now supports a new machine config document named `PCIDriverRebindConfig` that allows rebinding the driver of a PCI device to a different target driver.
See the [documentation](https://www.talos.dev/v1.10/reference/configuration/hardware/pcidriverrebindconfig/) for more information.
"""

[notes.cgroupsv1]
Expand Down
8 changes: 8 additions & 0 deletions hack/test/e2e-qemu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ case "${WITH_CONFIG_INJECTION_METHOD:-default}" in
;;
esac

# Pass --with-iommu to the cluster create flags unless WITH_IOMMU is unset,
# empty, or exactly "false"; any other value enables it.
case "${WITH_IOMMU:-false}" in
false)
;;
*)
QEMU_FLAGS+=("--with-iommu")
;;
esac

function create_cluster {
build_registry_mirrors

Expand Down
178 changes: 178 additions & 0 deletions internal/app/machined/pkg/controllers/hardware/pci_driver_rebind.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package hardware

import (
"context"
"fmt"
"os"
"path/filepath"

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/safe"
"go.uber.org/zap"

runtimectrl "github.com/siderolabs/talos/internal/app/machined/pkg/controllers/runtime"
v1alpha1runtime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
)

// Sysfs locations used to rebind a PCI device. Every entry containing %s is a
// format string that expects the PCI device ID.
const (
	// targetDeviceSYSFSPath is the sysfs directory of a single PCI device.
	targetDeviceSYSFSPath = "/sys/bus/pci/devices/%s"
	// driverPath is the symlink to the driver the device is currently bound to.
	driverPath = "/sys/bus/pci/devices/%s/driver"
	// driverUnbindPath is the file used to detach the device from its current driver.
	driverUnbindPath = "/sys/bus/pci/devices/%s/driver/unbind"
	// driverOverridePath is the file used to select the driver for the device.
	driverOverridePath = "/sys/bus/pci/devices/%s/driver_override"
	// driverProbePath is the bus-wide file used to trigger a driver probe for a device.
	driverProbePath = "/sys/bus/pci/drivers_probe"
)

// PCIDriverRebindController binds PCI devices to a specific driver and unbinds them from the host driver.
type PCIDriverRebindController struct {
// V1Alpha1Mode is the runtime mode; Run returns immediately when it reports container or agent mode.
V1Alpha1Mode v1alpha1runtime.Mode

// boundDevices is the set of PCI IDs for which Run has applied a rebind config.
// Run consults it to reset devices that no longer have a matching config.
// It is lazily initialized by Run.
boundDevices map[string]struct{}
}

// Name implements controller.Controller interface.
//
// It returns the fixed identifier of this controller.
func (c *PCIDriverRebindController) Name() string {
	const controllerName = "hardware.PCIDriverRebindController"

	return controllerName
}

// Inputs implements controller.Controller interface.
//
// No inputs are declared statically: Run hands the PCIDriverRebindConfig input
// to runtimectrl.WaitForDevicesReady instead.
// NOTE(review): presumably that helper registers the input with the runtime
// once devices are ready - confirm against the helper.
func (c *PCIDriverRebindController) Inputs() []controller.Input {
return nil
}

// Outputs implements controller.Controller interface.
//
// The controller exclusively owns the PCIDriverRebindStatus resource type.
func (c *PCIDriverRebindController) Outputs() []controller.Output {
	rebindStatus := controller.Output{
		Type: hardware.PCIDriverRebindStatusType,
		Kind: controller.OutputExclusive,
	}

	return []controller.Output{rebindStatus}
}

// Run implements controller.Controller interface.
//
// On every runtime event it lists all PCIDriverRebindConfig resources, rebinds
// each referenced PCI device to its target driver and publishes a matching
// PCIDriverRebindStatus. Devices that were rebound earlier but no longer have a
// config are reset to their default driver and then forgotten, so they are not
// unbound and re-probed again on later events.
//
// Run is a no-op in container and agent modes. It returns nil when ctx is
// cancelled and a non-nil error on any sysfs or resource failure.
//
//nolint:gocyclo
func (c *PCIDriverRebindController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) (err error) {
	// Skip PCI rebind handling if running in a container or agent mode.
	if c.V1Alpha1Mode.InContainer() || c.V1Alpha1Mode.IsAgent() {
		return nil
	}

	if c.boundDevices == nil {
		c.boundDevices = map[string]struct{}{}
	}

	// wait for udevd to be healthy, this is to ensure that host drivers if any are loaded.
	if err := runtimectrl.WaitForDevicesReady(ctx, r,
		[]controller.Input{
			{
				Namespace: hardware.NamespaceName,
				Type:      hardware.PCIDriverRebindConfigType,
				Kind:      controller.InputWeak,
			},
		}); err != nil {
		return fmt.Errorf("error waiting for devices to be ready: %w", err)
	}

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.EventCh():
		}

		pciDriverRebindConfigs, err := safe.ReaderListAll[*hardware.PCIDriverRebindConfig](ctx, r)
		if err != nil {
			return fmt.Errorf("error listing all PCI rebind configs: %w", err)
		}

		r.StartTrackingOutputs()

		// PCI IDs that have a config in this pass; anything in boundDevices
		// that is missing here lost its config and must be reset.
		touchedIDs := map[string]struct{}{}

		for cfg := range pciDriverRebindConfigs.All() {
			pciID, targetDriver := cfg.TypedSpec().PCIID, cfg.TypedSpec().TargetDriver

			if err := c.handlePCIDriverReBind(pciID, targetDriver); err != nil {
				return err
			}

			boundDriver, err := checkDeviceBoundDriver(pciID)
			if err != nil {
				return fmt.Errorf("error checking bound driver for device with id: %s, %w", pciID, err)
			}

			// Only report success when the device really ended up on the target driver.
			if boundDriver != targetDriver {
				logger.Info(
					"cannot validate if device is bound to target driver, ensure target driver module is loaded",
					zap.String("id", pciID),
					zap.String("targetDriver", targetDriver),
				)
			} else {
				logger.Info("PCI device bound to target driver", zap.String("id", pciID), zap.String("targetDriver", targetDriver))
			}

			if err := safe.WriterModify[*hardware.PCIDriverRebindStatus](ctx, r, hardware.NewPCIDriverRebindStatus(pciID), func(res *hardware.PCIDriverRebindStatus) error {
				res.TypedSpec().PCIID = pciID
				res.TypedSpec().TargetDriver = targetDriver

				return nil
			}); err != nil {
				return fmt.Errorf("error updating PCI rebind status: %w", err)
			}

			touchedIDs[pciID] = struct{}{}
			c.boundDevices[pciID] = struct{}{}
		}

		// cleanup any PCI devices that were not touched in the current run.
		for pciID := range c.boundDevices {
			if _, ok := touchedIDs[pciID]; ok {
				continue
			}

			// writing a newline to driver_override file will set the device to default driver based on pci device id.
			if err := c.handlePCIDriverReBind(pciID, "\n"); err != nil {
				return err
			}

			// Forget the device once it is back on its default driver; otherwise
			// every subsequent event would unbind and re-probe it again.
			// Deleting the current key while ranging over a map is safe in Go.
			delete(c.boundDevices, pciID)

			logger.Info("PCI device set to default", zap.String("id", pciID))
		}

		if err := safe.CleanupOutputs[*hardware.PCIDriverRebindStatus](ctx, r); err != nil {
			return err
		}
	}
}

// handlePCIDriverReBind switches the PCI device pciID over to targetDriver.
//
// It performs three sysfs writes in order: set driver_override to targetDriver,
// unbind the device from its current driver, and ask the PCI bus to probe the
// device again. A missing unbind file (device has no driver) is tolerated; any
// other failure is returned wrapped with the device ID.
func (c *PCIDriverRebindController) handlePCIDriverReBind(pciID, targetDriver string) error {
	id := []byte(pciID)

	overrideFile := fmt.Sprintf(driverOverridePath, pciID)

	if overrideErr := os.WriteFile(overrideFile, []byte(targetDriver), 0o200); overrideErr != nil {
		return fmt.Errorf("error writing driver override for device with id: %s, target driver: %s, %w", pciID, targetDriver, overrideErr)
	}

	// Unbind device from the host driver.
	// The unbind file only exists while a driver is attached, so "not exist" is not a failure.
	unbindFile := fmt.Sprintf(driverUnbindPath, pciID)

	unbindErr := os.WriteFile(unbindFile, id, 0o200)
	if unbindErr != nil && !os.IsNotExist(unbindErr) {
		return fmt.Errorf("error unbinding device with id: %s, %w", pciID, unbindErr)
	}

	probeErr := os.WriteFile(driverProbePath, id, 0o200)
	if probeErr != nil {
		return fmt.Errorf("error probing driver for device with id: %s, %w", pciID, probeErr)
	}

	return nil
}

// checkDeviceBoundDriver reports which driver the PCI device pciID is bound to.
//
// It resolves the device's "driver" symlink in sysfs and returns the base name
// of its target. When the symlink does not exist the device is not bound to
// any driver (for example because the target driver module is not loaded); in
// that case it returns an empty name and a nil error so the caller can report
// the mismatch instead of failing. Any other error is returned wrapped with
// the path that was read.
func checkDeviceBoundDriver(pciID string) (string, error) {
	// The right-hand side still refers to the package-level format string;
	// the local variable only comes into scope after this statement.
	driverPath := fmt.Sprintf(driverPath, pciID)

	driver, err := os.Readlink(driverPath)

	switch {
	case err == nil:
		return filepath.Base(driver), nil
	case os.IsNotExist(err):
		// No driver symlink: the device is currently unbound.
		return "", nil
	default:
		return "", fmt.Errorf("error reading path: %s, %w", driverPath, err)
	}
}
Loading

0 comments on commit a5660ed

Please sign in to comment.