Skip to content

Commit

Permalink
Lock PCI allocated file
Browse files Browse the repository at this point in the history
When running on heavy a load, sriov-cni might be invoked multiple times
by the kubelet or the container runtime. In these situations, it might happen
that a `cmdAdd` or a `cmdDel` run on the same device that is still handled by
a `cmdDel` instance (e.g. when the IPAM plugin takes a long time to finish).

This commit adds a Lock mechanism around the `/var/lib/cni/sriov/<PCI>` address,
to avoid two or more instances handling the same the device.
The lock is based on `flock` [1] using the `O_CLOEXEC` flag, which  garantees that the lock
is released when the process finishes.

[1] https://linux.die.net/man/2/flock

Signed-off-by: Andrea Panattoni <[email protected]>
  • Loading branch information
zeeke committed Jan 15, 2025
1 parent b811f7a commit 723d1e5
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 2 deletions.
11 changes: 10 additions & 1 deletion cmd/sriov/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,16 @@ func cmdDel(args *skel.CmdArgs) error {
return nil
}

allocator := utils.NewPCIAllocator(config.DefaultCNIDir)

logging.Debug("Acquire device lock",
"func", "cmdDel",
"DeviceID", netConf.DeviceID)
err = allocator.Lock(netConf.DeviceID)
if err != nil {
return fmt.Errorf("cmdDel() error obtaining lock for device [%s]: %w", netConf.DeviceID, err)
}

defer func() {
if err == nil && cRefPath != "" {
_ = utils.CleanCachedNetConf(cRefPath)
Expand Down Expand Up @@ -317,7 +327,6 @@ func cmdDel(args *skel.CmdArgs) error {
"func", "cmdDel",
"config.DefaultCNIDir", config.DefaultCNIDir,
"netConf.DeviceID", netConf.DeviceID)
allocator := utils.NewPCIAllocator(config.DefaultCNIDir)
if err = allocator.DeleteAllocatedPCI(netConf.DeviceID); err != nil {
return fmt.Errorf("error cleaning the pci allocation for vf pci address %s: %v", netConf.DeviceID, err)
}
Expand Down
10 changes: 9 additions & 1 deletion pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ func LoadConf(bytes []byte) (*sriovtypes.NetConf, error) {
return nil, fmt.Errorf("LoadConf(): VF pci addr is required")
}

allocator := utils.NewPCIAllocator(DefaultCNIDir)
logging.Debug("Acquire device lock",
"func", "LoadConf",
"DeviceID", n.DeviceID)
err := allocator.Lock(n.DeviceID)
if err != nil {
return nil, err
}

// Check if the device is already allocated.
// This is to prevent issues where kubelet request to delete a pod and in the same time a new pod using the same
// vf is started. we can have an issue where the cmdDel of the old pod is called AFTER the cmdAdd of the new one
Expand All @@ -56,7 +65,6 @@ func LoadConf(bytes []byte) (*sriovtypes.NetConf, error) {
"func", "LoadConf",
"DefaultCNIDir", DefaultCNIDir,
"n.DeviceID", n.DeviceID)
allocator := utils.NewPCIAllocator(DefaultCNIDir)
isAllocated, err := allocator.IsAllocated(n.DeviceID)
if err != nil {
return n, err
Expand Down
36 changes: 36 additions & 0 deletions pkg/utils/pci_allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ import (
"fmt"
"os"
"path/filepath"
"time"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/k8snetworkplumbingwg/sriov-cni/pkg/logging"
"golang.org/x/sys/unix"
)

const pciLockAcquireTimeout = 60 * time.Second

type PCIAllocation interface {
SaveAllocatedPCI(string, string) error
DeleteAllocatedPCI(string) error
Expand All @@ -25,6 +29,38 @@ func NewPCIAllocator(dataDir string) *PCIAllocator {
return &PCIAllocator{dataDir: filepath.Join(dataDir, "pci")}
}

// Lock gets an exclusive lock on the given PCI address, ensuring there is no other process configuring / or de-configuring the same device.
func (p *PCIAllocator) Lock(pciAddress string) error {

if err := os.MkdirAll(p.dataDir, 0600); err != nil {
return fmt.Errorf("failed to create the sriov data directory(%q): %v", p.dataDir, err)
}

path := filepath.Join(p.dataDir, pciAddress)

// unix.O_CREAT - Create the file if it doesn't exist
// unix.O_RDWR - Open the file for read/write
// unix.O_CLOEXEC - Automatically close the file on exit. This is useful to keep the flock until the process exits
fd, err := unix.Open(path, unix.O_CREAT|unix.O_RDWR|unix.O_CLOEXEC, 0600)
if err != nil {
return fmt.Errorf("failed to open PCI file [%s] for locking: %w", path, err)
}

errCh := make(chan error)
go func() {
// unix.LOCK_EX - Exclusive lock
errCh <- unix.Flock(fd, unix.LOCK_EX)
}()

select {
case err = <-errCh:
return fmt.Errorf("failed to flock PCI file [%s]: %w", path, err)

case <-time.After(pciLockAcquireTimeout):
return fmt.Errorf("time out while waiting to acquire exclusive lock on [%s]", path)
}
}

// SaveAllocatedPCI creates a file with the pci address as a name and the network namespace as the content
// return error if the file was not created
func (p *PCIAllocator) SaveAllocatedPCI(pciAddress, ns string) error {
Expand Down

0 comments on commit 723d1e5

Please sign in to comment.