Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for userspace device drivers with HW offload mode #322

Merged
merged 2 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 44 additions & 19 deletions pkg/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,15 @@ func CmdAdd(args *skel.CmdArgs) error {
return err
}

// check if the device driver is the type of userspace driver
userspaceMode := false
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
userspaceMode, err = sriov.HasUserspaceDriver(netconf.DeviceID)
if err != nil {
return err
}
}

// removes all ports whose interfaces have an error
if err := cleanPorts(ovsBridgeDriver); err != nil {
return err
Expand All @@ -302,8 +311,9 @@ func CmdAdd(args *skel.CmdArgs) error {
}
defer contNetns.Close()

// userspace driver does not create a network interface for the VF on the host
var origIfName string
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) && !userspaceMode {
origIfName, err = sriov.GetVFLinkName(netconf.DeviceID)
if err != nil {
return err
Expand All @@ -312,13 +322,13 @@ func CmdAdd(args *skel.CmdArgs) error {

// Cache NetConf for CmdDel
if err = utils.SaveCache(config.GetCRef(args.ContainerID, args.IfName),
&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName}); err != nil {
&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName, UserspaceMode: userspaceMode}); err != nil {
return fmt.Errorf("error saving NetConf %q", err)
}

var hostIface, contIface *current.Interface
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, netconf.MTU, netconf.DeviceID)
hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, mac, netconf.MTU, netconf.DeviceID, userspaceMode)
if err != nil {
return err
}
Expand Down Expand Up @@ -353,7 +363,9 @@ func CmdAdd(args *skel.CmdArgs) error {
}

// run the IPAM plugin
if netconf.IPAM.Type != "" {
// userspace driver does not support IPAM plugin,
// because there is no network interface for the VF on the host
if netconf.IPAM.Type != "" && !userspaceMode {
var r cnitypes.Result
r, err = ipam.ExecAdd(netconf.IPAM.Type, args.StdinData)
defer func() {
Expand Down Expand Up @@ -562,8 +574,11 @@ func CmdDel(args *skel.CmdArgs) error {
// port is already deleted in a previous invocation.
log.Printf("Error: %v\n", err)
}
if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
return err
// there is no network interface in case of userspace driver, so OrigIfName is empty
if !cache.UserspaceMode {
if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
return err
}
}
} else {
// In accordance with the spec we clean up as many resources as possible.
Expand Down Expand Up @@ -591,11 +606,14 @@ func CmdDel(args *skel.CmdArgs) error {
}

if sriov.IsOvsHardwareOffloadEnabled(cache.Netconf.DeviceID) {
err = sriov.ReleaseVF(args, cache.OrigIfName)
if err != nil {
// try to reset vf into original state as much as possible in case of error
if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
// there is no network interface in case of userspace driver, so OrigIfName is empty
if !cache.UserspaceMode {
err = sriov.ReleaseVF(args, cache.OrigIfName)
if err != nil {
// try to reset vf into original state as much as possible in case of error
if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
}
}
}
} else {
Expand Down Expand Up @@ -633,14 +651,6 @@ func CmdCheck(args *skel.CmdArgs) error {
}
ovsHWOffloadEnable := sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID)

// run the IPAM plugin
if netconf.NetConf.IPAM.Type != "" {
err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
if err != nil {
return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
}
}

envArgs, err := getEnvArgs(args.Args)
if err != nil {
return err
Expand Down Expand Up @@ -672,6 +682,21 @@ func CmdCheck(args *skel.CmdArgs) error {
return err
}

// TODO: CmdCheck for userspace driver
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you plan to implement this check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, not in this PR. I will make a separate commit for CmdCheck later.

if cache.UserspaceMode {
return nil
}

// run the IPAM plugin
// userspace driver does not support IPAM plugin,
// because there is no network interface for the VF on the host
if netconf.NetConf.IPAM.Type != "" && !cache.UserspaceMode {
err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
if err != nil {
return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
}
}

// Parse previous result.
if netconf.NetConf.RawPrevResult == nil {
return fmt.Errorf("Required prevResult missing")
Expand Down
164 changes: 135 additions & 29 deletions pkg/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package sriov

import (
"fmt"
"net"
"os"
"path/filepath"

Expand All @@ -32,7 +33,8 @@ import (

var (
// SysBusPci is sysfs pci device directory
SysBusPci = "/sys/bus/pci/devices"
SysBusPci = "/sys/bus/pci/devices"
UserspaceDrivers = []string{"vfio-pci", "uio_pci_generic", "igb_uio"}
)

// GetVFLinkName retrieves interface name for given pci address
Expand Down Expand Up @@ -66,6 +68,27 @@ func IsOvsHardwareOffloadEnabled(deviceID string) bool {
return deviceID != ""
}

// HasUserspaceDriver reports whether the PCI device at pciAddr is bound to one
// of the drivers listed in UserspaceDrivers.
//
// The bound driver is identified by resolving the device's "driver" symlink
// under SysBusPci and taking the base name of the resolved target. An error is
// returned when the symlink cannot be resolved or its target cannot be stat'ed.
//
// This method is copied from https://github.com/k8snetworkplumbingwg/sriov-cni/blob/8af83a33b2cac8e2df0bd6276b76658eb7c790ab/pkg/utils/utils.go#L222
func HasUserspaceDriver(pciAddr string) (bool, error) {
	resolved, err := filepath.EvalSymlinks(filepath.Join(SysBusPci, pciAddr, "driver"))
	if err != nil {
		return false, err
	}

	info, err := os.Stat(resolved)
	if err != nil {
		return false, err
	}

	// Compare the bound driver's name against every known userspace driver.
	bound := info.Name()
	for i := range UserspaceDrivers {
		if UserspaceDrivers[i] == bound {
			return true, nil
		}
	}
	return false, nil
}

// GetBridgeUplinkNameByDeviceID tries to automatically resolve uplink interface name
// for provided VF deviceID by following the sequence:
// VF pci address > PF pci address > Bond (optional, if PF is part of a bond)
Expand Down Expand Up @@ -159,48 +182,33 @@ func GetNetRepresentor(deviceID string) (string, error) {
return rep, nil
}

// SetupSriovInterface moves smartVF into container namespace, rename it with ifName and also returns host interface with VF's representor device
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int, deviceID string) (*current.Interface, *current.Interface, error) {
hostIface := &current.Interface{}
contIface := &current.Interface{}

// setupKernelSriovContIface moves smartVF into container namespace,
// configures the smartVF and also fills in the contIface fields
func setupKernelSriovContIface(contNetns ns.NetNS, contIface *current.Interface, deviceID string, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr, mtu int) error {
// get smart VF netdevice from PCI
vfNetdevices, err := sriovnet.GetNetDevicesFromPci(deviceID)
if err != nil {
return nil, nil, err
return err
}

// Make sure we have 1 netdevice per pci address
if len(vfNetdevices) != 1 {
return nil, nil, fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
return fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
}
vfNetdevice := vfNetdevices[0]

// network representor device for smartvf
rep, err := GetNetRepresentor(deviceID)
if err != nil {
return nil, nil, err
}

hostIface.Name = rep

link, err := netlink.LinkByName(hostIface.Name)
if err != nil {
return nil, nil, err
}
hostIface.Mac = link.Attrs().HardwareAddr.String()

// set MTU on smart VF representor
if mtu != 0 {
if err = netlink.LinkSetMTU(link, mtu); err != nil {
return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
// if MAC address is provided, set it to the VF by using PF netlink
// which is accessible in the host namespace, not in the container namespace
if hwaddr != nil {
if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
return err
}
}

// Move smart VF to Container namespace
err = moveIfToNetns(vfNetdevice, contNetns)
if err != nil {
return nil, nil, err
return err
}

err = contNetns.Do(func(hostNS ns.NetNS) error {
Expand All @@ -209,10 +217,20 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
if err != nil {
return err
}
link, err = netlink.LinkByName(contIface.Name)
link, err := netlink.LinkByName(contIface.Name)
if err != nil {
return err
}
// if MAC address is provided, set it to the kernel VF netdevice
// otherwise, read the MAC address from the kernel VF netdevice
if hwaddr != nil {
if err = netlink.LinkSetHardwareAddr(link, hwaddr); err != nil {
return err
}
contIface.Mac = hwaddr.String()
} else {
contIface.Mac = link.Attrs().HardwareAddr.String()
}
if mtu != 0 {
if err = netlink.LinkSetMTU(link, mtu); err != nil {
return err
Expand All @@ -223,13 +241,101 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
return err
}
contIface.Sandbox = contNetns.Path()
contIface.Mac = link.Attrs().HardwareAddr.String()

return nil
})
if err != nil {
return err
}

return nil
}

// setupUserspaceSriovContIface fills in contIface for a smartVF that is
// configured through the PF link rather than through a VF netdevice.
//
// contIface.Name is set to ifName and contIface.Sandbox to the path of
// contNetns. When hwaddr is non-nil it is programmed on VF vfIdx via pfLink
// and reported as the container interface MAC; otherwise the MAC recorded for
// that VF in pfLink's attributes is reported.
//
// An error is returned if programming the MAC fails, or if vfIdx is not a
// valid index into the PF's VF list when the MAC has to be read back.
func setupUserspaceSriovContIface(contNetns ns.NetNS, contIface *current.Interface, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr) error {
	contIface.Name = ifName
	contIface.Sandbox = contNetns.Path()

	// if MAC address is provided, set it to the VF by using PF netlink
	if hwaddr != nil {
		if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
			return err
		}
		contIface.Mac = hwaddr.String()
		return nil
	}

	// No MAC requested: report the one the PF currently holds for this VF.
	// Guard the index so an unexpected vfIdx yields an error, not a panic.
	vfs := pfLink.Attrs().Vfs
	if vfIdx < 0 || vfIdx >= len(vfs) {
		return fmt.Errorf("failed to get vf info at index %d: PF reports %d VFs", vfIdx, len(vfs))
	}
	contIface.Mac = vfs[vfIdx].Mac.String()

	return nil
}

// SetupSriovInterface configures the smartVF identified by deviceID and
// returns the VF's representor device as the host interface and the VF itself
// as the container interface.
//
// Parameters:
//   - contNetns: container network namespace the VF is set up for.
//   - containerID: not used by this function; kept for the caller-facing signature.
//   - ifName: interface name to report (and, in kernel mode, assign) in the container.
//   - mac: optional MAC address for the VF; empty means "keep the current one".
//   - mtu: MTU to set on the representor (and on the VF in kernel mode); 0 leaves it unchanged.
//   - deviceID: PCI address of the VF.
//   - userspaceMode: when true the VF is configured only through the PF link;
//     when false the kernel-netdevice setup path is used.
//
// On any failure both returned interfaces are nil and the error is non-nil.
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName, mac string, mtu int, deviceID string, userspaceMode bool) (*current.Interface, *current.Interface, error) {
	hostIface := &current.Interface{}
	contIface := &current.Interface{}

	// network representor device for smartvf
	rep, err := GetNetRepresentor(deviceID)
	if err != nil {
		return nil, nil, err
	}

	hostIface.Name = rep

	link, err := netlink.LinkByName(hostIface.Name)
	if err != nil {
		return nil, nil, err
	}
	hostIface.Mac = link.Attrs().HardwareAddr.String()

	// get PF netlink and VF index from PCI address
	pfIface, err := sriovnet.GetUplinkRepresentor(deviceID)
	if err != nil {
		return nil, nil, err
	}
	pfLink, err := netlink.LinkByName(pfIface)
	if err != nil {
		return nil, nil, err
	}
	vfIdx, err := sriovnet.GetVfIndexByPciAddress(deviceID)
	if err != nil {
		return nil, nil, err
	}

	// make sure PF netlink and VF index are valid; the index must be strictly
	// below the number of VFs reported by the PF, otherwise the lookup below
	// would panic instead of returning an error
	vfs := pfLink.Attrs().Vfs
	if vfIdx < 0 || vfIdx >= len(vfs) || vfs[vfIdx].ID != vfIdx {
		return nil, nil, fmt.Errorf("failed to get vf info from %s at index %d with Vfs %v", pfIface, vfIdx, vfs)
	}

	// parse MAC address if provided from args as described
	// in the CNI spec (https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md)
	var hwaddr net.HardwareAddr
	if mac != "" {
		hwaddr, err = net.ParseMAC(mac)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to parse MAC address %q: %v", mac, err)
		}
	}

	// set MTU on smart VF representor
	if mtu != 0 {
		if err = netlink.LinkSetMTU(link, mtu); err != nil {
			return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
		}
	}

	if !userspaceMode {
		// configure the smart VF netdevice directly in the container namespace
		if err = setupKernelSriovContIface(contNetns, contIface, deviceID, pfLink, vfIdx, ifName, hwaddr, mtu); err != nil {
			return nil, nil, err
		}
	} else {
		// configure the smart VF netdevice via PF netlink
		if err = setupUserspaceSriovContIface(contNetns, contIface, pfLink, vfIdx, ifName, hwaddr); err != nil {
			return nil, nil, err
		}
	}

	return hostIface, contIface, nil
}
Expand Down
10 changes: 6 additions & 4 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ type Trunk struct {
ID *uint `json:"id,omitempty"`
}

// CachedNetConf containing NetConfig and original smartnic vf interface
// name (set only in case of ovs hardware offload scenario).
// CachedNetConf containing NetConfig, original smartnic vf interface name
// and kernel/userspace device driver mode of the smartnic vf interface
// (the last two are set only in case of ovs hardware offload scenario).
// this is intended to be used only for storing and retrieving config
// to/from a data store (example file cache).
type CachedNetConf struct {
Netconf *NetConf
OrigIfName string
Netconf *NetConf
OrigIfName string
UserspaceMode bool
}

// CachedPrevResultNetConf containing PrevResult.
Expand Down
Loading