-
Notifications
You must be signed in to change notification settings - Fork 76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
handle smartnic cleanup when container net namespace is empty #129
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -437,9 +437,27 @@ func CmdDel(args *skel.CmdArgs) error { | |
if args.Netns == "" { | ||
// The CNI_NETNS parameter may be empty according to version 0.4.0 | ||
// of the CNI spec (https://github.com/containernetworking/cni/blob/spec-v0.4.0/SPEC.md). | ||
// In accordance with the spec we clean up as many resources as possible. | ||
if err := cleanPorts(ovsDriver); err != nil { | ||
return err | ||
if netconf.DeviceID != "" { | ||
// SR-IOV Case - The sriov device is moved into host network namespace when args.Netns is empty. | ||
// This happens container is killed due to an error (example: CrashLoopBackOff, OOMKilled) | ||
var rep string | ||
if rep, err = sriov.GetNetRepresentor(netconf.DeviceID); err != nil { | ||
return err | ||
} | ||
if err = removeOvsPort(ovsDriver, rep); err != nil { | ||
// Don't throw err as delete can be called multiple times because of error in ResetVF and ovs | ||
// port is already deleted in a previous invocation. so log it and proceed further. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was needed so that ResetVF is called at line#452 to rename the smartvf back to original name in case ResetVF thrown ErrLinkNotFound error in previous CmdDel call (noticed this behaviour in my tests). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, sorry for not being clear enough. I meant the sentence in the comment: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah! yes. is it ok now ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Looks good to me |
||
log.Printf("removal of ovs port %s is failed for device %s , it may be removed already"+ | ||
" by previous delete call, err %v", rep, netconf.DeviceID, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In other places in this module, we just log.Printf("Error: %v\n", err) in this situation, it may be better to stick with that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, good. done now. |
||
} | ||
if err = sriov.ResetVF(args, netconf.DeviceID); err != nil { | ||
return err | ||
} | ||
} else { | ||
// In accordance with the spec we clean up as many resources as possible. | ||
if err := cleanPorts(ovsDriver); err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} else { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ import ( | |
"github.com/Mellanox/sriovnet" | ||
"github.com/containernetworking/cni/pkg/skel" | ||
"github.com/containernetworking/cni/pkg/types/current" | ||
"github.com/containernetworking/plugins/pkg/ip" | ||
"github.com/containernetworking/plugins/pkg/ns" | ||
"github.com/vishvananda/netlink" | ||
) | ||
|
@@ -58,6 +59,34 @@ func getVFLinkName(pciAddr string) (string, error) { | |
return names[0], nil | ||
} | ||
|
||
// GetNetRepresentor retrieves network representor device for smartvf | ||
func GetNetRepresentor(deviceID string) (string, error) { | ||
// get Uplink netdevice. The uplink is basically the PF name of the deviceID (smart VF). | ||
// The uplink is later used to retrieve the representor for the smart VF. | ||
uplink, err := sriovnet.GetUplinkRepresentor(deviceID) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
// get smart VF index from PCI | ||
vfIndex, err := sriovnet.GetVfIndexByPciAddress(deviceID) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
// get smart VF representor interface. This is a host net device which represents | ||
// smart VF attached inside the container by device plugin. It can be considered | ||
// as one end of veth pair whereas other end is smartVF. The VF representor would | ||
// get added into ovs bridge for the control plane configuration. | ||
rep, err := sriovnet.GetVfRepresentor(uplink, vfIndex) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return rep, nil | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Extra newline There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
} | ||
|
||
// SetupSriovInterface moves smartVF into container namespace, rename it with ifName and also returns host interface with VF's representor device | ||
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int, deviceID string) (*current.Interface, *current.Interface, error) { | ||
hostIface := ¤t.Interface{} | ||
|
@@ -84,24 +113,8 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int | |
} | ||
vfNetdevice := vfNetdevices[0] | ||
|
||
// get Uplink netdevice. The uplink is basically the PF name of the deviceID (smart VF). | ||
// The uplink is later used to retrieve the representor for the smart VF. | ||
uplink, err := sriovnet.GetUplinkRepresentor(deviceID) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
// get smart VF index from PCI | ||
vfIndex, err := sriovnet.GetVfIndexByPciAddress(deviceID) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
// get smart VF representor interface. This is a host net device which represents | ||
// smart VF attached inside the container by device plugin. It can be considered | ||
// as one end of veth pair whereas other end is smartVF. The VF representor would | ||
// get added into ovs bridge for the control plane configuration. | ||
rep, err := sriovnet.GetVfRepresentor(uplink, vfIndex) | ||
// network representor device for smartvf | ||
rep, err := GetNetRepresentor(deviceID) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
@@ -226,3 +239,32 @@ func ReleaseVF(args *skel.CmdArgs) error { | |
}) | ||
|
||
} | ||
|
||
// ResetVF reset the VF which accidently moved into default network namespace by a container failure | ||
func ResetVF(args *skel.CmdArgs, deviceID string) error { | ||
hostIFName, cRefPath, err := LoadHostIFNameFromCache(args) | ||
if err != nil { | ||
return err | ||
} | ||
// get smart VF netdevice from PCI | ||
vfNetdevices, err := sriovnet.GetNetDevicesFromPci(deviceID) | ||
if err != nil { | ||
return err | ||
} | ||
// Make sure we have 1 netdevice per pci address | ||
if len(vfNetdevices) != 1 { | ||
// This would happen if netdevice is not yet visible in default network namespace. | ||
// so return ErrLinkNotFound error so that multus can attempt multiple times | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this behaviour specific for multus only? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this can happen with any meta plugins that use ovs-cni, but I've tested only with multus. now changed it into generic term meta plugin. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you |
||
// until link is available. | ||
return ip.ErrLinkNotFound | ||
} | ||
_, err = renameLink(vfNetdevices[0], hostIFName) | ||
if err != nil { | ||
return err | ||
} | ||
// remove the cache entry if everything cleaned up for the device. | ||
if cRefPath != "" { | ||
CleanCachedConf(cRefPath) | ||
} | ||
return nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please, use
:=
assignment instead ofvar rep string
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Couldn't use := as rep variable is used at line#447 (outside if scope).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, thanks