Skip to content

Commit

Permalink
[ARG] Add an option to disable API call cache
Browse files Browse the repository at this point in the history
New option: disableAPICallCache
When ARG is enabled, this option should be set to true.

Signed-off-by: Zhecheng Li <zhechengli@microsoft.com>
  • Loading branch information
lzhecheng committed Jun 25, 2023
1 parent cb04dcf commit 75d20f8
Show file tree
Hide file tree
Showing 16 changed files with 891 additions and 350 deletions.
14 changes: 12 additions & 2 deletions pkg/provider/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,9 @@ type Config struct {
// If the length is not 0, it is assumed the multiple standard load balancers mode is on. In this case,
// there must be one configuration named “<clustername>” or an error will be reported.
MultipleStandardLoadBalancerConfigurations []MultipleStandardLoadBalancerConfiguration `json:"multipleStandardLoadBalancerConfigurations,omitempty" yaml:"multipleStandardLoadBalancerConfigurations,omitempty"`

// DisableAPICallCache disables the cache for Azure API calls.
DisableAPICallCache bool `json:"disableAPICallCache,omitempty" yaml:"disableAPICallCache,omitempty"`
}

// MultipleStandardLoadBalancerConfiguration stores the properties regarding multiple standard load balancers.
Expand Down Expand Up @@ -742,6 +745,11 @@ func (az *Cloud) getPutVMSSVMBatchSize() int {
}

func (az *Cloud) initCaches() (err error) {
if az.Config.DisableAPICallCache {
klog.Infof("Azure cloud provider API call cache is disabled")
return nil
}

az.vmCache, err = az.newVMCache()
if err != nil {
return err
Expand Down Expand Up @@ -1134,8 +1142,10 @@ func (az *Cloud) SetInformers(informerFactory informers.SharedInformerFactory) {
}
az.updateNodeCaches(node, nil)

klog.V(4).Infof("Removing node %s from VMSet cache.", node.Name)
_ = az.VMSet.DeleteCacheForNode(node.Name)
if !az.Config.DisableAPICallCache {
klog.V(4).Infof("Removing node %s from VMSet cache.", node.Name)
_ = az.VMSet.DeleteCacheForNode(node.Name)
}
},
})
az.nodeInformerSynced = nodeInformer.HasSynced
Expand Down
112 changes: 84 additions & 28 deletions pkg/provider/azure_backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,9 @@ func (az *Cloud) CreateOrUpdateSecurityGroup(sg network.SecurityGroup) error {
klog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name)
if rerr == nil {
// Invalidate the cache right after updating
_ = az.nsgCache.Delete(*sg.Name)
if !az.Config.DisableAPICallCache {
_ = az.nsgCache.Delete(*sg.Name)
}
return nil
}

Expand All @@ -168,13 +170,17 @@ func (az *Cloud) CreateOrUpdateSecurityGroup(sg network.SecurityGroup) error {
// Invalidate the cache because ETAG precondition mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("SecurityGroup cache for %s is cleanup because of http.StatusPreconditionFailed", *sg.Name)
_ = az.nsgCache.Delete(*sg.Name)
if !az.Config.DisableAPICallCache {
_ = az.nsgCache.Delete(*sg.Name)
}
}

// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(rerr.Error().Error()), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("SecurityGroup cache for %s is cleanup because CreateOrUpdateSecurityGroup is canceled by another operation", *sg.Name)
_ = az.nsgCache.Delete(*sg.Name)
if !az.Config.DisableAPICallCache {
_ = az.nsgCache.Delete(*sg.Name)
}
}

return rerr.Error()
Expand Down Expand Up @@ -219,7 +225,9 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
if rerr == nil {
// Invalidate the cache right after updating
_ = az.lbCache.Delete(*lb.Name)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(*lb.Name)
}
return nil
}

Expand All @@ -229,14 +237,18 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
// Invalidate the cache because ETAG precondition mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", pointer.StringDeref(lb.Name, ""))
_ = az.lbCache.Delete(*lb.Name)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(*lb.Name)
}
}

retryErrorMessage := rerr.Error().Error()
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(retryErrorMessage), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", pointer.StringDeref(lb.Name, ""))
_ = az.lbCache.Delete(*lb.Name)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(*lb.Name)
}
}

// The LB update may fail because the referenced PIP is not in the Succeeded provisioning state
Expand All @@ -261,7 +273,9 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
}
// Invalidate the LB cache, return the error, and the controller manager
// would retry the LB update in the next reconcile loop
_ = az.lbCache.Delete(*lb.Name)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(*lb.Name)
}
}

return rerr.Error()
Expand All @@ -275,21 +289,27 @@ func (az *Cloud) CreateOrUpdateLBBackendPool(lbName string, backendPool network.
rerr := az.LoadBalancerClient.CreateOrUpdateBackendPools(ctx, az.getLoadBalancerResourceGroup(), lbName, pointer.StringDeref(backendPool.Name, ""), backendPool, pointer.StringDeref(backendPool.Etag, ""))
if rerr == nil {
// Invalidate the cache right after updating
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
return nil
}

// Invalidate the cache because ETAG precondition mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", lbName)
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
}

retryErrorMessage := rerr.Error().Error()
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(retryErrorMessage), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", lbName)
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
}

return rerr.Error()
Expand All @@ -303,21 +323,27 @@ func (az *Cloud) DeleteLBBackendPool(lbName, backendPoolName string) error {
rerr := az.LoadBalancerClient.DeleteLBBackendPool(ctx, az.getLoadBalancerResourceGroup(), lbName, backendPoolName)
if rerr == nil {
// Invalidate the cache right after updating
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
return nil
}

// Invalidate the cache because ETAG precondition mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", lbName)
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
}

retryErrorMessage := rerr.Error().Error()
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(retryErrorMessage), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", lbName)
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
}

return rerr.Error()
Expand Down Expand Up @@ -407,7 +433,9 @@ func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string,
klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, pointer.StringDeref(pip.Name, ""))
if rerr == nil {
// Invalidate the cache right after updating
_ = az.pipCache.Delete(pipResourceGroup)
if !az.Config.DisableAPICallCache {
_ = az.pipCache.Delete(pipResourceGroup)
}
return nil
}

Expand All @@ -418,14 +446,18 @@ func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string,
// Invalidate the cache because ETAG precondition mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("PublicIP cache for (%s, %s) is cleanup because of http.StatusPreconditionFailed", pipResourceGroup, pointer.StringDeref(pip.Name, ""))
_ = az.pipCache.Delete(pipResourceGroup)
if !az.Config.DisableAPICallCache {
_ = az.pipCache.Delete(pipResourceGroup)
}
}

retryErrorMessage := rerr.Error().Error()
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(retryErrorMessage), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("PublicIP cache for (%s, %s) is cleanup because CreateOrUpdate is canceled by another operation", pipResourceGroup, pointer.StringDeref(pip.Name, ""))
_ = az.pipCache.Delete(pipResourceGroup)
if !az.Config.DisableAPICallCache {
_ = az.pipCache.Delete(pipResourceGroup)
}
}

return rerr.Error()
Expand Down Expand Up @@ -465,7 +497,9 @@ func (az *Cloud) DeletePublicIP(service *v1.Service, pipResourceGroup string, pi
}

// Invalidate the cache right after deleting
_ = az.pipCache.Delete(pipResourceGroup)
if !az.Config.DisableAPICallCache {
_ = az.pipCache.Delete(pipResourceGroup)
}
return nil
}

Expand All @@ -478,7 +512,9 @@ func (az *Cloud) DeleteLB(service *v1.Service, lbName string) *retry.Error {
rerr := az.LoadBalancerClient.Delete(ctx, rgName, lbName)
if rerr == nil {
// Invalidate the cache right after updating
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
return nil
}

Expand All @@ -495,7 +531,9 @@ func (az *Cloud) CreateOrUpdateRouteTable(routeTable network.RouteTable) error {
rerr := az.RouteTablesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, routeTable, pointer.StringDeref(routeTable.Etag, ""))
if rerr == nil {
// Invalidate the cache right after updating
_ = az.rtCache.Delete(*routeTable.Name)
if !az.Config.DisableAPICallCache {
_ = az.rtCache.Delete(*routeTable.Name)
}
return nil
}

Expand All @@ -505,12 +543,16 @@ func (az *Cloud) CreateOrUpdateRouteTable(routeTable network.RouteTable) error {
// Invalidate the cache because etag mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("Route table cache for %s is cleanup because of http.StatusPreconditionFailed", *routeTable.Name)
_ = az.rtCache.Delete(*routeTable.Name)
if !az.Config.DisableAPICallCache {
_ = az.rtCache.Delete(*routeTable.Name)
}
}
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(rerr.Error().Error()), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("Route table cache for %s is cleanup because CreateOrUpdateRouteTable is canceled by another operation", *routeTable.Name)
_ = az.rtCache.Delete(*routeTable.Name)
if !az.Config.DisableAPICallCache {
_ = az.rtCache.Delete(*routeTable.Name)
}
}
klog.Errorf("RouteTablesClient.CreateOrUpdate(%s) failed: %v", az.RouteTableName, rerr.Error())
return rerr.Error()
Expand All @@ -524,18 +566,24 @@ func (az *Cloud) CreateOrUpdateRoute(route network.Route) error {
rerr := az.RoutesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, *route.Name, route, pointer.StringDeref(route.Etag, ""))
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): end", *route.Name)
if rerr == nil {
_ = az.rtCache.Delete(az.RouteTableName)
if !az.Config.DisableAPICallCache {
_ = az.rtCache.Delete(az.RouteTableName)
}
return nil
}

if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("Route cache for %s is cleanup because of http.StatusPreconditionFailed", *route.Name)
_ = az.rtCache.Delete(az.RouteTableName)
if !az.Config.DisableAPICallCache {
_ = az.rtCache.Delete(az.RouteTableName)
}
}
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(rerr.Error().Error()), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("Route cache for %s is cleanup because CreateOrUpdateRouteTable is canceled by another operation", *route.Name)
_ = az.rtCache.Delete(az.RouteTableName)
if !az.Config.DisableAPICallCache {
_ = az.rtCache.Delete(az.RouteTableName)
}
}
return rerr.Error()
}
Expand Down Expand Up @@ -590,7 +638,9 @@ func (az *Cloud) CreateOrUpdatePLS(service *v1.Service, pls network.PrivateLinkS
rerr := az.PrivateLinkServiceClient.CreateOrUpdate(ctx, az.PrivateLinkServiceResourceGroup, pointer.StringDeref(pls.Name, ""), pls, pointer.StringDeref(pls.Etag, ""))
if rerr == nil {
// Invalidate the cache right after updating
_ = az.plsCache.Delete(pointer.StringDeref((*pls.LoadBalancerFrontendIPConfigurations)[0].ID, ""))
if !az.Config.DisableAPICallCache {
_ = az.plsCache.Delete(pointer.StringDeref((*pls.LoadBalancerFrontendIPConfigurations)[0].ID, ""))
}
return nil
}

Expand All @@ -600,12 +650,16 @@ func (az *Cloud) CreateOrUpdatePLS(service *v1.Service, pls network.PrivateLinkS
// Invalidate the cache because etag mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("Private link service cache for %s is cleanup because of http.StatusPreconditionFailed", pointer.StringDeref(pls.Name, ""))
_ = az.plsCache.Delete(pointer.StringDeref((*pls.LoadBalancerFrontendIPConfigurations)[0].ID, ""))
if !az.Config.DisableAPICallCache {
_ = az.plsCache.Delete(pointer.StringDeref((*pls.LoadBalancerFrontendIPConfigurations)[0].ID, ""))
}
}
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(rerr.Error().Error()), consts.OperationCanceledErrorMessage) {
klog.V(3).Infof("Private link service for %s is cleanup because CreateOrUpdatePrivateLinkService is canceled by another operation", pointer.StringDeref(pls.Name, ""))
_ = az.plsCache.Delete(pointer.StringDeref((*pls.LoadBalancerFrontendIPConfigurations)[0].ID, ""))
if !az.Config.DisableAPICallCache {
_ = az.plsCache.Delete(pointer.StringDeref((*pls.LoadBalancerFrontendIPConfigurations)[0].ID, ""))
}
}
klog.Errorf("PrivateLinkServiceClient.CreateOrUpdate(%s) failed: %v", pointer.StringDeref(pls.Name, ""), rerr.Error())
return rerr.Error()
Expand All @@ -619,7 +673,9 @@ func (az *Cloud) DeletePLS(service *v1.Service, plsName string, plsLBFrontendID
rerr := az.PrivateLinkServiceClient.Delete(ctx, az.PrivateLinkServiceResourceGroup, plsName)
if rerr == nil {
// Invalidate the cache right after deleting
_ = az.plsCache.Delete(plsLBFrontendID)
if !az.Config.DisableAPICallCache {
_ = az.plsCache.Delete(plsLBFrontendID)
}
return nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/provider/azure_controller_standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ func (as *availabilitySet) AttachDisk(ctx context.Context, nodeName types.NodeNa
}

func (as *availabilitySet) DeleteCacheForNode(nodeName string) error {
if as.Config.DisableAPICallCache {
return nil
}
err := as.cloud.vmCache.Delete(nodeName)
if err == nil {
klog.V(2).Infof("DeleteCacheForNode(%s) successfully", nodeName)
Expand Down Expand Up @@ -265,6 +268,9 @@ func (as *availabilitySet) UpdateVMAsync(ctx context.Context, nodeName types.Nod
}

func (as *availabilitySet) updateCache(nodeName string, vm *compute.VirtualMachine) {
if as.Config.DisableAPICallCache {
return
}
if as.common.DisableUpdateCache {
return
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/provider/azure_instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ func (az *Cloud) getLocalInstanceNodeAddresses(netInterfaces []NetworkInterface,

if len(addresses) == 1 {
// No IP addresses were returned by the instance metadata service; clean up the cache and report an error.
_ = az.Metadata.imsCache.Delete(consts.MetadataCacheKey)
if !az.Config.DisableAPICallCache {
_ = az.Metadata.imsCache.Delete(consts.MetadataCacheKey)
}
return nil, fmt.Errorf("get empty IP addresses from instance metadata service")
}
return addresses, nil
Expand Down
16 changes: 12 additions & 4 deletions pkg/provider/azure_loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ func (az *Cloud) removeFrontendIPConfigurationFromLoadBalancer(lb *network.LoadB
klog.Errorf("%s: failed to CreateOrUpdateLB: %v", logPrefix, err)
return err
}
_ = az.lbCache.Delete(pointer.StringDeref(lb.Name, ""))
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(pointer.StringDeref(lb.Name, ""))
}
}
return nil
}
Expand Down Expand Up @@ -532,7 +534,9 @@ func (az *Cloud) safeDeleteLoadBalancer(lb network.LoadBalancer, clusterName, vm
if rerr := az.DeleteLB(service, pointer.StringDeref(lb.Name, "")); rerr != nil {
return rerr
}
_ = az.lbCache.Delete(pointer.StringDeref(lb.Name, ""))
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(pointer.StringDeref(lb.Name, ""))
}

return nil
}
Expand Down Expand Up @@ -1704,7 +1708,9 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service,
vmSetName := az.mapLoadBalancerNameToVMSet(lbName, clusterName)
// The Etag changes when backend pools are updated, so invalidate lbCache afterwards.
defer func() {
_ = az.lbCache.Delete(lbName)
if !az.Config.DisableAPICallCache {
_ = az.lbCache.Delete(lbName)
}
}()

if lb.LoadBalancerPropertiesFormat != nil && lb.LoadBalancerPropertiesFormat.BackendAddressPools != nil {
Expand Down Expand Up @@ -2726,7 +2732,9 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service,
return nil, err
}
klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): end", *sg.Name)
_ = az.nsgCache.Delete(pointer.StringDeref(sg.Name, ""))
if !az.Config.DisableAPICallCache {
_ = az.nsgCache.Delete(pointer.StringDeref(sg.Name, ""))
}
}
return &sg, nil
}
Expand Down
Loading

0 comments on commit 75d20f8

Please sign in to comment.