Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[begin]: event_factory should cancel Close context in case of reselect #1519

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/networkservice/common/begin/event_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,13 @@ func (f *eventFactoryClient) Request(opts ...Option) <-chan error {
request := f.request.Clone()
if o.reselect {
ctx, cancel := f.ctxFunc()
defer cancel()
_, _ = f.client.Close(ctx, request.GetConnection(), f.opts...)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM,
ran the test on my side, works fine

Just a small question about the source of the issue: Am I correct in understanding that the original problem occurs because the cancel() for Request was run before the cancel() for Close, because they both use the defer directive?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. Close can use the context somewhere in its goroutine.
And this goroutine will be alive as long as this context is alive. But after the parent Close is completed, this is unnecessary and even dangerous - the goroutine can use the same data as the subsequent Request.
Therefore, before the Request, we need to complete Close call (including its goroutines)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thanks for explanation!

if request.GetConnection() != nil {
request.GetConnection().Mechanism = nil
request.GetConnection().NetworkServiceEndpointName = ""
request.GetConnection().State = networkservice.State_RESELECT_REQUESTED
}
cancel()
}
ctx, cancel := f.ctxFunc()
defer cancel()
Expand Down
59 changes: 58 additions & 1 deletion pkg/networkservice/common/begin/event_factory_client_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2022 Cisco and/or its affiliates.
// Copyright (c) 2022-2023 Cisco and/or its affiliates.
//
// SPDX-License-Identifier: Apache-2.0
//
Expand All @@ -18,6 +18,8 @@ package begin_test

import (
"context"

"sync/atomic"
"testing"
"time"

Expand All @@ -33,6 +35,8 @@ import (
"github.com/networkservicemesh/sdk/pkg/networkservice/common/begin"
"github.com/networkservicemesh/sdk/pkg/networkservice/core/chain"
"github.com/networkservicemesh/sdk/pkg/networkservice/core/next"
"github.com/networkservicemesh/sdk/pkg/networkservice/utils/checks/checkcontext"
"github.com/networkservicemesh/sdk/pkg/networkservice/utils/count"
"github.com/networkservicemesh/sdk/pkg/tools/clock"
"github.com/networkservicemesh/sdk/pkg/tools/clockmock"
)
Expand Down Expand Up @@ -166,6 +170,54 @@ func TestContextTimeout_Client(t *testing.T) {
eventFactoryCl.callClose()
}

// This test checks if the eventFactory reselect option cancels Close ctx correctly
func TestContextCancellationOnReselect(t *testing.T) {
t.Cleanup(func() { goleak.VerifyNone(t) })

ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
defer cancel()

var closeCall atomic.Bool
ch := make(chan struct{}, 1)

eventFactoryCl := &eventFactoryClient{}
counter := &count.Client{}
client := chain.NewNetworkServiceClient(
begin.NewClient(),
eventFactoryCl,
checkcontext.NewClient(t, func(t *testing.T, reqCtx context.Context) {
if !closeCall.Load() {
<-ch
} else {
closeCall.Store(false)
go func() {
<-reqCtx.Done()
ch <- struct{}{}
}()
}
}),
counter,
)

// Do Request
// Write to ch for the first request
ch <- struct{}{}
request := testRequest("1")
conn, err := client.Request(ctx, request.Clone())
require.NotNil(t, t, conn)
require.NoError(t, err)

// Call Reselect
// It will call Close first
closeCall.Store(true)
eventFactoryCl.callReselect()

// Waiting for the re-request
require.Eventually(t, func() bool {
return counter.Requests() == 2
}, time.Second, time.Millisecond*100)
}

type eventFactoryClient struct {
ctx context.Context
}
Expand All @@ -191,6 +243,11 @@ func (s *eventFactoryClient) callRefresh() {
<-eventFactory.Request()
}

func (s *eventFactoryClient) callReselect() {
eventFactory := begin.FromContext(s.ctx)
eventFactory.Request(begin.WithReselect())
}

type contextKey struct{}

type checkContextClient struct {
Expand Down