Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize tidying up dangling secret-id accessors #6252

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion builtin/credential/approle/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ type backend struct {
// for all the SecretIDs issued against an approle
secretIDListingLock sync.RWMutex

testTidyDelay time.Duration
testTidyAccessorDelay time.Duration
testTidyDelay time.Duration
}

func Factory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
Expand Down
70 changes: 18 additions & 52 deletions builtin/credential/approle/path_tidy_user_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
accessorMap[accessorHash] = true
}

time.Sleep(b.testTidyDelay)

secretIDCleanupFunc := func(secretIDHMAC, roleNameHMAC, secretIDPrefixToUse string) error {
checkCount++
lock := b.secretIDLock(secretIDHMAC)
Expand Down Expand Up @@ -148,6 +146,13 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
return nil
}

// Wait to get a lock for each of the secretIDs to make sure we avoid a race conditon
// where the accessor has been written but not yet the secretIDHMAC
for _, lock := range b.secretIDLocks {
lock.Lock()
lock.Unlock()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this still doesn't seem right to me. I've never seen locks grabbed before without doing some work before releasing them. I suspect that in a racy production environment this would not prevent races, though it would slow the code execution.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect that in a racy production environment this would not prevent races, though it would slow the code execution.

Are you referring to the race condition I'm describing in #6252 (comment) ? Or is there another one that you can see that could potentially cause issues?

@jefferai are you able to chime in? I don't think either of us are 100% sure if my proposal here is safe and you are the original author of the dangling accessor cleanup code.

}

for _, roleNameHMAC := range roleNameHMACs {
logger.Trace("listing secret ID HMACs", "role_hmac", roleNameHMAC)
secretIDHMACs, err := s.List(ctx, fmt.Sprintf("%s%s", secretIDPrefixToUse, roleNameHMAC))
Expand All @@ -162,65 +167,26 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi
}
}

// Fake delay added during testing to simulate a race condition where the accessor has been written but not yet the secretIDHMAC
time.Sleep(b.testTidyDelay)

// Accessor indexes were not getting cleaned up until 0.9.3. This is a fix
// to clean up the dangling accessor entries.
if len(accessorMap) > 0 {
for _, lock := range b.secretIDLocks {
lock.Lock()
defer lock.Unlock()
}
for accessorHash, _ := range accessorMap {
logger.Trace("found dangling accessor, verifying")
// Ideally, locking on accessors should be performed here too
// but for that, accessors are required in plaintext, which are
// not available. The code above helps but it may still be
// racy.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not certain that we should change this code. It sounds like it's already racy. I'm not clear on where we might place the unlock for the mutex, but it seems like anything that holds the lock for a shorter amount of time would result in racier behavior. Perhaps I'm not thinking of it the right way and it'll be clearer once it's pushed.

// ...
// Look up the secret again now that we have all the locks. The
// lock is held when writing accessor/secret so if we have the
// lock we know we're not in a
// wrote-accessor-but-not-yet-secret case, which can be racy.
var entry secretIDAccessorStorageEntry
// This simulates an artificial delay for cleaning up each accessor
// It is used during testing to make sure that large amounts of accessors
// will not block new secret-ids from being created
time.Sleep(b.testTidyAccessorDelay)
logger.Trace(fmt.Sprintf("dangling accessorMap length: %d", len(accessorMap)))
for accessorHash := range accessorMap {
logger.Trace("found dangling accessor, removing", accessorHash)
entryIndex := accessorIDPrefixToUse + accessorHash
se, err := s.Get(ctx, entryIndex)
err = s.Delete(ctx, entryIndex)
if err != nil {
return err
}
if se != nil {
err = se.DecodeJSON(&entry)
if err != nil {
return err
}

// The storage entry doesn't store the role ID, so we have
// to go about this the long way; fortunately we shouldn't
// actually hit this very often
var found bool
searchloop:
for _, roleNameHMAC := range roleNameHMACs {
secretIDHMACs, err := s.List(ctx, fmt.Sprintf("%s%s", secretIDPrefixToUse, roleNameHMAC))
if err != nil {
return err
}
for _, v := range secretIDHMACs {
if v == entry.SecretIDHMAC {
found = true
logger.Trace("accessor verified, not removing")
break searchloop
}
}
}
if !found {
logger.Trace("could not verify dangling accessor, removing")
err = s.Delete(ctx, entryIndex)
if err != nil {
return err
}
}
}
}
}

return nil
}

Expand Down
66 changes: 66 additions & 0 deletions builtin/credential/approle/path_tidy_user_id_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,69 @@ func TestAppRole_TidyDanglingAccessors_RaceTest(t *testing.T) {
t.Fatalf("bad: len(secretIDs); expect %d, got %d", count, len(secretIDs))
}
}

func TestAppRole_TidyDanglingAccessors_BlockedLogins(t *testing.T) {
var resp *logical.Response
var err error
b, storage := createBackendWithStorage(t)

b.testTidyAccessorDelay = 10 * time.Second

// Create a role
createRole(t, b, storage, "role1", "a,b,c")

// Create a secret-id
roleSecretIDReq := &logical.Request{
Operation: logical.UpdateOperation,
Path: "role/role1/secret-id",
Storage: storage,
}
resp, err = b.HandleRequest(context.Background(), roleSecretIDReq)
if err != nil || (resp != nil && resp.IsError()) {
t.Fatalf("err:%v resp:%#v", err, resp)
}

accessorHashes, err := storage.List(context.Background(), "accessor/")
if err != nil {
t.Fatal(err)
}
if len(accessorHashes) != 1 {
t.Fatalf("bad: len(accessorHashes); expect 1, got %d", len(accessorHashes))
}

entry1, err := logical.StorageEntryJSON(
"accessor/invalid1",
&secretIDAccessorStorageEntry{
SecretIDHMAC: "samplesecretidhmac",
},
)
err = storage.Put(context.Background(), entry1)
if err != nil {
t.Fatal(err)
}

_, err = b.tidySecretID(context.Background(), &logical.Request{
Storage: storage,
})
if err != nil {
t.Fatal(err)
}

for i := 1; i <= 10; i++ {
start := time.Now()
roleSecretIDReq := &logical.Request{
Operation: logical.UpdateOperation,
Path: "role/role1/secret-id",
Storage: storage,
}
resp, err = b.HandleRequest(context.Background(), roleSecretIDReq)
if err != nil || (resp != nil && resp.IsError()) {
t.Fatalf("err:%v resp:%#v", err, resp)
}
elapsed := time.Since(start).Seconds()
if elapsed > 5.0 {
t.Fatalf("Request failed to complete before 5 second timeout: %f", elapsed)
}
time.Sleep(1 * time.Second)
}
}