-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize tidying up dangling secret-id accessors #6252
Changes from all commits
6e96d87
bbbf5e8
f7e6566
c56f299
9708c3a
7bd0e78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,8 +77,6 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi | |
accessorMap[accessorHash] = true | ||
} | ||
|
||
time.Sleep(b.testTidyDelay) | ||
|
||
secretIDCleanupFunc := func(secretIDHMAC, roleNameHMAC, secretIDPrefixToUse string) error { | ||
checkCount++ | ||
lock := b.secretIDLock(secretIDHMAC) | ||
|
@@ -148,6 +146,13 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi | |
return nil | ||
} | ||
|
||
// Wait to get a lock for each of the secretIDs to make sure we avoid a race condition | |
// where the accessor has been written but not yet the secretIDHMAC | ||
for _, lock := range b.secretIDLocks { | ||
lock.Lock() | ||
lock.Unlock() | ||
} | ||
|
||
for _, roleNameHMAC := range roleNameHMACs { | ||
logger.Trace("listing secret ID HMACs", "role_hmac", roleNameHMAC) | ||
secretIDHMACs, err := s.List(ctx, fmt.Sprintf("%s%s", secretIDPrefixToUse, roleNameHMAC)) | ||
|
@@ -162,65 +167,26 @@ func (b *backend) tidySecretID(ctx context.Context, req *logical.Request) (*logi | |
} | ||
} | ||
|
||
// Fake delay added during testing to simulate a race condition where the accessor has been written but not yet the secretIDHMAC | ||
time.Sleep(b.testTidyDelay) | ||
|
||
// Accessor indexes were not getting cleaned up until 0.9.3. This is a fix | ||
// to clean up the dangling accessor entries. | ||
if len(accessorMap) > 0 { | ||
for _, lock := range b.secretIDLocks { | ||
lock.Lock() | ||
defer lock.Unlock() | ||
} | ||
for accessorHash, _ := range accessorMap { | ||
logger.Trace("found dangling accessor, verifying") | ||
// Ideally, locking on accessors should be performed here too | ||
// but for that, accessors are required in plaintext, which are | ||
// not available. The code above helps but it may still be | ||
// racy. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not certain that we should change this code. It sounds like it's already racy. I'm not clear on where we might place the unlock for the mutex, but it seems like anything that holds the lock for a shorter amount of time would result in racier behavior. Perhaps I'm not thinking of it the right way and it'll be clearer once it's pushed. |
||
// ... | ||
// Look up the secret again now that we have all the locks. The | ||
// lock is held when writing accessor/secret so if we have the | ||
// lock we know we're not in a | ||
// wrote-accessor-but-not-yet-secret case, which can be racy. | ||
var entry secretIDAccessorStorageEntry | ||
// This simulates an artificial delay for cleaning up each accessor | ||
// It is used during testing to make sure that large amounts of accessors | ||
// will not block new secret-ids from being created | ||
time.Sleep(b.testTidyAccessorDelay) | ||
logger.Trace(fmt.Sprintf("dangling accessorMap length: %d", len(accessorMap))) | ||
for accessorHash := range accessorMap { | ||
logger.Trace("found dangling accessor, removing", accessorHash) | ||
entryIndex := accessorIDPrefixToUse + accessorHash | ||
se, err := s.Get(ctx, entryIndex) | ||
err = s.Delete(ctx, entryIndex) | ||
if err != nil { | ||
return err | ||
} | ||
if se != nil { | ||
err = se.DecodeJSON(&entry) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// The storage entry doesn't store the role ID, so we have | ||
// to go about this the long way; fortunately we shouldn't | ||
// actually hit this very often | ||
var found bool | ||
searchloop: | ||
for _, roleNameHMAC := range roleNameHMACs { | ||
secretIDHMACs, err := s.List(ctx, fmt.Sprintf("%s%s", secretIDPrefixToUse, roleNameHMAC)) | ||
if err != nil { | ||
return err | ||
} | ||
for _, v := range secretIDHMACs { | ||
if v == entry.SecretIDHMAC { | ||
found = true | ||
logger.Trace("accessor verified, not removing") | ||
break searchloop | ||
} | ||
} | ||
} | ||
if !found { | ||
logger.Trace("could not verify dangling accessor, removing") | ||
err = s.Delete(ctx, entryIndex) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, this still doesn't seem right to me. I've never seen locks grabbed before without doing some work before releasing them. I suspect that in a racy production environment this would not prevent races, though it would slow the code execution.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you referring to the race condition I'm describing in #6252 (comment)? Or is there another one that you can see that could potentially cause issues?
@jefferai are you able to chime in? I don't think either of us is 100% sure whether my proposal here is safe, and you are the original author of the dangling accessor cleanup code.