Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat) internal/civisibility: add Known Tests feature and refactor EFD logic V2 #3140

Merged
merged 5 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions internal/civisibility/constants/test_tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ const (
// This constant is used to tag test events that are part of a retry execution
TestIsRetry = "test.is_retry"

// TestRetryReason indicates the reason for retrying the test
TestRetryReason = "test.retry_reason"

// TestEarlyFlakeDetectionRetryAborted indicates a retry abort reason by the early flake detection feature
TestEarlyFlakeDetectionRetryAborted = "test.early_flake.abort_reason"

Expand Down
27 changes: 16 additions & 11 deletions internal/civisibility/integrations/civisibility_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ var (
// ciVisibilitySettings contains the CI Visibility settings for this session
ciVisibilitySettings net.SettingsResponseData

// ciVisibilityEarlyFlakyDetectionSettings contains the CI Visibility Early Flake Detection data for this session
ciVisibilityEarlyFlakyDetectionSettings net.EfdResponseData
// ciVisibilityKnownTests contains the CI Visibility Known Tests data for this session
ciVisibilityKnownTests net.KnownTestsResponseData

// ciVisibilityFlakyRetriesSettings contains the CI Visibility Flaky Retries settings for this session
ciVisibilityFlakyRetriesSettings FlakyRetriesSetting
Expand Down Expand Up @@ -121,15 +121,20 @@ func ensureAdditionalFeaturesInitialization(serviceName string) {
return
}

// if early flake detection is enabled then we run the early flake detection request
if ciVisibilitySettings.EarlyFlakeDetection.Enabled {
ciEfdData, err := ciVisibilityClient.GetEarlyFlakeDetectionData()
// if the known tests feature is enabled then we run the known tests request
if ciVisibilitySettings.KnownTestsEnabled {
ciEfdData, err := ciVisibilityClient.GetKnownTests()
if err != nil {
log.Error("civisibility: error getting CI visibility early flake detection data: %v", err)
log.Error("civisibility: error getting CI visibility known tests data: %v", err)
} else if ciEfdData != nil {
ciVisibilityEarlyFlakyDetectionSettings = *ciEfdData
log.Debug("civisibility: early flake detection data loaded.")
ciVisibilityKnownTests = *ciEfdData
log.Debug("civisibility: known tests data loaded.")
}
} else {
// "known_tests_enabled" parameter works as a kill-switch for EFD, so if "known_tests_enabled" is false it
// will disable EFD even if "early_flake_detection.enabled" is set to true (which should not happen normally,
// the backend should disable both of them in that case)
ciVisibilitySettings.EarlyFlakeDetection.Enabled = false
}

// if flaky test retries is enabled then let's load the flaky retries settings
Expand Down Expand Up @@ -172,11 +177,11 @@ func GetSettings() *net.SettingsResponseData {
return &ciVisibilitySettings
}

// GetEarlyFlakeDetectionSettings gets the early flake detection known tests data
func GetEarlyFlakeDetectionSettings() *net.EfdResponseData {
// GetKnownTests gets the known tests data
func GetKnownTests() *net.KnownTestsResponseData {
// call to ensure the additional features initialization is completed (service name can be empty here)
ensureAdditionalFeaturesInitialization("")
return &ciVisibilityEarlyFlakyDetectionSettings
return &ciVisibilityKnownTests
}

// GetFlakyRetriesSettings gets the flaky retries settings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ type MockClient struct {
SendCoveragePayloadFunc func(ciTestCovPayload io.Reader) error
SendCoveragePayloadWithFormatFunc func(ciTestCovPayload io.Reader, format string) error
GetSettingsFunc func() (*net.SettingsResponseData, error)
GetEarlyFlakeDetectionDataFunc func() (*net.EfdResponseData, error)
GetKnownTestsFunc func() (*net.KnownTestsResponseData, error)
GetCommitsFunc func(localCommits []string) ([]string, error)
SendPackFilesFunc func(commitSha string, packFiles []string) (bytes int64, err error)
GetSkippableTestsFunc func() (correlationId string, skippables map[string]map[string][]net.SkippableResponseDataAttributes, err error)
Expand All @@ -91,8 +91,8 @@ func (m *MockClient) GetSettings() (*net.SettingsResponseData, error) {
return m.GetSettingsFunc()
}

func (m *MockClient) GetEarlyFlakeDetectionData() (*net.EfdResponseData, error) {
return m.GetEarlyFlakeDetectionDataFunc()
func (m *MockClient) GetKnownTests() (*net.KnownTestsResponseData, error) {
return m.GetKnownTestsFunc()
}

func (m *MockClient) GetCommits(localCommits []string) ([]string, error) {
Expand Down
201 changes: 111 additions & 90 deletions internal/civisibility/integrations/gotesting/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"fmt"
"reflect"
"runtime"
"slices"
"sync"
"sync/atomic"
"testing"
Expand All @@ -36,7 +35,9 @@ type (
panicData any // panic data recovered from an internal test execution when using an additional feature wrapper
panicStacktrace string // stacktrace from the panic recovered from an internal test
isARetry bool // flag to tag if a current test execution is a retry
isANewTest bool // flag to tag if a current test execution is part of a new test (EFD not known test)
isANewTest bool // flag to tag if a current test execution is part of a new test
isEFDExecution bool // flag to tag if a current test execution is part of an EFD execution
isATRExecution bool // flag to tag if a current test execution is part of an ATR execution
hasAdditionalFeatureWrapper bool // flag to check if the current execution is part of an additional feature wrapper
}

Expand Down Expand Up @@ -191,20 +192,29 @@ func applyFlakyTestRetriesAdditionalFeature(targetFunc func(*testing.T)) (func(*
initialRetryCount: flakyRetrySettings.RetryCount,
adjustRetryCount: nil, // No adjustRetryCount
shouldRetry: func(ptrToLocalT *testing.T, executionIndex int, remainingRetries int64) bool {
remainingTotalRetries := atomic.AddInt64(&flakyRetrySettings.RemainingTotalRetryCount, -1)
// Decide whether to retry
return ptrToLocalT.Failed() && remainingRetries >= 0 && remainingTotalRetries >= 0
return ptrToLocalT.Failed() && remainingRetries >= 0 && atomic.LoadInt64(&flakyRetrySettings.RemainingTotalRetryCount) >= 0
},
perExecution: func(ptrToLocalT *testing.T, executionIndex int, duration time.Duration) {
if executionIndex > 0 {
atomic.AddInt64(&flakyRetrySettings.RemainingTotalRetryCount, -1)
}
},
perExecution: nil, // No perExecution needed
onRetryEnd: func(t *testing.T, executionIndex int, lastPtrToLocalT *testing.T) {
// Update original `t` with results from last execution
tCommonPrivates := getTestPrivateFields(t)
if tCommonPrivates == nil {
panic("getting test private fields failed")
}
tCommonPrivates.SetFailed(lastPtrToLocalT.Failed())
tCommonPrivates.SetSkipped(lastPtrToLocalT.Skipped())

// Update parent status if failed
if lastPtrToLocalT.Failed() {
tParentCommonPrivates := getTestParentPrivateFields(t)
if tParentCommonPrivates == nil {
panic("getting test parent private fields failed")
}
tParentCommonPrivates.SetFailed(true)
}

Expand All @@ -218,14 +228,17 @@ func applyFlakyTestRetriesAdditionalFeature(targetFunc func(*testing.T)) (func(*
}

fmt.Printf(" [ %v after %v retries by Datadog's auto test retries ]\n", status, executionIndex)
}

// Check if total retry count was exceeded
if flakyRetrySettings.RemainingTotalRetryCount < 1 {
fmt.Println(" the maximum number of total retries was exceeded.")
// Check if total retry count was exceeded
if atomic.LoadInt64(&flakyRetrySettings.RemainingTotalRetryCount) < 1 {
fmt.Println(" the maximum number of total retries was exceeded.")
}
}
},
execMetaAdjust: nil, // No execMetaAdjust needed
execMetaAdjust: func(execMeta *testExecutionMetadata, executionIndex int) {
// Set the flag ATR execution to true
execMeta.isATRExecution = true
},
})
}, true
}
Expand All @@ -234,89 +247,82 @@ func applyFlakyTestRetriesAdditionalFeature(targetFunc func(*testing.T)) (func(*

// applyEarlyFlakeDetectionAdditionalFeature applies the early flake detection feature as a wrapper of a func(*testing.T)
func applyEarlyFlakeDetectionAdditionalFeature(testInfo *commonInfo, targetFunc func(*testing.T), settings *net.SettingsResponseData) (func(*testing.T), bool) {
earlyFlakeDetectionData := integrations.GetEarlyFlakeDetectionSettings()
if earlyFlakeDetectionData != nil &&
len(earlyFlakeDetectionData.Tests) > 0 {

// Define is a known test flag
isAKnownTest := false

// Check if the test is a known test or a new one
if knownSuites, ok := earlyFlakeDetectionData.Tests[testInfo.moduleName]; ok {
if knownTests, ok := knownSuites[testInfo.suiteName]; ok {
if slices.Contains(knownTests, testInfo.testName) {
isAKnownTest = true
}
}
}
isKnown, hasKnownData := isKnownTest(testInfo)
if !hasKnownData || isKnown {
return targetFunc, false
}

// If it's a new test, then we apply the EFD wrapper
if !isAKnownTest {
return func(t *testing.T) {
var testPassCount, testSkipCount, testFailCount int

runTestWithRetry(&runTestWithRetryOptions{
targetFunc: targetFunc,
t: t,
initialRetryCount: 0,
adjustRetryCount: func(duration time.Duration) int64 {
slowTestRetriesSettings := settings.EarlyFlakeDetection.SlowTestRetries
durationSecs := duration.Seconds()
if durationSecs < 5 {
return int64(slowTestRetriesSettings.FiveS)
} else if durationSecs < 10 {
return int64(slowTestRetriesSettings.TenS)
} else if durationSecs < 30 {
return int64(slowTestRetriesSettings.ThirtyS)
} else if duration.Minutes() < 5 {
return int64(slowTestRetriesSettings.FiveM)
}
return 0
},
shouldRetry: func(ptrToLocalT *testing.T, executionIndex int, remainingRetries int64) bool {
return remainingRetries >= 0
},
perExecution: func(ptrToLocalT *testing.T, executionIndex int, duration time.Duration) {
// Collect test results
if ptrToLocalT.Failed() {
testFailCount++
} else if ptrToLocalT.Skipped() {
testSkipCount++
} else {
testPassCount++
}
},
onRetryEnd: func(t *testing.T, executionIndex int, lastPtrToLocalT *testing.T) {
// Update test status based on collected counts
tCommonPrivates := getTestPrivateFields(t)
// If it's a new test, then we apply the EFD wrapper
return func(t *testing.T) {
var testPassCount, testSkipCount, testFailCount int

runTestWithRetry(&runTestWithRetryOptions{
targetFunc: targetFunc,
t: t,
initialRetryCount: 0,
adjustRetryCount: func(duration time.Duration) int64 {
slowTestRetriesSettings := settings.EarlyFlakeDetection.SlowTestRetries
durationSecs := duration.Seconds()
if durationSecs < 5 {
return int64(slowTestRetriesSettings.FiveS)
} else if durationSecs < 10 {
return int64(slowTestRetriesSettings.TenS)
} else if durationSecs < 30 {
return int64(slowTestRetriesSettings.ThirtyS)
} else if duration.Minutes() < 5 {
return int64(slowTestRetriesSettings.FiveM)
}
return 0
},
shouldRetry: func(ptrToLocalT *testing.T, executionIndex int, remainingRetries int64) bool {
return remainingRetries >= 0
},
perExecution: func(ptrToLocalT *testing.T, executionIndex int, duration time.Duration) {
// Collect test results
if ptrToLocalT.Failed() {
testFailCount++
} else if ptrToLocalT.Skipped() {
testSkipCount++
} else {
testPassCount++
}
},
onRetryEnd: func(t *testing.T, executionIndex int, lastPtrToLocalT *testing.T) {
// Update test status based on collected counts
tCommonPrivates := getTestPrivateFields(t)
if tCommonPrivates == nil {
panic("getting test private fields failed")
}
status := "passed"
if testPassCount == 0 {
if testSkipCount > 0 {
status = "skipped"
tCommonPrivates.SetSkipped(true)
}
if testFailCount > 0 {
status = "failed"
tCommonPrivates.SetFailed(true)
tParentCommonPrivates := getTestParentPrivateFields(t)
status := "passed"
if testPassCount == 0 {
if testSkipCount > 0 {
status = "skipped"
tCommonPrivates.SetSkipped(true)
}
if testFailCount > 0 {
status = "failed"
tCommonPrivates.SetFailed(true)
tParentCommonPrivates.SetFailed(true)
}
if tParentCommonPrivates == nil {
panic("getting test parent private fields failed")
}
tParentCommonPrivates.SetFailed(true)
}
}

// Print summary after retries
if executionIndex > 0 {
fmt.Printf(" [ %v after %v retries by Datadog's early flake detection ]\n", status, executionIndex)
}
},
execMetaAdjust: func(execMeta *testExecutionMetadata, executionIndex int) {
// Set the flag new test to true
execMeta.isANewTest = true
},
})
}, true
}
}
return targetFunc, false
// Print summary after retries
if executionIndex > 0 {
fmt.Printf(" [ %v after %v retries by Datadog's early flake detection ]\n", status, executionIndex)
}
},
execMetaAdjust: func(execMeta *testExecutionMetadata, executionIndex int) {
// Set the flag new test to true
execMeta.isANewTest = true
// Set the flag EFD execution to true
execMeta.isEFDExecution = true
},
})
}, true
}

// runTestWithRetry encapsulates the common retry logic for test functions.
Expand All @@ -336,7 +342,10 @@ func runTestWithRetry(options *runTestWithRetryOptions) {

for {
// Clear the matcher subnames map before each execution to avoid subname tests being called "parent/subname#NN" due to retries
getTestContextMatcherPrivateFields(options.t).ClearSubNames()
matcher := getTestContextMatcherPrivateFields(options.t)
if matcher != nil {
matcher.ClearSubNames()
}

// Increment execution index
executionIndex++
Expand All @@ -348,6 +357,12 @@ func runTestWithRetry(options *runTestWithRetryOptions) {
// Create a dummy parent so we can run the test using this local copy
// without affecting the test parent
localTPrivateFields := getTestPrivateFields(ptrToLocalT)
if localTPrivateFields == nil {
panic("getting test private fields failed")
}
if localTPrivateFields.parent == nil {
panic("parent of the test is nil")
}
*localTPrivateFields.parent = unsafe.Pointer(&testing.T{})

// Create an execution metadata instance
Expand All @@ -362,6 +377,12 @@ func runTestWithRetry(options *runTestWithRetryOptions) {
if originalExecMeta.isARetry {
execMeta.isARetry = true
}
if originalExecMeta.isEFDExecution {
execMeta.isEFDExecution = true
}
if originalExecMeta.isATRExecution {
execMeta.isATRExecution = true
}
}

// If we are in a retry execution, set the `isARetry` flag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ func instrumentTestingTFunc(f func(*testing.T)) func(*testing.T) {
if parentExecMeta.isARetry {
execMeta.isARetry = true
}
if parentExecMeta.isEFDExecution {
execMeta.isEFDExecution = true
}
if parentExecMeta.isATRExecution {
execMeta.isATRExecution = true
}
}
}

Expand All @@ -175,6 +181,15 @@ func instrumentTestingTFunc(f func(*testing.T)) func(*testing.T) {
if execMeta.isARetry {
// Set the retry tag
test.SetTag(constants.TestIsRetry, "true")

// If the execution is an EFD execution we tag the test event reason
if execMeta.isEFDExecution {
// Set the EFD as the retry reason
test.SetTag(constants.TestRetryReason, "efd")
} else if execMeta.isATRExecution {
// Set the ATR as the retry reason
test.SetTag(constants.TestRetryReason, "atr")
}
}

defer func() {
Expand Down
Loading
Loading