diff --git a/pkg/dynamicinstrumentation/diconfig/binary_inspection.go b/pkg/dynamicinstrumentation/diconfig/binary_inspection.go index c62c9975d75fe3..3608124eda7761 100644 --- a/pkg/dynamicinstrumentation/diconfig/binary_inspection.go +++ b/pkg/dynamicinstrumentation/diconfig/binary_inspection.go @@ -21,17 +21,23 @@ import ( // inspectGoBinaries goes through each service and populates information about the binary // and the relevant parameters, and their types // configEvent maps service names to info about the service and their configurations -func inspectGoBinaries(configEvent ditypes.DIProcs) bool { +func inspectGoBinaries(configEvent ditypes.DIProcs) error { + var err error var inspectedAtLeastOneBinary bool for i := range configEvent { - err := AnalyzeBinary(configEvent[i]) + err = AnalyzeBinary(configEvent[i]) if err != nil { log.Info("inspection of PID %d (path=%s) failed: %w", configEvent[i].PID, configEvent[i].BinaryPath, err) } else { inspectedAtLeastOneBinary = true } } - return inspectedAtLeastOneBinary + + if !inspectedAtLeastOneBinary { + return fmt.Errorf("failed to inspect all tracked go binaries") + } + + return nil } // AnalyzeBinary reads the binary associated with the specified process and parses diff --git a/pkg/dynamicinstrumentation/diconfig/mem_config_manager.go b/pkg/dynamicinstrumentation/diconfig/mem_config_manager.go index a604920c32d83b..07ad2d0df262ee 100644 --- a/pkg/dynamicinstrumentation/diconfig/mem_config_manager.go +++ b/pkg/dynamicinstrumentation/diconfig/mem_config_manager.go @@ -80,9 +80,9 @@ func (cm *ReaderConfigManager) update() error { } if !reflect.DeepEqual(cm.state, updatedState) { - atLeastOneBinaryAnalyzed := inspectGoBinaries(updatedState) - if !atLeastOneBinaryAnalyzed { - return fmt.Errorf("failed to inspect all tracked go binaries.") + err := inspectGoBinaries(updatedState) + if err != nil { + return err } for pid, procInfo := range cm.state { diff --git a/pkg/dynamicinstrumentation/proctracker/proctracker.go 
b/pkg/dynamicinstrumentation/proctracker/proctracker.go index 40b02e75ee6b81..2dcaa301cfad18 100644 --- a/pkg/dynamicinstrumentation/proctracker/proctracker.go +++ b/pkg/dynamicinstrumentation/proctracker/proctracker.go @@ -84,7 +84,7 @@ func (pt *ProcessTracker) Stop() { } } -func (pt *ProcessTracker) Test_HandleProcessStart(pid uint32) { +func (pt *ProcessTracker) HandleProcessStartSync(pid uint32) { exePath := filepath.Join(pt.procRoot, strconv.FormatUint(uint64(pid), 10), "exe") pt.inspectBinary(exePath, pid) diff --git a/pkg/dynamicinstrumentation/testutil/exploration_e2e_test.go b/pkg/dynamicinstrumentation/testutil/exploration_e2e_test.go index d41ce5dff9d569..9f93fa23402af6 100644 --- a/pkg/dynamicinstrumentation/testutil/exploration_e2e_test.go +++ b/pkg/dynamicinstrumentation/testutil/exploration_e2e_test.go @@ -93,88 +93,135 @@ type ProbeManager struct { mu sync.Mutex } -func NewProbeManager(t *testing.T) *ProbeManager { - return &ProbeManager{ - t: t, - } +type BinaryInfo struct { + path string + hasDebug bool } -func (pm *ProbeManager) Install(pid int, function string) error { - pm.mu.Lock() - defer pm.mu.Unlock() - - // Get or create the map of installed probes for this PID - v, _ := pm.installedProbes.LoadOrStore(pid, make(map[string]struct{})) - probes := v.(map[string]struct{}) - - // Install the probe - probes[function] = struct{}{} - pm.t.Logf("πŸ”§ Installing probe: PID=%d Function=%s", pid, function) - - // Your actual probe installation logic here using GoDI - // Example: - // err := pm.godi.InstallProbe(pid, function) - return nil +type FunctionInfo struct { + PackageName string + FunctionName string + FullName string + ProbeId string } -func (pm *ProbeManager) Remove(pid int, function string) error { - pm.mu.Lock() - defer pm.mu.Unlock() - - if v, ok := pm.installedProbes.Load(pid); ok { - probes := v.(map[string]struct{}) - delete(probes, function) - pm.t.Logf("πŸ”§ Removing probe: PID=%d Function=%s", pid, function) +func 
NewFunctionInfo(packageName, functionName, fullName string) FunctionInfo { + return FunctionInfo{ + PackageName: packageName, + FunctionName: functionName, + FullName: fullName, + ProbeId: uuid.NewString(), + } +} - // Your actual probe removal logic here +type rcConfig struct { + ID string + Version int + ProbeType string `json:"type"` + Language string + Where struct { + TypeName string `json:"typeName"` + MethodName string `json:"methodName"` + SourceFile string + Lines []string + } + Tags []string + Template string + CaptureSnapshot bool + EvaluatedAt string + Capture struct { + MaxReferenceDepth int `json:"maxReferenceDepth"` + MaxFieldCount int `json:"maxFieldCount"` } - return nil } -func (pm *ProbeManager) CollectData(pid int, function string) (bool, error) { - // Check if we've received data for this probe - // This is where you'd check your actual data collection mechanism +type ConfigAccumulator struct { + configs map[string]map[string]rcConfig + tmpl *template.Template + mu sync.RWMutex +} - // For testing, let's simulate data collection - // In reality, you'd check if your probe has published any data - if v, ok := pm.dataReceived.Load(pid); ok { - dataMap := v.(map[string]bool) - return dataMap[function], nil - } - return false, nil +type RepoInfo struct { + Packages map[string]bool // Package names found in repo + RepoPath string // Path to the repo + CommitHash string // Current commit hash (optional) } -func NewProcessTracker(t *testing.T) *ProcessTracker { - return &ProcessTracker{ - t: t, - processes: make(map[int]*ProcessInfo), - stopChan: make(chan struct{}), - analyzedBinaries: make(map[string]bool), - analyzedPIDs: make(map[int]bool), - done: make(chan struct{}), - } +type explorationEventOutputTestWriter struct { + t *testing.T + expectedResult map[string]*ditypes.CapturedValue } -func (pt *ProcessTracker) markAnalyzed(pid int, path string) { - pt.mu.Lock() - defer pt.mu.Unlock() - pt.analyzedPIDs[pid] = true - pt.analyzedBinaries[path] = 
true +func (e *explorationEventOutputTestWriter) Write(p []byte) (n int, err error) { + var snapshot ditypes.SnapshotUpload + if err := json.Unmarshal(p, &snapshot); err != nil { + e.t.Error("failed to unmarshal snapshot", err) + } + funcName := snapshot.Debugger.ProbeInSnapshot.Type + "." + snapshot.Debugger.ProbeInSnapshot.Method + e.t.Logf("Received snapshot for function: %s", funcName) + return len(p), nil } -func getProcessArgs(pid int) ([]string, error) { - // Construct the path to the /proc//cmdline file - procFile := fmt.Sprintf("/proc/%d/cmdline", pid) +var ( + analyzedBinaries []BinaryInfo + waitForAttach bool = true + bufferPool = sync.Pool{New: func() interface{} { return new(bytes.Buffer) }} + seenBinaries = make(map[string]struct{}) + g_configsAccumulator *ConfigAccumulator + g_RepoInfo *RepoInfo + g_ConfigManager *diconfig.ReaderConfigManager + g_cmd *exec.Cmd + DEBUG bool = true + TRACE bool = false + NUMBER_OF_PROBES int = 10 + + explorationTestConfigTemplateText = ` + {{- range $index, $target := .}} + {{- if $index}},{{end}} + "{{$target.ProbeId}}": { + "id": "{{$target.ProbeId}}", + "version": 0, + "type": "LOG_PROBE", + "language": "go", + "where": { + "typeName": "{{$target.PackageName}}", + "methodName": "{{$target.FunctionName}}" + }, + "tags": [], + "template": "Executed {{$target.PackageName}}.{{$target.FunctionName}}, it took {@duration}ms", + "segments": [ + { + "str": "Executed {{$target.PackageName}}.{{$target.FunctionName}}, it took " + }, + { + "dsl": "@duration", + "json": { + "ref": "@duration" + } + }, + { + "str": "ms" + } + ], + "captureSnapshot": false, + "capture": { + "maxReferenceDepth": 10 + }, + "sampling": { + "snapshotsPerSecond": 5000 + }, + "evaluateAt": "EXIT" + } + {{- end}} +` +) - // Read the file content - data, err := os.ReadFile(procFile) +func getProcessArgs(pid int) ([]string, error) { + data, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)) if err != nil { return nil, err } - - // The arguments are 
null-byte separated, split them args := strings.Split(string(data), "\x00") - // Remove any trailing empty string caused by the trailing null byte if len(args) > 0 && args[len(args)-1] == "" { args = args[:len(args)-1] } @@ -182,11 +229,7 @@ func getProcessArgs(pid int) ([]string, error) { } func getProcessCwd(pid int) (string, error) { - // Construct the path to the /proc//cwd symlink - procFile := fmt.Sprintf("/proc/%d/cwd", pid) - - // Read the symlink to find the current working directory - cwd, err := os.Readlink(procFile) + cwd, err := os.Readlink(fmt.Sprintf("/proc/%d/cwd", pid)) if err != nil { return "", err } @@ -194,18 +237,11 @@ func getProcessCwd(pid int) (string, error) { } func getProcessEnv(pid int) ([]string, error) { - // Construct the path to the /proc//environ file - procFile := fmt.Sprintf("/proc/%d/environ", pid) - - // Open and read the file - data, err := os.ReadFile(procFile) + data, err := os.ReadFile(fmt.Sprintf("/proc/%d/environ", pid)) if err != nil { return nil, err } - - // The environment variables are null-byte separated, split them env := strings.Split(string(data), "\x00") - // Remove any trailing empty string caused by the trailing null byte if len(env) > 0 && env[len(env)-1] == "" { env = env[:len(env)-1] } @@ -228,7 +264,6 @@ func hasDWARFInfo(binaryPath string) (bool, error) { } defer f.Close() - // Try both approaches: section lookup and DWARF data reading debugSections := false for _, section := range f.Sections { if strings.HasPrefix(section.Name, ".debug_") { @@ -237,13 +272,11 @@ func hasDWARFInfo(binaryPath string) (bool, error) { } } - // Try to actually read DWARF data dwarfData, err := f.DWARF() if err != nil { return debugSections, fmt.Errorf("DWARF read error: %w", err) } - // Verify we can read some DWARF data reader := dwarfData.Reader() entry, err := reader.Next() if err != nil { @@ -253,59 +286,9 @@ func hasDWARFInfo(binaryPath string) (bool, error) { fmt.Printf("Found DWARF entry of type: %v\n", entry.Tag) 
return true, nil } - return false, nil } -type BinaryInfo struct { - path string - hasDebug bool -} - -type FunctionInfo struct { - PackageName string - FunctionName string - FullName string - ProbeId string -} - -func NewFunctionInfo(packageName, functionName, fullName string) FunctionInfo { - return FunctionInfo{ - PackageName: packageName, - FunctionName: functionName, - FullName: fullName, - ProbeId: uuid.NewString(), - } -} - -func extractPackageAndFunction(fullName string) FunctionInfo { - // Handle empty input - if fullName == "" { - return FunctionInfo{} - } - - // First, find the last index of "." before any parentheses - parenIndex := strings.Index(fullName, "(") - lastDot := -1 - if parenIndex != -1 { - // If we have parentheses, look for the last dot before them - lastDot = strings.LastIndex(fullName[:parenIndex], ".") - } else { - // If no parentheses, just find the last dot - lastDot = strings.LastIndex(fullName, ".") - } - - if lastDot == -1 { - return FunctionInfo{} - } - - // Split into package and function parts - pkgPath := fullName[:lastDot] - funcPart := fullName[lastDot+1:] - - return NewFunctionInfo(pkgPath, funcPart, fullName) -} - func listAllFunctions(filePath string) ([]FunctionInfo, error) { var functions []FunctionInfo var errors []string @@ -322,7 +305,6 @@ func listAllFunctions(filePath string) ([]FunctionInfo, error) { } reader := dwarfData.Reader() - for { entry, err := reader.Next() if err != nil { @@ -331,19 +313,16 @@ func listAllFunctions(filePath string) ([]FunctionInfo, error) { if entry == nil { break } - if entry.Tag == dwarf.TagSubprogram { funcName, ok := entry.Val(dwarf.AttrName).(string) if !ok || funcName == "" { continue } - info := extractPackageAndFunction(funcName) if info.FunctionName == "" { errors = append(errors, fmt.Sprintf("could not extract function name from %q", funcName)) continue } - functions = append(functions, info) } } @@ -354,272 +333,86 @@ func listAllFunctions(filePath string) ([]FunctionInfo, 
error) { } return nil, fmt.Errorf("no functions found in the binary") } - return functions, nil } -// func isStandardPackage(pkg string) bool { -// // List of common standard library packages that might be nested -// stdPkgs := map[string]bool{ -// "encoding/json": true, -// "compress/flate": true, -// "compress/gzip": true, -// "encoding/base64": true, -// // Add more as needed -// } -// return stdPkgs[pkg] -// } - -// func listAllFunctions(filePath string) ([]FunctionInfo, error) { -// var functions []FunctionInfo - -// // Open the ELF file -// ef, err := elf.Open(filePath) -// if err != nil { -// return nil, fmt.Errorf("failed to open file: %v", err) -// } -// defer ef.Close() - -// // Retrieve symbols from the ELF file -// symbols, err := ef.Symbols() -// if err != nil { -// return nil, fmt.Errorf("failed to read symbols: %v", err) -// } - -// // Iterate over symbols and filter function symbols -// for _, sym := range symbols { -// if elf.ST_TYPE(sym.Info) == elf.STT_FUNC { -// // Extract function name -// functionName := sym.Name - -// // Extract package name from section index (if applicable) -// // DWARF data or additional analysis can refine this -// packageName := "" - -// // Add to result -// functions = append(functions, FunctionInfo{ -// PackageName: packageName, -// FunctionName: functionName, -// }) -// } -// } -// return functions, nil -// } +func extractPackageAndFunction(fullName string) FunctionInfo { + if fullName == "" { + return FunctionInfo{} + } + parenIndex := strings.Index(fullName, "(") + lastDot := -1 + if parenIndex != -1 { + lastDot = strings.LastIndex(fullName[:parenIndex], ".") + } else { + lastDot = strings.LastIndex(fullName, ".") + } + if lastDot == -1 { + return FunctionInfo{} + } + pkgPath := fullName[:lastDot] + funcPart := fullName[lastDot+1:] + return NewFunctionInfo(pkgPath, funcPart, fullName) +} func shouldProfileFunction(name string) bool { - // First, immediately reject known system/internal functions - if 
strings.HasPrefix(name, "*ZN") || // Sanitizer/LLVM functions - strings.HasPrefix(name, "_") || // Internal functions + if strings.HasPrefix(name, "*ZN") || + strings.HasPrefix(name, "_") || strings.Contains(name, "_sanitizer") || strings.Contains(name, "runtime.") { return false } - - // Extract package from function name parts := strings.Split(name, ".") if len(parts) < 2 { return false } - pkgPath := parts[0] if len(parts) > 2 { pkgPath = strings.Join(parts[:len(parts)-1], "/") } - - // Check if it's in our repository packages for repoPkg := range g_RepoInfo.Packages { if strings.Contains(pkgPath, repoPkg) { return true } } - return false } -// func shouldProfileFunction(name string) bool { -// // Skip standard library packages -// stdlibPrefixes := []string{ -// "bufio.", -// "bytes.", -// "context.", -// "crypto.", -// "compress/", -// "database/", -// "debug/", -// "encoding/", -// "errors.", -// "flag.", -// "fmt.", -// "io.", -// "log.", -// "math.", -// "net.", -// "os.", -// "path.", -// "reflect.", -// "regexp.", -// "runtime.", -// "sort.", -// "strconv.", -// "strings.", -// "sync.", -// "syscall.", -// "time.", -// "unicode.", -// } - -// // Definitely skip these system internals -// skipPrefixes := []string{ -// "runtime.", -// "runtime/race", -// "*ZN", // LLVM/Clang internals -// "type..", // Go type metadata -// "gc.", // Garbage collector -// "gosb.", // Go sandbox -// "_rt.", // Runtime helpers -// "reflect.", // Reflection internals -// } - -// skipContains := []string{ -// "_sanitizer", -// "_tsan", -// ".constprop.", // Compiler generated constants -// ".isra.", // LLVM optimized functions -// ".part.", // Partial functions from compiler -// "__gcc_", // GCC internals -// "_cgo_", // CGO generated code -// "goexit", // Go runtime exit handlers -// "gcproc", // GC procedures -// ".loc.", // Location metadata -// "runtimeΒ·", // Runtime internals (different dot) -// } - -// // Quick reject for standard library and system functions -// for _, 
prefix := range append(stdlibPrefixes, skipPrefixes...) { -// if strings.HasPrefix(name, prefix) { -// return false -// } -// } - -// for _, substr := range skipContains { -// if strings.Contains(name, substr) { -// return false -// } -// } - -// // High priority user functions - definitely profile these -// priorityPrefixes := []string{ -// "main.", -// "cmd.", -// "github.com/", -// "golang.org/x/", -// "google.golang.org/", -// "k8s.io/", -// } - -// for _, prefix := range priorityPrefixes { -// if strings.HasPrefix(name, prefix) { -// return true -// } -// } - -// // Function looks like a normal Go function (CapitalizedName) -// if len(name) > 0 && unicode.IsUpper(rune(name[0])) { -// return true -// } - -// // If it contains a dot and doesn't look like a compiler-generated name -// if strings.Contains(name, ".") && -// !strings.Contains(name, "$") && -// !strings.Contains(name, "__") { -// return true -// } - -// // If we get here, it's probably a system function -// return false -// } - -var NUMBER_OF_PROBES int = 10 - func filterFunctions(funcs []FunctionInfo) []FunctionInfo { var validFuncs []FunctionInfo - - // First pass: collect only functions from our packages for _, f := range funcs { - // Combine package and function name for filtering fullName := fmt.Sprintf("%s.%s", f.PackageName, f.FunctionName) if shouldProfileFunction(fullName) { validFuncs = append(validFuncs, f) } } - - // If we have no valid functions, return empty list if len(validFuncs) == 0 { return nil } - - // Sort valid functions for consistent ordering sort.Slice(validFuncs, func(i, j int) bool { - // Sort alphabetically by full name (package + function) fullNameI := fmt.Sprintf("%s.%s", validFuncs[i].PackageName, validFuncs[i].FunctionName) fullNameJ := fmt.Sprintf("%s.%s", validFuncs[j].PackageName, validFuncs[j].FunctionName) return fullNameI < fullNameJ }) - - // Return all if we have 10 or fewer if len(validFuncs) <= NUMBER_OF_PROBES { return validFuncs } - - // Only take first 10 
if we have more return validFuncs[:NUMBER_OF_PROBES] } -// func filterFunctions(funcs []string) []string { -// var validFuncs []string - -// // First pass: collect only functions from our packages -// for _, f := range funcs { -// if shouldProfileFunction(f) { -// validFuncs = append(validFuncs, f) -// } -// } - -// // If we have no valid functions, return empty list -// if len(validFuncs) == 0 { -// return nil -// } - -// // Sort for consistent ordering -// sort.Strings(validFuncs) - -// // Return all if we have 10 or fewer -// if len(validFuncs) <= NUMBER_OF_PROBES { -// return validFuncs -// } - -// // Only take first 10 if we have more -// return validFuncs[:NUMBER_OF_PROBES] -// } - func ExtractFunctions(binaryPath string) ([]FunctionInfo, error) { - // Open the binary file, err := elf.Open(binaryPath) if err != nil { return nil, fmt.Errorf("failed to open binary: %v", err) } defer file.Close() - // Get DWARF data dwarfData, err := file.DWARF() if err != nil { return nil, fmt.Errorf("failed to load DWARF data: %v", err) } - // Prepare result var functions []FunctionInfo - - // Iterate over DWARF entries reader := dwarfData.Reader() for { entry, err := reader.Next() @@ -627,21 +420,14 @@ func ExtractFunctions(binaryPath string) ([]FunctionInfo, error) { return nil, fmt.Errorf("error reading DWARF: %v", err) } if entry == nil { - break // End of entries + break } - - // Check for subprogram (function) entries if entry.Tag == dwarf.TagSubprogram { - // Extract function name funcName, _ := entry.Val(dwarf.AttrName).(string) - - // Extract package/module name (if available) var packageName string if compDir, ok := entry.Val(dwarf.AttrCompDir).(string); ok { packageName = compDir } - - // Add to the result if funcName != "" { functions = append(functions, FunctionInfo{ PackageName: packageName, @@ -650,121 +436,47 @@ func ExtractFunctions(binaryPath string) ([]FunctionInfo, error) { } } } - return functions, nil } -// hasDWARF checks if the given binary contains 
DWARF debug information. func hasDWARF(binaryPath string) (bool, error) { - // Open the binary file file, err := elf.Open(binaryPath) if err != nil { return false, fmt.Errorf("failed to open binary: %v", err) } defer file.Close() - // Check if DWARF data exists _, err = file.DWARF() if err != nil { - // Check if the error indicates missing DWARF information if err.Error() == "no DWARF data" { return false, nil } - // Otherwise, propagate the error return false, fmt.Errorf("failed to check DWARF data: %v", err) } - - // DWARF data exists return true, nil } -var analyzedBinaries []BinaryInfo -var waitForAttach bool = true -var bufferPool = sync.Pool{ - New: func() interface{} { - return new(bytes.Buffer) - }, -} +func fingerprintGoBinary(binaryPath string) (string, error) { + f, err := elf.Open(binaryPath) + if err != nil { + return "", err + } + defer f.Close() -var g_configsAccumulator *ConfigAccumulator + sections := make([]*elf.Section, len(f.Sections)) + copy(sections, f.Sections) + sort.Slice(sections, func(i, j int) bool { + return sections[i].Name < sections[j].Name + }) -type rcConfig struct { - ID string - Version int - ProbeType string `json:"type"` - Language string - Where struct { - TypeName string `json:"typeName"` - MethodName string `json:"methodName"` - SourceFile string - Lines []string - } - Tags []string - Template string - CaptureSnapshot bool - EvaluatedAt string - Capture struct { - MaxReferenceDepth int `json:"maxReferenceDepth"` - MaxFieldCount int `json:"maxFieldCount"` - } -} - -type ConfigAccumulator struct { - configs map[string]map[string]rcConfig - tmpl *template.Template - mu sync.RWMutex -} - -func NewConfigAccumulator() (*ConfigAccumulator, error) { - tmpl, err := template.New("config_template").Parse(explorationTestConfigTemplateText) - if err != nil { - return nil, fmt.Errorf("failed to parse template: %w", err) - } - - return &ConfigAccumulator{ - configs: make(map[string]map[string]rcConfig), - tmpl: tmpl, - }, nil -} - -// 
fingerprintGoBinary opens an ELF binary at binaryPath, -// iterates over its sections (in a sorted order by name), -// skips known non-deterministic sections (like .note.go.buildid), -// and computes a SHA256 hash over the remaining content. -func fingerprintGoBinary(binaryPath string) (string, error) { - // Open the ELF file. - f, err := elf.Open(binaryPath) - if err != nil { - return "", err - } - defer f.Close() - - // Make a copy of the sections and sort them by name. - sections := make([]*elf.Section, len(f.Sections)) - copy(sections, f.Sections) - sort.Slice(sections, func(i, j int) bool { - return sections[i].Name < sections[j].Name - }) - - // Create a hash to accumulate the fingerprint. hash := sha256.New() for _, sec := range sections { - // Skip sections with no bytes in the file. - if sec.Type == elf.SHT_NOBITS { + if sec.Type == elf.SHT_NOBITS || sec.Name == ".note.go.buildid" { continue } - - // Skip the Go build ID section. - if sec.Name == ".note.go.buildid" { - continue - } - - // Write the section name to the hash. if _, err := io.WriteString(hash, sec.Name); err != nil { return "", err } - - // Read the section data. data, err := sec.Data() if err != nil { return "", err @@ -773,13 +485,9 @@ func fingerprintGoBinary(binaryPath string) (string, error) { return "", err } } - return hex.EncodeToString(hash.Sum(nil)), nil } -// HaveISeenItBefore uses a simple in-memory map to record fingerprints. 
-var seenBinaries = make(map[string]struct{}) - func isAlreadyProcessed(binaryPath string) (bool, error) { fingerprint, err := fingerprintGoBinary(binaryPath) if err != nil { @@ -792,284 +500,102 @@ func isAlreadyProcessed(binaryPath string) (bool, error) { return false, nil } -func InspectBinary(t *testing.T, binaryPath string, pid int) error { - // // check that we can analyse the binary without targeting a specific function - // err := diconfig.AnalyzeBinary(&ditypes.ProcessInfo{BinaryPath: binaryPath}) - // if err != nil { - // // log.Fatalln("Failed to analyze", binaryPath, "--", err) - // return nil - // } - - // targets, err := ExtractFunctions(binaryPath) - // if err != nil { - // // log.Fatalf("Error extracting functions: %v", err) - // return nil - // } - - // hasDwarf, err := hasDWARF(binaryPath) - // if err != nil || !hasDwarf { - // // log.Fatalf("Error checking for DWARF info: %v", err) - // return nil - // } - - //processed, err := isAlreadyProcessed(binaryPath) - // - //if err != nil { - // LogDebug(t, "Failed to determine if `binaryPath` is already processed args: %v, binaryPath: %s", err, binaryPath) - // // Don't fail the entire processing - //} - // - //if processed { - // LogDebug(t, "Already processed %s, skipping.", binaryPath) - // return nil - //} - - allFuncs, err := listAllFunctions(binaryPath) - if err != nil { - analyzedBinaries = append(analyzedBinaries, BinaryInfo{ - path: binaryPath, - hasDebug: false, - }) - - return nil - } - - targets := filterFunctions(allFuncs) - //targets := allFuncs - - // Get process arguments - args, err := getProcessArgs(pid) - if err != nil { - return fmt.Errorf("Failed to process args: %v", err) - } - - // Get process current working directory - cwd, err := getProcessCwd(pid) +func getBinaryPath(pid int) string { + path, err := os.Readlink(fmt.Sprintf("/proc/%d/exe", pid)) if err != nil { - return fmt.Errorf("Failed to get Cwd: %v", err) + return "" } - - // // Get process environment variables - env, err 
:= getProcessEnv(pid) - if err != nil { - return fmt.Errorf("Failed to get Env: %v", err) + realPath, err := filepath.EvalSymlinks(path) + if err == nil { + path = realPath } + return path +} - serviceName, err := extractDDService(env) +func getParentPID(pid int) int { + ppidStr, err := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)) if err != nil { - return fmt.Errorf("Failed to get Env: %v, binaryPath: %s", err, binaryPath) - } - - LogDebug(t, "\n=======================================") - LogDebug(t, "πŸ” SERVICE NAME: %s", serviceName) - LogDebug(t, "πŸ” ANALYZING BINARY: %s", binaryPath) - LogDebug(t, "πŸ” ENV: %v", env) - LogDebug(t, "πŸ” ARGS: %v", args) - LogDebug(t, "πŸ” CWD: %s", cwd) - LogDebug(t, "πŸ” Elected %d target functions:", len(targets)) - for _, f := range targets { - LogDebug(t, " β†’ Package: %s, Function: %s, FullName: %s", f.PackageName, f.FunctionName, f.FullName) - } - - // hasDWARF, dwarfErr := hasDWARFInfo(binaryPath) - // if dwarfErr != nil { - // log.Printf("Error checking DWARF info: %v", dwarfErr) - // } else { - // log.Printf("Binary has DWARF info: %v", hasDWARF) - // } - // LogDebug(t, "πŸ” ENV: %v", env) - LogDebug(t, "=======================================") - - // Check if the binary exists - if _, err := os.Stat(binaryPath); err != nil { - return fmt.Errorf("(1) binary inspection failed: %v", err) - } - - analyzedBinaries = append(analyzedBinaries, BinaryInfo{ - path: binaryPath, - hasDebug: len(targets) > 0, - }) - - // i := 0 - // // Re-check binary existence - // for { - // if _, err := os.Stat(binaryPath); err != nil { - // time.Sleep(10 * time.Hour) - // return fmt.Errorf("(2) binary inspection failed: %v", err) - // } - - // // if strings.HasSuffix(binaryPath, "generate-protos") { - // // break - // // } - - // if strings.HasSuffix(binaryPath, "conformance.test") { - // time.Sleep(10 * time.Second) - // break - // } - - // i++ - // if i > 11 { - // break - // } - - // // time.Sleep(100 * time.Millisecond) - // } 
- - LogDebug(t, "βœ… Analysis complete for: %s", binaryPath) - LogDebug(t, "=======================================\n") - - // Notify the ConfigManager that a new process has arrived - g_ConfigManager.ProcTracker.Test_HandleProcessStart(uint32(pid)) - - t.Logf("About to request instrumentations for binary: %s, pid: %d.", binaryPath, pid) - - if err := g_configsAccumulator.AddTargets(targets, serviceName); err != nil { - t.Logf("Error adding target: %v, binaryPath: %s", err, binaryPath) - return fmt.Errorf("add targets failed: %v, binary: %s", err, binaryPath) + return 0 } - - if err = g_configsAccumulator.WriteConfigs(); err != nil { - t.Logf("Error writing configs: %v, binaryPath: %s", err, binaryPath) - return fmt.Errorf("error adding configs: %v, binary: %s", err, binaryPath) + fields := strings.Fields(string(ppidStr)) + if len(fields) < 4 { + return 0 } + ppid, _ := strconv.Atoi(fields[3]) + return ppid +} - //cfgTemplate, err := template.New("config_template").Parse(explorationTestConfigTemplateText) - //require.NoError(t, err) - // - //buf := bufferPool.Get().(*bytes.Buffer) - //buf.Reset() - //defer bufferPool.Put(buf) - // - //if err = cfgTemplate.Execute(buf, targets); err != nil { - // return fmt.Errorf("template execution failed: %w", err) - //} - // - //_, err = g_ConfigManager.ConfigWriter.Write(buf.Bytes()) - // - //if err != nil { - // return fmt.Errorf("config writing failed: %v, binary: %s", err, binaryPΖ’ath) - //} - - time.Sleep(2 * time.Second) - - t.Logf("Requested to instrument %d functions for binary: %s, pid: %d.", len(targets), binaryPath, pid) - - for _, f := range targets { - t.Logf(" -> requested instrumentation for %v", f) +func findAncestors(pid int, tree map[int]bool) { + for pid > 1 { + if tree[pid] { + return + } + tree[pid] = true + ppid := getParentPID(pid) + if ppid <= 1 { + return + } + pid = ppid } +} - //b := []byte{} - //var buf *bytes.Buffer - - if waitForAttach && os.Getenv("DEBUG") == "true" { - pid := os.Getpid() - 
t.Logf("(1) Waiting to attach for PID: %d", pid) - time.Sleep(30 * time.Second) - waitForAttach = false +func LogDebug(t *testing.T, format string, args ...any) { + if DEBUG { + t.Logf(format, args...) } +} - /* - requesterdFuncs := 0 - for _, f := range targets { - - // if !strings.Contains(f.FullName, "blabla_blabla") { - // continue - // } - - // if !strings.Contains(f.FullName, "FullName") { - // continue - // } - - // if f.FullName != "regexp.(*bitState).shouldVisit" { - // continue - // } - - // if f.FullName != "google.golang.org/protobuf/encoding/protodelim_test.(*notBufioReader).UnreadRune" { - // continue - // } - - buf = bytes.NewBuffer(b) - err = cfgTemplate.Execute(buf, f) - if err != nil { - continue - } - - // LogDebug(t, "Requesting instrumentation for %v", f) - t.Logf("Requesting instrumentation for %v", f) - _, err := g_ConfigManager.ConfigWriter.Write(buf.Bytes()) - - if err != nil { - continue - } - - requesterdFuncs++ - } - */ - /*if !waitForAttach { - time.Sleep(100 * time.Second) - }*/ - - /*if requesterdFuncs > 0 { - // if waitForAttach { - // pid := os.Getpid() - // t.Logf("(2) Waiting to attach for PID: %d", pid) - // time.Sleep(30 * time.Second) - // waitForAttach = false - // } - - // Wait for probes to be instrumented - time.Sleep(2 * time.Second) +func NewProbeManager(t *testing.T) *ProbeManager { + return &ProbeManager{t: t} +} - t.Logf("Requested to instrument %d functions for binary: %s, pid: %d.", requesterdFuncs, binaryPath, pid) - }*/ +func (pm *ProbeManager) Install(pid int, function string) error { + pm.mu.Lock() + defer pm.mu.Unlock() + v, _ := pm.installedProbes.LoadOrStore(pid, make(map[string]struct{})) + probes := v.(map[string]struct{}) + probes[function] = struct{}{} + pm.t.Logf("Installing probe: PID=%d Function=%s", pid, function) return nil } -func (ca *ConfigAccumulator) AddTargets(targets []FunctionInfo, serviceName string) error { - ca.mu.Lock() - defer ca.mu.Unlock() - - buf := bufferPool.Get().(*bytes.Buffer) - 
buf.Reset() - defer bufferPool.Put(buf) - - buf.WriteString("{") - if err := ca.tmpl.Execute(buf, targets); err != nil { - return fmt.Errorf("failed to execute template: %w", err) - } - buf.WriteString("}") - - var newConfigs map[string]rcConfig - if err := json.NewDecoder(buf).Decode(&newConfigs); err != nil { - return fmt.Errorf("failed to decode generated configs: %w", err) - } - - if ca.configs[serviceName] == nil { - ca.configs[serviceName] = make(map[string]rcConfig) - } +func (pm *ProbeManager) Remove(pid int, function string) error { + pm.mu.Lock() + defer pm.mu.Unlock() - for probeID, config := range newConfigs { - ca.configs[serviceName][probeID] = config + if v, ok := pm.installedProbes.Load(pid); ok { + probes := v.(map[string]struct{}) + delete(probes, function) + pm.t.Logf("Removing probe: PID=%d Function=%s", pid, function) } - return nil } -func (ca *ConfigAccumulator) WriteConfigs() error { - ca.mu.RLock() - defer ca.mu.RUnlock() - - buf := bufferPool.Get().(*bytes.Buffer) - buf.Reset() - defer bufferPool.Put(buf) +func (pm *ProbeManager) CollectData(pid int, function string) (bool, error) { + if v, ok := pm.dataReceived.Load(pid); ok { + dataMap := v.(map[string]bool) + return dataMap[function], nil + } + return false, nil +} - // Marshal the full config structure (service name -> probe configs) - if err := json.NewEncoder(buf).Encode(ca.configs); err != nil { - return fmt.Errorf("failed to marshal configs: %w", err) +func NewProcessTracker(t *testing.T) *ProcessTracker { + return &ProcessTracker{ + t: t, + processes: make(map[int]*ProcessInfo), + stopChan: make(chan struct{}), + analyzedBinaries: make(map[string]bool), + analyzedPIDs: make(map[int]bool), + done: make(chan struct{}), } +} - return g_ConfigManager.ConfigWriter.WriteSync(buf.Bytes()) +func (pt *ProcessTracker) markAnalyzed(pid int, path string) { + pt.mu.Lock() + defer pt.mu.Unlock() + pt.analyzedPIDs[pid] = true + pt.analyzedBinaries[path] = true } func (pt *ProcessTracker) 
addProcess(pid int, parentPID int) *ProcessInfo { @@ -1089,72 +615,34 @@ func (pt *ProcessTracker) addProcess(pid int, parentPID int) *ProcessInfo { StartTime: time.Now(), Analyzed: false, } - pt.processes[pid] = proc - - // Add to parent's children if parent exists if parent, exists := pt.processes[parentPID]; exists { parent.Children = append(parent.Children, proc) } - - pt.LogTrace("πŸ‘Ά New process: PID=%d, Parent=%d, Binary=%s", pid, parentPID, binaryPath) + pt.LogTrace("New process: PID=%d, Parent=%d, Binary=%s", pid, parentPID, binaryPath) return proc } -func getBinaryPath(pid int) string { - path, err := os.Readlink(fmt.Sprintf("/proc/%d/exe", pid)) - if err != nil { - return "" - } - - // Resolve any symlinks - realPath, err := filepath.EvalSymlinks(path) - if err == nil { - path = realPath - } - - return path -} - func (pt *ProcessTracker) analyzeBinary(pid int, info *ProcessInfo) error { if info == nil { return fmt.Errorf("nil process info") } - pt.mu.Lock() info.State = StateAnalyzing pt.mu.Unlock() - // pt.LogTrace("πŸ”Ž Analyzing binary PID=%d Path=%s", pid, info.BinaryPath) - - // Perform analysis if err := InspectBinary(pt.t, info.BinaryPath, pid); err != nil { pt.mu.Lock() info.State = StateNew pt.mu.Unlock() return fmt.Errorf("binary analysis failed: %v", err) } - pt.mu.Lock() info.State = StateRunning pt.mu.Unlock() - return nil } -func getParentPID(pid int) int { - ppidStr, err := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)) - if err != nil { - return 0 - } - fields := strings.Fields(string(ppidStr)) - if len(fields) < 4 { - return 0 - } - ppid, _ := strconv.Atoi(fields[3]) - return ppid -} - func (pt *ProcessTracker) scanProcessTree() error { if err := syscall.Kill(-g_cmd.Process.Pid, syscall.SIGSTOP); err != nil { if err != unix.ESRCH { @@ -1162,30 +650,13 @@ func (pt *ProcessTracker) scanProcessTree() error { } return nil } - - // pt.profiler.OnProcessesPaused() - defer func() { if err := syscall.Kill(-g_cmd.Process.Pid, syscall.SIGCONT); 
err != nil { if err != unix.ESRCH { pt.LogTrace("⚠️ Failed to resume PID %d: %v", -g_cmd.Process.Pid, err) } - } else { - // pt.LogTrace("▢️ Resumed process: PID=%d", -g_cmd.Process.Pid) } - - // pt.profiler.OnProcessesResumed() - - // if err := unix.Kill(pid, unix.SIGCONT); err != nil { - // if err != unix.ESRCH { - // pt.LogTrace("⚠️ Failed to resume PID %d: %v", pid, err) - // } - // } else { - // pt.LogTrace("▢️ Resumed process: PID=%d", pid) - // } }() - - // Get all processes allPids := make(map[int]bool) if entries, err := os.ReadDir("/proc"); err == nil { for _, entry := range entries { @@ -1194,8 +665,6 @@ func (pt *ProcessTracker) scanProcessTree() error { } } } - - // Record our own process tree for exclusion ourProcessTree := make(map[int]bool) ourPid := os.Getpid() findAncestors(ourPid, ourProcessTree) @@ -1205,42 +674,25 @@ func (pt *ProcessTracker) scanProcessTree() error { path string ppid int } - - // Check each PID for pid := range allPids { - // Skip if already analyzed pt.mu.RLock() if pt.analyzedPIDs[pid] { pt.mu.RUnlock() continue } pt.mu.RUnlock() - - // Skip if in our process tree if ourProcessTree[pid] { continue } - - // Get process path binaryPath := getBinaryPath(pid) if binaryPath == "" { continue } - - // Get parent PID ppid := getParentPID(pid) - - // Skip if parent is in our tree if ourProcessTree[ppid] { continue } - - // Always analyze: - // 1. Test binaries (.test) - // 2. Go build executables in /tmp - // 3. 
Children of test binaries shouldAnalyze := false - if strings.HasSuffix(binaryPath, ".test") { shouldAnalyze = true pt.LogTrace("Found test binary: %s (PID=%d)", binaryPath, pid) @@ -1248,185 +700,107 @@ func (pt *ProcessTracker) scanProcessTree() error { shouldAnalyze = true pt.LogTrace("Found build binary: %s (PID=%d)", binaryPath, pid) } else { - // Check if parent is a test binary parentPath := getBinaryPath(ppid) if strings.HasSuffix(parentPath, ".test") { shouldAnalyze = true pt.LogTrace("Found child of test: %s (PID=%d, Parent=%d)", binaryPath, pid, ppid) } } - if shouldAnalyze { - // Verify process still exists if _, err := os.Stat(fmt.Sprintf("/proc/%d", pid)); err == nil { toAnalyze = append(toAnalyze, struct { pid int path string ppid int }{pid, binaryPath, ppid}) - - // Add to process tree if pt.processes[pid] == nil { pt.addProcess(pid, ppid) } } } } - if len(toAnalyze) > 0 { - pt.LogTrace("\nπŸ” Found %d processes to analyze:", len(toAnalyze)) + pt.LogTrace("πŸ” Found %d processes to analyze:", len(toAnalyze)) for _, p := range toAnalyze { pt.LogTrace(" PID=%d PPID=%d Path=%s", p.pid, p.ppid, p.path) } } - var activePids []int for _, p := range toAnalyze { activePids = append(activePids, p.pid) } - - // if pt.profiler!= nil { - // pt.profiler.OnTick(activePids) - // } - - // Process in small batches batchSize := 2 for i := 0; i < len(toAnalyze); i += batchSize { end := i + batchSize if end > len(toAnalyze) { end = len(toAnalyze) } - var wg sync.WaitGroup for _, p := range toAnalyze[i:end] { wg.Add(1) go func(pid int, path string) { defer wg.Done() - - // Verify process still exists if _, err := os.Stat(fmt.Sprintf("/proc/%d", pid)); err != nil { return } - - pt.LogTrace("πŸ” Stopping process for analysis: PID=%d Path=%s", pid, path) - - // Get process info + pt.LogTrace("Stopping process for analysis: PID=%d Path=%s", pid, path) pt.mu.RLock() proc := pt.processes[pid] pt.mu.RUnlock() - if proc == nil { return } - - // Stop process - // if err := 
syscall.Kill(-g_cmd.Process.Pid, syscall.SIGSTOP); err != nil { - // if err != unix.ESRCH { - // pt.LogTrace("⚠️ Failed to stop PID %d: %v", pid, err) - // } - // return - // } - - // if err := unix.Kill(pid, unix.SIGSTOP); err != nil { - // if err != unix.ESRCH { - // pt.LogTrace("⚠️ Failed to stop PID %d: %v", pid, err) - // } - // return - // } - - // Ensure process gets resumed - // defer func() { - // if err := syscall.Kill(-g_cmd.Process.Pid, syscall.SIGCONT); err != nil { - // if err != unix.ESRCH { - // pt.LogTrace("⚠️ Failed to resume PID %d: %v", pid, err) - // } - // } else { - // pt.LogTrace("▢️ Resumed process: PID=%d", pid) - // } - - // // if err := unix.Kill(pid, unix.SIGCONT); err != nil { - // // if err != unix.ESRCH { - // // pt.LogTrace("⚠️ Failed to resume PID %d: %v", pid, err) - // // } - // // } else { - // // pt.LogTrace("▢️ Resumed process: PID=%d", pid) - // // } - // }() - - // Wait a bit after stopping - // time.Sleep(1 * time.Millisecond) - - // Analyze with timeout if err := pt.analyzeBinary(pid, proc); err != nil { - pt.LogTrace("⚠️ Analysis failed: %v", err) + pt.LogTrace("Analysis failed: %v", err) } else { proc.Analyzed = true pt.markAnalyzed(pid, path) - // pt.LogTrace("βœ… Analysis complete: PID=%d", pid) } - - // go func() { - // if err := pt.analyzeBinary(pid, proc); err != nil { - // pt.LogTrace("⚠️ Analysis failed: %v", err) - // done <- false - // return - // } - - // proc.Analyzed = true - // pt.markAnalyzed(pid, path) - // pt.LogTrace("βœ… Analysis complete: PID=%d", pid) - // done <- true - // }() }(p.pid, p.path) } wg.Wait() - - // Wait between batches time.Sleep(10 * time.Microsecond) } - return nil } -func (pt *ProcessTracker) Cleanup() { -} - -// Helper to record process tree starting from a PID -func findAncestors(pid int, tree map[int]bool) { - for pid > 1 { - if tree[pid] { - return // Already visited +func (pt *ProcessTracker) logProcessTree() { + pt.mu.RLock() + defer pt.mu.RUnlock() + pt.t.Log("\n🌳 Process 
Tree:") + var printNode func(proc *ProcessInfo, prefix string) + printNode = func(proc *ProcessInfo, prefix string) { + state := "➑️" + switch proc.State { + case StateAnalyzing: + state = "πŸ”" + case StateRunning: + state = "▢️" + case StateExited: + state = "⏹️" } - tree[pid] = true - - // Get parent - ppid := getParentPID(pid) - if ppid <= 1 { - return + analyzed := "" + if proc.Analyzed { + analyzed = "βœ“" } - pid = ppid + pt.LogTrace("%s%s [PID=%d] %s%s (Parent=%d)", + prefix, state, proc.PID, filepath.Base(proc.BinaryPath), analyzed, proc.ParentPID) + for _, child := range proc.Children { + printNode(child, prefix+" ") + } + } + if main, exists := pt.processes[pt.mainPID]; exists { + printNode(main, "") } } -var g_cmd *exec.Cmd - func (pt *ProcessTracker) StartTracking(command string, args []string, dir string) error { - // ctx, cancel := context.WithCancel(context.Background()) - // defer cancel() - - // if err := pt.profiler.Start(ctx); err != nil { - // return fmt.Errorf("failed to start profiler: %w", err) - // } - // defer pt.profiler.Stop() - cmd := exec.Command(command, args...) 
g_cmd = cmd - if dir != "" { cmd.Dir = dir } - cmd.Env = append( - os.Environ(), + cmd.Env = append(os.Environ(), "PWD="+dir, "DD_DYNAMIC_INSTRUMENTATION_ENABLED=true", "DD_SERVICE=go-di-exploration-test-service") @@ -1441,23 +815,11 @@ func (pt *ProcessTracker) StartTracking(command string, args []string, dir strin pt.mainPID = cmd.Process.Pid pt.addProcess(pt.mainPID, os.Getpid()) - // Start scanning with high frequency initially go func() { - // Initial high-frequency scanning initialTicker := time.NewTicker(1 * time.Millisecond) defer initialTicker.Stop() - - // After initial period, reduce frequency slightly - // time.AfterFunc(5*time.Second, func() { - // initialTicker.Stop() - // }) - - // regularTicker := time.NewTicker(10 * time.Millisecond) - // defer regularTicker.Stop() - logTicker := time.NewTicker(10 * time.Second) defer logTicker.Stop() - for { select { case <-pt.stopChan: @@ -1467,86 +829,174 @@ func (pt *ProcessTracker) StartTracking(command string, args []string, dir strin pt.LogTrace("⚠️ Error scanning: %v", err) } case <-logTicker.C: - // pt.logProcessTree() } } }() err := cmd.Wait() close(pt.stopChan) - pt.LogTrace("Analyzed %d binaries.", len(analyzedBinaries)) - for _, binary := range analyzedBinaries { pt.LogTrace("Analyzed %s (debug info: %v)", binary.path, binary.hasDebug) } - return err } -func (pt *ProcessTracker) logProcessTree() { - pt.mu.RLock() - defer pt.mu.RUnlock() +func (pt *ProcessTracker) Cleanup() { + // Cleanup logic if needed. +} - pt.t.Log("\n🌳 Process Tree:") - var printNode func(proc *ProcessInfo, prefix string) - printNode = func(proc *ProcessInfo, prefix string) { - state := "➑️" - switch proc.State { - case StateAnalyzing: - state = "πŸ”" - case StateRunning: - state = "▢️" - case StateExited: - state = "⏹️" - } +func (pt *ProcessTracker) LogTrace(format string, args ...any) { + if TRACE { + pt.t.Logf(format, args...) 
+ } +} - analyzed := "" - if proc.Analyzed { - analyzed = "βœ“" - } +func NewConfigAccumulator() (*ConfigAccumulator, error) { + tmpl, err := template.New("config_template").Parse(explorationTestConfigTemplateText) + if err != nil { + return nil, fmt.Errorf("failed to parse template: %w", err) + } + return &ConfigAccumulator{ + configs: make(map[string]map[string]rcConfig), + tmpl: tmpl, + }, nil +} - pt.LogTrace("%s%s [PID=%d] %s%s (Parent=%d)", - prefix, state, proc.PID, filepath.Base(proc.BinaryPath), analyzed, proc.ParentPID) +func (ca *ConfigAccumulator) AddTargets(targets []FunctionInfo, serviceName string) error { + ca.mu.Lock() + defer ca.mu.Unlock() - for _, child := range proc.Children { - printNode(child, prefix+" ") - } + buf := bufferPool.Get().(*bytes.Buffer) + buf.Reset() + defer bufferPool.Put(buf) + + buf.WriteString("{") + if err := ca.tmpl.Execute(buf, targets); err != nil { + return fmt.Errorf("failed to execute template: %w", err) } + buf.WriteString("}") - if main, exists := pt.processes[pt.mainPID]; exists { - printNode(main, "") + var newConfigs map[string]rcConfig + if err := json.NewDecoder(buf).Decode(&newConfigs); err != nil { + return fmt.Errorf("failed to decode generated configs: %w", err) + } + if ca.configs[serviceName] == nil { + ca.configs[serviceName] = make(map[string]rcConfig) } + for probeID, config := range newConfigs { + ca.configs[serviceName][probeID] = config + } + return nil } -var DEBUG bool = true -var TRACE bool = false +func (ca *ConfigAccumulator) WriteConfigs() error { + ca.mu.RLock() + defer ca.mu.RUnlock() -func (pt *ProcessTracker) LogTrace(format string, args ...any) { - if TRACE { - pt.t.Logf(format, args...) 
+ buf := bufferPool.Get().(*bytes.Buffer) + buf.Reset() + defer bufferPool.Put(buf) + + if err := json.NewEncoder(buf).Encode(ca.configs); err != nil { + return fmt.Errorf("failed to marshal configs: %w", err) } + return g_ConfigManager.ConfigWriter.WriteSync(buf.Bytes()) } -func LogDebug(t *testing.T, format string, args ...any) { - if DEBUG { - t.Logf(format, args...) +func InspectBinary(t *testing.T, binaryPath string, pid int) error { + allFuncs, err := listAllFunctions(binaryPath) + if err != nil { + analyzedBinaries = append(analyzedBinaries, BinaryInfo{ + path: binaryPath, + hasDebug: false, + }) + return nil } -} -var g_RepoInfo *RepoInfo -var g_ConfigManager *diconfig.ReaderConfigManager + targets := filterFunctions(allFuncs) + args, err := getProcessArgs(pid) + if err != nil { + return fmt.Errorf("failed to process args: %v", err) + } + cwd, err := getProcessCwd(pid) + if err != nil { + return fmt.Errorf("failed to get Cwd: %v", err) + } + env, err := getProcessEnv(pid) + if err != nil { + return fmt.Errorf("failed to get Env: %v", err) + } + serviceName, err := extractDDService(env) + if err != nil { + return fmt.Errorf("failed to get Env: %v, binaryPath: %s", err, binaryPath) + } + + LogDebug(t, "\n=======================================") + LogDebug(t, "πŸ” SERVICE NAME: %s", serviceName) + LogDebug(t, "πŸ” ANALYZING BINARY: %s", binaryPath) + LogDebug(t, "πŸ” ENV: %v", env) + LogDebug(t, "πŸ” ARGS: %v", args) + LogDebug(t, "πŸ” CWD: %s", cwd) + LogDebug(t, "πŸ” Elected %d target functions:", len(targets)) + for _, f := range targets { + LogDebug(t, " β†’ Package: %s, Function: %s, FullName: %s", f.PackageName, f.FunctionName, f.FullName) + } + LogDebug(t, "=======================================") + + if _, err := os.Stat(binaryPath); err != nil { + return fmt.Errorf("(1) binary inspection failed: %v", err) + } + + analyzedBinaries = append(analyzedBinaries, BinaryInfo{ + path: binaryPath, + hasDebug: len(targets) > 0, + }) + LogDebug(t, "βœ… 
 Analysis complete for: %s", binaryPath) + LogDebug(t, "=======================================\n") + + g_ConfigManager.ProcTracker.HandleProcessStartSync(uint32(pid)) + t.Logf("About to request instrumentations for binary: %s, pid: %d.", binaryPath, pid) + + if err := g_configsAccumulator.AddTargets(targets, serviceName); err != nil { + t.Logf("Error adding target: %v, binaryPath: %s", err, binaryPath) + return fmt.Errorf("add targets failed: %v, binary: %s", err, binaryPath) + } + if err = g_configsAccumulator.WriteConfigs(); err != nil { + t.Logf("Error writing configs: %v, binaryPath: %s", err, binaryPath) + return fmt.Errorf("error adding configs: %v, binary: %s", err, binaryPath) + } + time.Sleep(2 * time.Second) + t.Logf("Requested to instrument %d functions for binary: %s, pid: %d.", len(targets), binaryPath, pid) + for _, f := range targets { + t.Logf(" -> requested instrumentation for %v", f) + } + if waitForAttach && os.Getenv("DEBUG") == "true" { + pid := os.Getpid() + t.Logf("Waiting to attach for PID: %d", pid) + time.Sleep(30 * time.Second) + waitForAttach = false + } + return nil +} +// TestExplorationGoDI is the entry point of the integration test of Go DI. The idea is to +// test Go DI systematically and in an exploratory manner. At a high level, here are the steps this test takes: +// 1. Clones protobuf and applies patches. +// 2. Figures out the 1st-party packages involved with the cloned project (to avoid 3rd-party/std libs). +// 3. Compiles the test. +// 4. Runs the test in a supervised environment, spawning processes as a group. +// 5. Periodically pauses and resumes the process group to analyze each unique binary. +// 6. Invokes Go DI to put probes in the top X functions defined by the `NUMBER_OF_PROBES` const. +// +// The goal is to exercise as many code paths as possible of the Go DI system.
func TestExplorationGoDI(t *testing.T) { require.NoError(t, rlimit.RemoveMemlock(), "Failed to remove memlock limit") if features.HaveMapType(ebpf.RingBuf) != nil { t.Skip("Ringbuffers not supported on this kernel") } - eventOutputWriter := &explorationEventOutputTestWriter{ - t: t, - } - + eventOutputWriter := &explorationEventOutputTestWriter{t: t} opts := &dynamicinstrumentation.DIOptions{ RateLimitPerProbePerSecond: 0.0, ReaderWriterOptions: dynamicinstrumentation.ReaderWriterOptions{ @@ -1560,7 +1010,6 @@ func TestExplorationGoDI(t *testing.T) { GoDI *dynamicinstrumentation.GoDI err error ) - GoDI, err = dynamicinstrumentation.RunDynamicInstrumentation(opts) require.NoError(t, err) t.Cleanup(GoDI.Close) @@ -1569,10 +1018,9 @@ func TestExplorationGoDI(t *testing.T) { if !ok { t.Fatal("Config manager is of wrong type") } - g_ConfigManager = cm - g_configsAccumulator, err = NewConfigAccumulator() + g_configsAccumulator, err = NewConfigAccumulator() if err != nil { t.Fatal("Failed to create ConfigAccumulator") } @@ -1590,23 +1038,6 @@ func TestExplorationGoDI(t *testing.T) { require.NoError(t, err) } -type explorationEventOutputTestWriter struct { - t *testing.T - expectedResult map[string]*ditypes.CapturedValue -} - -func (e *explorationEventOutputTestWriter) Write(p []byte) (n int, err error) { - var snapshot ditypes.SnapshotUpload - if err := json.Unmarshal(p, &snapshot); err != nil { - e.t.Error("failed to unmarshal snapshot", err) - } - - funcName := snapshot.Debugger.ProbeInSnapshot.Type + "." 
+ snapshot.Debugger.ProbeInSnapshot.Method - e.t.Logf("Received snapshot for function: %s", funcName) - - return len(p), nil -} - func initializeTempDir(t *testing.T, predefinedTempDir string) string { if predefinedTempDir != "" { return predefinedTempDir @@ -1618,32 +1049,20 @@ func initializeTempDir(t *testing.T, predefinedTempDir string) string { return tempDir } -// RepoInfo holds scanned repository package information -type RepoInfo struct { - Packages map[string]bool // Package names found in repo - RepoPath string // Path to the repo - CommitHash string // Current commit hash (optional) -} - func ScanRepoPackages(repoPath string) (*RepoInfo, error) { info := &RepoInfo{ Packages: make(map[string]bool), RepoPath: repoPath, } - - // Get git hash if available if _, err := os.Stat(filepath.Join(repoPath, ".git")); err == nil { if hash, err := exec.Command("git", "-C", repoPath, "rev-parse", "HEAD").Output(); err == nil { info.CommitHash = strings.TrimSpace(string(hash)) } } - err := filepath.Walk(repoPath, func(path string, f os.FileInfo, err error) error { if err != nil { return nil } - - // Skip certain directories if f.IsDir() { dirname := filepath.Base(path) if dirname == ".git" || @@ -1656,29 +1075,21 @@ func ScanRepoPackages(repoPath string) (*RepoInfo, error) { } return nil } - - // Only process .go files if !strings.HasSuffix(path, ".go") { return nil } - - // Skip test files and generated files if strings.HasSuffix(path, "_test.go") || strings.HasSuffix(path, ".pb.go") { return nil } - - // Ensure the file is within the repo (not in .cache etc) relPath, err := filepath.Rel(repoPath, path) if err != nil || strings.Contains(relPath, "..") { return nil } - content, err := os.ReadFile(path) if err != nil { return nil } - scanner := bufio.NewScanner(bytes.NewReader(content)) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) @@ -1692,11 +1103,9 @@ func ScanRepoPackages(repoPath string) (*RepoInfo, error) { } return nil }) - if len(info.Packages) 
== 0 { return nil, fmt.Errorf("no packages found in repository at %s", repoPath) } - return info, err } @@ -1707,18 +1116,14 @@ func cloneProtobufRepo(t *testing.T, modulePath string, commitHash string) *Repo cmd.Stderr = os.Stderr require.NoError(t, cmd.Run(), "Failed to clone repository") } - if commitHash != "" { cmd := exec.Command("git", "checkout", commitHash) cmd.Dir = modulePath require.NoError(t, cmd.Run(), "Failed to checkout commit hash") } - - // Scan packages after clone/checkout info, err := ScanRepoPackages(modulePath) require.NoError(t, err, "Failed to scan repo packages") - // Log the organized package information var pkgs []string for pkg := range info.Packages { if strings.Contains(pkg, "/tmp") { @@ -1727,10 +1132,8 @@ func cloneProtobufRepo(t *testing.T, modulePath string, commitHash string) *Repo pkgs = append(pkgs, pkg) } sort.Strings(pkgs) - t.Logf("πŸ“¦ Found %d packages in protobuf repo:", len(pkgs)) - // Group packages by their top-level directory groups := make(map[string][]string) for _, pkg := range pkgs { parts := strings.SplitN(pkg, "/", 2) @@ -1738,20 +1141,17 @@ func cloneProtobufRepo(t *testing.T, modulePath string, commitHash string) *Repo groups[topLevel] = append(groups[topLevel], pkg) } - // Print grouped packages var topLevels []string for k := range groups { topLevels = append(topLevels, k) } sort.Strings(topLevels) - for _, topLevel := range topLevels { t.Logf(" %s/", topLevel) for _, pkg := range groups[topLevel] { t.Logf(" β†’ %s", pkg) } } - return info } @@ -1767,16 +1167,13 @@ func copyDir(src, dst string) error { if err := os.MkdirAll(dst, 0755); err != nil { return err } - for _, entry := range entries { srcPath := filepath.Join(src, entry.Name()) dstPath := filepath.Join(dst, entry.Name()) - info, err := entry.Info() if err != nil { return err } - if info.IsDir() { if err = copyDir(srcPath, dstPath); err != nil { return err @@ -1796,57 +1193,14 @@ func copyFile(srcFile, dstFile string) error { return err } defer 
src.Close() - if err = os.MkdirAll(filepath.Dir(dstFile), 0755); err != nil { return err } - dst, err := os.Create(dstFile) if err != nil { return err } defer dst.Close() - _, err = io.Copy(dst, src) return err } - -var explorationTestConfigTemplateText = ` - {{- range $index, $target := .}} - {{- if $index}},{{end}} - "{{$target.ProbeId}}": { - "id": "{{$target.ProbeId}}", - "version": 0, - "type": "LOG_PROBE", - "language": "go", - "where": { - "typeName": "{{$target.PackageName}}", - "methodName": "{{$target.FunctionName}}" - }, - "tags": [], - "template": "Executed {{$target.PackageName}}.{{$target.FunctionName}}, it took {@duration}ms", - "segments": [ - { - "str": "Executed {{$target.PackageName}}.{{$target.FunctionName}}, it took " - }, - { - "dsl": "@duration", - "json": { - "ref": "@duration" - } - }, - { - "str": "ms" - } - ], - "captureSnapshot": false, - "capture": { - "maxReferenceDepth": 10 - }, - "sampling": { - "snapshotsPerSecond": 5000 - }, - "evaluateAt": "EXIT" - } - {{- end}} -` diff --git a/pkg/dynamicinstrumentation/testutil/exploration_tests/patches/protobuf/integration_test.go b/pkg/dynamicinstrumentation/testutil/exploration_tests/patches/protobuf/integration_test.go index ad9b259b8a85d4..cb8ef847f40b34 100644 --- a/pkg/dynamicinstrumentation/testutil/exploration_tests/patches/protobuf/integration_test.go +++ b/pkg/dynamicinstrumentation/testutil/exploration_tests/patches/protobuf/integration_test.go @@ -14,6 +14,7 @@ import ( "fmt" "io" "io/fs" + "math/rand" "net/http" "os" "os/exec" @@ -536,7 +537,12 @@ func (c command) mustRun(t *testing.T, args ...string) string { for i, arg := range args { cmdArgs = append(cmdArgs, arg) if i == 1 { // right after "test" - cmdArgs = append(cmdArgs, "-ldflags=-w=false -s=false", "-count=1", "-timeout=30m") + cmdArgs = append(cmdArgs, + "-ldflags=-w=false -s=false", + "-gcflags=all=-l", + "-count=1", + "-timeout=30m", + ) } } } else { @@ -552,7 +558,10 @@ func (c command) mustRun(t *testing.T, args 
...string) string { if c.Env != nil { cmd.Env = c.Env } - cmd.Env = append(cmd.Env, "PWD="+cmd.Dir) + cmd.Env = append(cmd.Env, + fmt.Sprintf("PWD=%s", cmd.Dir), + fmt.Sprintf("DD_SERVICE=go-di-exploration-test-%d", rand.Int()), + ) cmd.Stdout = stdout cmd.Stderr = stderr