Skip to content

Commit

Permalink
🐛 Fix selector match count to show total matches
Browse files Browse the repository at this point in the history
  • Loading branch information
wesen committed Jan 26, 2025
1 parent 0042576 commit 63fc514
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
10 changes: 9 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -597,4 +597,12 @@ Simplified data extraction by merging --extract and --extract-data flags:
- Changed --extract to always output data in YAML format
- Removed redundant --extract-data flag
- Maintained template support with --extract-template and config file templates
- Improved consistency in data output formats
- Improved consistency in data output formats

# Fixed Selector Match Count

Fixed the selector match count to show the total number of matches instead of the truncated sample count:

- The reported count now reflects the total number of matches, computed before sample truncation
- The sample count limit only affects how many samples are displayed, not the total count
- This provides more accurate match statistics while keeping the output manageable
12 changes: 7 additions & 5 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ type Selector struct {
}

type SimplifiedSample struct {
HTML []htmlsimplifier.Document `yaml:"html"`
Context []htmlsimplifier.Document `yaml:"context"`
Path string `yaml:"path"`
HTML []htmlsimplifier.Document `yaml:"html,omitempty"`
Context []htmlsimplifier.Document `yaml:"context,omitempty"`
Path string `yaml:"path,omitempty"`
}

type SimplifiedResult struct {
Expand Down Expand Up @@ -281,9 +281,9 @@ func (c *TestHTMLSelectorCommand) RunIntoWriter(

// If using extract or extract-template, process all matches without sample limit
if s.Extract || s.ExtractTemplate != "" {
extractedData := make(map[string][]string)
extractedData := make(map[string][]interface{})
for _, result := range results {
var matches []string
var matches []interface{}
for _, sample := range result.Samples {
// Process HTML content
htmlDocs, err := simplifier.ProcessHTML(sample.HTML)
Expand All @@ -293,6 +293,8 @@ func (c *TestHTMLSelectorCommand) RunIntoWriter(
matches = append(matches, doc.Text)
} else if doc.Markdown != "" {
matches = append(matches, doc.Markdown)
} else {
matches = append(matches, doc)
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion cmd/tools/test-html-selector/selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ func (st *SelectorTester) Run(ctx context.Context) ([]SelectorResult, error) {
return nil, fmt.Errorf("selector '%s' failed: %w", sel.Name, err)
}

totalCount := len(samples)

// Limit samples to configured count
if len(samples) > st.config.Config.SampleCount {
samples = samples[:st.config.Config.SampleCount]
Expand All @@ -175,7 +177,7 @@ func (st *SelectorTester) Run(ctx context.Context) ([]SelectorResult, error) {
Name: sel.Name,
Selector: sel.Selector,
Type: sel.Type,
Count: len(samples),
Count: totalCount,
Samples: samples,
})
}
Expand Down

0 comments on commit 63fc514

Please sign in to comment.