Skip to content

Commit

Permalink
✨ Fix multi source result
Browse files (browse the repository at this point in the history)
  • Loading branch information
wesen committed Jan 26, 2025
1 parent 7173f13 commit a636318
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 27 deletions.
16 changes: 16 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,22 @@
"cmd/tools/test-html-selector/examples/tutorial/01-basic-text.html"
],
"cwd": "${workspaceFolder}"
},
{
"name": "Test HTML Selector - PubMed FOXP3",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/tools/test-html-selector",
"args": [
"test-html-selector",
"--config",
"cmd/tools/test-html-selector/examples/pubmed.yaml",
"--urls",
"https://pubmed.ncbi.nlm.nih.gov/?term=foxp3",
"--extract"
],
"cwd": "${workspaceFolder}"
}
]
}
61 changes: 42 additions & 19 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type Selector struct {
}

type SimplifiedSample struct {
HTML interface{} `yaml:"html,omitempty"`
HTML []htmlsimplifier.Document `yaml:"html,omitempty"`
Context []htmlsimplifier.Document `yaml:"context,omitempty"`
Path string `yaml:"path,omitempty"`
}
Expand All @@ -54,8 +54,8 @@ type SimplifiedResult struct {
}

type SourceResult struct {
Source string `yaml:"source"`
Results []SimplifiedResult `yaml:"results"`
Source string `yaml:"source"`
Data map[string][]interface{} `yaml:"data"`
}

type TestHTMLSelectorCommand struct {
Expand Down Expand Up @@ -339,7 +339,33 @@ func (c *TestHTMLSelectorCommand) RunIntoWriter(
return yaml.NewEncoder(w).Encode(sourceResults)
}

return yaml.NewEncoder(w).Encode(sourceResults)
// Convert results to use Document structure for normal output
var newResults []SimplifiedResult
for _, sourceResult := range sourceResults {
for selectorName, matches := range sourceResult.Data {
var samples []SimplifiedSample
for _, match := range matches {
if doc, ok := match.(htmlsimplifier.Document); ok {
sample := SimplifiedSample{
HTML: []htmlsimplifier.Document{doc},
}
if s.ShowPath {
sample.Path = sourceResult.Source
}
samples = append(samples, sample)
}
}
newResults = append(newResults, SimplifiedResult{
Name: selectorName,
Selector: findSelectorByName(selectors, selectorName).Selector,
Type: findSelectorByName(selectors, selectorName).Type,
Count: len(matches),
Samples: samples,
})
}
}

return yaml.NewEncoder(w).Encode(newResults)
}

func findSelectorByName(selectors []Selector, name string) Selector {
Expand Down Expand Up @@ -390,7 +416,7 @@ func processSource(ctx context.Context, source string, selectors []Selector, s *
ContextChars: s.ContextChars,
Template: "",
},
})
}, f)
if err != nil {
return result, fmt.Errorf("failed to create tester: %w", err)
}
Expand All @@ -400,28 +426,25 @@ func processSource(ctx context.Context, source string, selectors []Selector, s *
return result, fmt.Errorf("failed to run tests: %w", err)
}

result.Data = make(map[string][]interface{})
for _, r := range results {
simplifiedResult := SimplifiedResult{
Name: r.Name,
Selector: r.Selector,
Type: r.Type,
Count: r.Count,
Samples: []SimplifiedSample{},
}
var matches []interface{}
for _, sample := range r.Samples {
// Process HTML content
htmlDocs, err := simplifier.ProcessHTML(sample.HTML)
if err == nil {
simplifiedSample := SimplifiedSample{
HTML: htmlDocs,
}
if s.ShowPath {
simplifiedSample.Path = source
for _, doc := range htmlDocs {
if doc.Text != "" {
matches = append(matches, doc.Text)
} else if doc.Markdown != "" {
matches = append(matches, doc.Markdown)
} else {
matches = append(matches, doc)
}
}
simplifiedResult.Samples = append(simplifiedResult.Samples, simplifiedSample)
}
}
result.Results = append(result.Results, simplifiedResult)
result.Data[r.Name] = matches
}

return result, nil
Expand Down
9 changes: 1 addition & 8 deletions cmd/tools/test-html-selector/selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"
"io"
"os"
"strings"

"github.com/PuerkitoBio/goquery"
Expand Down Expand Up @@ -143,13 +142,7 @@ type SelectorTester struct {
config *Config
}

func NewSelectorTester(config *Config) (*SelectorTester, error) {
f, err := os.Open(config.File)
if err != nil {
return nil, fmt.Errorf("failed to open HTML file: %w", err)
}
defer f.Close()

func NewSelectorTester(config *Config, f io.Reader) (*SelectorTester, error) {
engine, err := NewSelectorEngine(f)
if err != nil {
return nil, err
Expand Down

0 comments on commit a636318

Please sign in to comment.