Skip to content

Commit

Permalink
✨ Add helpers to make template generation more solid
Browse files Browse the repository at this point in the history
  • Loading branch information
wesen committed Jan 26, 2025
1 parent 882ad29 commit c4347a0
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
"DEBUG",
"--show-simplified",
"--config",
"/tmp/html-extraction-2025-01-26-17-42-34.yaml"
"/tmp/html-extraction-2025-01-26-18-20-00.yaml"
],
"cwd": "${workspaceFolder}"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,15 +349,14 @@ selectors:
template: |
{{- range . }}
{{ $ := .Data }}
# User Profile
# Profile from {{ .Source }}
**Name**: {{ index $.user_name 0 }}
**Email**: {{ index $.user_email 0 }}
**Location**: {{ index $.user_location 0 }}
**Name**: {{ index .Data.user_name 0 }}
**Email**: {{ index .Data.user_email 0 }}
**Location**: {{ index .Data.user_location 0 }}
## Skills
{{- range $.user_skills }}
{{- range .Data.user_skills }}
- {{ . }}
{{- end }}
{{ end }}
Expand All @@ -367,6 +366,33 @@ config:
context_chars: 100
```
The data structure passed to the template engine is a list of source results, where each source result has this structure:
```yaml
- source: "file.html" # or URL
data:
selector_name: # matches the name in your selector config
- "First match as markdown"
- "Second match as markdown"
- "..."
another_selector:
- "First match"
- "Second match"
- source: "another-file.html"
data:
selector_name:
- "Matches from second file"
- "..."
```
You can access this data in your templates using:
- `.Source` - the source file/URL
- `.Data.$selector_name` - list of matches for a given selector
- `index .Data.$selector_name 0` - first match for a selector
- `range .Data.$selector_name` - iterate over all matches
The template has access to all [Sprig template functions](http://masterminds.github.io/sprig/) for string manipulation, date formatting, etc.
## Best Practices
1. **Clear Descriptions**
Expand All @@ -392,6 +418,7 @@ config:
- Use templates for formatting when the default YAML output isn't suitable
- Take advantage of Sprig functions for data manipulation
- Consider creating reusable template snippets
- Make sure your template iterates over the top-level list of results (e.g. `{{ range . }}`, with `.Source` and `.Data` available on each item), since there can be multiple sources
## Common Patterns
Expand Down
61 changes: 49 additions & 12 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ type Config struct {
Description string `yaml:"description"`
Selectors []Selector `yaml:"selectors"`
Config struct {
SampleCount int `yaml:"sample_count"`
ContextChars int `yaml:"context_chars"`
Template string `yaml:"template"`
SampleCount int `yaml:"sample_count"`
ContextChars int `yaml:"context_chars"`
} `yaml:"config"`
Template string `yaml:"template"`
}

type Selector struct {
Expand Down Expand Up @@ -318,7 +318,7 @@ func (c *HTMLSelectorCommand) RunIntoWriter(
}

// If using extract or extract-template, process all matches without sample limit
if s.Extract || s.ExtractTemplate != "" {
if s.ExtractData || s.ExtractTemplate != "" || (config != nil && config.Template != "") {
// clear the selector results
for _, sourceResult := range sourceResults {
sourceResult.SelectorResults = []SelectorResult{}
Expand All @@ -338,19 +338,19 @@ func (c *HTMLSelectorCommand) RunIntoWriter(
if err != nil {
return fmt.Errorf("failed to parse template file: %w", err)
}
return tmpl.Execute(w, sourceResults)
return executeTemplate(w, tmpl, sourceResults)
}

// Then try config file template if extract mode is on
if config != nil && config.Config.Template != "" {
if config != nil && config.Template != "" {
// Parse and execute template from config
tmpl, err := template.New("config").
Funcs(sprig.TxtFuncMap()).
Parse(config.Config.Template)
Parse(config.Template)
if err != nil {
return fmt.Errorf("failed to parse template from config: %w", err)
}
return tmpl.Execute(w, sourceResults)
return executeTemplate(w, tmpl, sourceResults)
}

// Default to YAML output
Expand Down Expand Up @@ -462,13 +462,11 @@ func processSource(
File: source,
Selectors: selectors,
Config: struct {
SampleCount int `yaml:"sample_count"`
ContextChars int `yaml:"context_chars"`
Template string `yaml:"template"`
SampleCount int `yaml:"sample_count"`
ContextChars int `yaml:"context_chars"`
}{
SampleCount: sampleCount,
ContextChars: s.ContextChars,
Template: "",
},
}, f)
if err != nil {
Expand Down Expand Up @@ -524,6 +522,45 @@ func loadConfig(path string) (*Config, error) {
return &config, nil
}

// templateErrorSample returns a truncated copy of sourceResults suitable for
// diagnostics: at most maxSources entries, each keeping at most maxMatches
// matches per selector. Nil entries are skipped so that building the sample
// can never panic while an error is being reported. The returned match slices
// share their backing arrays with the input and must be treated as read-only.
func templateErrorSample(sourceResults []*SourceResult, maxSources, maxMatches int) []*SourceResult {
	sample := make([]*SourceResult, 0, maxSources)
	for _, sr := range sourceResults {
		if len(sample) >= maxSources {
			break
		}
		if sr == nil {
			continue
		}

		trimmed := &SourceResult{
			Source: sr.Source,
			Data:   make(map[string][]interface{}, len(sr.Data)),
		}
		for name, matches := range sr.Data {
			if len(matches) > maxMatches {
				matches = matches[:maxMatches]
			}
			trimmed.Data[name] = matches
		}
		sample = append(sample, trimmed)
	}
	return sample
}

// executeTemplate runs tmpl against sourceResults, writing the output to w.
//
// On success it returns nil. If execution fails, it prints the error to
// os.Stderr followed by a YAML dump of a small subset of the input (the first
// few sources, and the first few matches of each selector) to help debug the
// template, and then returns the execution error wrapped with context.
// Partial output may already have been written to w when an error is returned.
func executeTemplate(w io.Writer, tmpl *template.Template, sourceResults []*SourceResult) error {
	err := tmpl.Execute(w, sourceResults)
	if err == nil {
		return nil
	}

	// Limits for the diagnostic dump; enough to show the data shape without
	// flooding the terminal.
	const (
		maxSampleSources = 3
		maxSampleMatches = 3
	)
	subset := templateErrorSample(sourceResults, maxSampleSources, maxSampleMatches)

	// Print the error and data subset
	fmt.Fprintf(os.Stderr, "Error executing template: %v\n", err)
	fmt.Fprintf(os.Stderr, "Here is a subset of the input data:\n")
	enc := yaml.NewEncoder(os.Stderr)
	enc.SetIndent(2)
	if encErr := enc.Encode(subset); encErr != nil {
		// Best effort only: a failed dump must not replace the template error.
		fmt.Fprintf(os.Stderr, "Error encoding data subset: %v\n", encErr)
	} else if closeErr := enc.Close(); closeErr != nil {
		// Close writes out any data the encoder still holds.
		fmt.Fprintf(os.Stderr, "Error encoding data subset: %v\n", closeErr)
	}
	return fmt.Errorf("template execution failed: %w", err)
}

func main() {
var rootCmd = &cobra.Command{
Use: "html-selector",
Expand Down

0 comments on commit c4347a0

Please sign in to comment.