Skip to content

Commit

Permalink
✨ Add --no-template flag to disable template rendering
Browse files Browse the repository at this point in the history
  • Loading branch information
wesen committed Jan 26, 2025
1 parent c4347a0 commit 3f08927
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 36 deletions.
8 changes: 7 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -705,4 +705,10 @@ Replaced manual markdown conversion with html-to-markdown library for better and
Added a new flag to control whether simplified HTML is shown in the output to reduce verbosity.

- Added `--show-simplified` flag (default: false) to control whether simplified HTML is included in output
- Modified output to only include simplified HTML and context when explicitly requested
- Modified output to only include simplified HTML and context when explicitly requested

HTML Selector Template Control
Added the ability to disable template rendering in the HTML selector tool.

- Added `--no-template` flag to disable template rendering
- Template rendering can now be explicitly disabled even when a config file or extract options are used
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@ selectors:
Extracts the introduction paragraph.
Uses a class selector to find paragraphs with class "intro".
# Inside the template, "." is a single document: .Source is the
# source (file/URL) and .Data holds its extracted data
template: |
# Content from {{ .Source }}
## Page Title
{{ index .Data.page_title 0 }}
## Introduction
{{ index .Data.intro_text 0 }}
config:
sample_count: 5
context_chars: 100
Expand Down Expand Up @@ -191,6 +202,18 @@ selectors:
Extracts prices from product blocks.
Shows how to target deeply nested elements.
# Inside the template, "." is a single document: .Source is the
# source (file/URL) and .Data holds its extracted data
template: |
# Products from {{ .Source }}
{{ $ := . }}
{{- range $index, $name := .Data.product_names }}
## Product {{ add $index 1 }}
- Name: {{ $name }}
- Price: {{ index $.Data.prices $index }}
{{- end }}
config:
sample_count: 5
context_chars: 100
Expand Down Expand Up @@ -245,8 +268,25 @@ selectors:
Extracts items from the features list.
Simple example of list extraction.
# Inside the template, "." is a single document: .Source is the
# source (file/URL) and .Data holds its extracted data
template: |
# Data from {{ .Source }}
## Table Data
| Row | Content |
|-----|---------|
{{- range .Data.table_cells }}
| {{ . }} |
{{- end }}
## Features
{{- range .Data.list_items }}
- {{ . }}
{{- end }}
config:
sample_count: 10 # Increased to show more rows
sample_count: 10
context_chars: 100
```
Expand Down Expand Up @@ -293,6 +333,22 @@ selectors:
Extracts all paragraphs that come before the comments section.
Shows XPath's powerful axis navigation.
# Inside the template, "." is a single document: .Source is the
# source (file/URL) and .Data holds its extracted data
template: |
# Content Analysis from {{ .Source }}
## Second Paragraph
{{ index .Data.second_paragraph 0 }}
## Last Comment
{{ index .Data.last_comment 0 }}
## Paragraphs Before Comments
{{- range .Data.paragraphs_before_comments }}
- {{ . }}
{{- end }}
config:
sample_count: 5
context_chars: 100
Expand Down Expand Up @@ -348,7 +404,6 @@ selectors:
Extracts the user's skills.
template: |
{{- range . }}
# Profile from {{ .Source }}
**Name**: {{ index .Data.user_name 0 }}
Expand All @@ -359,7 +414,6 @@ template: |
{{- range .Data.user_skills }}
- {{ . }}
{{- end }}
{{ end }}
config:
sample_count: 5
Expand Down
78 changes: 46 additions & 32 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ type HTMLSelectorSettings struct {
Extract bool `glazed.parameter:"extract"`
ExtractData bool `glazed.parameter:"extract-data"`
ExtractTemplate string `glazed.parameter:"extract-template"`
NoTemplate bool `glazed.parameter:"no-template"`
ShowContext bool `glazed.parameter:"show-context"`
ShowPath bool `glazed.parameter:"show-path"`
ShowSimplified bool `glazed.parameter:"show-simplified"`
Expand All @@ -93,6 +94,13 @@ type HTMLSelectorSettings struct {
MaxTableRows int `glazed.parameter:"max-table-rows"`
}

// ShouldTemplate reports whether template rendering applies to these settings.
//
// NoTemplate takes precedence and always yields false. Otherwise the result
// is true when ExtractData is set, when ExtractTemplate is non-empty, or when
// ConfigFile is set to something other than "-".
func (s *HTMLSelectorSettings) ShouldTemplate() bool {
	switch {
	case s.NoTemplate:
		// An explicit opt-out overrides every other setting.
		return false
	case s.ExtractData, s.ExtractTemplate != "":
		return true
	default:
		return s.ConfigFile != "" && s.ConfigFile != "-"
	}
}

func NewHTMLSelectorCommand() (*HTMLSelectorCommand, error) {
return &HTMLSelectorCommand{
CommandDescription: cmds.NewCommandDescription(
Expand Down Expand Up @@ -144,6 +152,12 @@ It provides match counts and contextual examples to verify selector accuracy.`),
parameters.ParameterTypeString,
parameters.WithHelp("Go template file to render with extracted data"),
),
parameters.NewParameterDefinition(
"no-template",
parameters.ParameterTypeBool,
parameters.WithHelp("Do not use templates"),
parameters.WithDefault(false),
),
parameters.NewParameterDefinition(
"show-context",
parameters.ParameterTypeBool,
Expand Down Expand Up @@ -301,7 +315,7 @@ func (c *HTMLSelectorCommand) RunIntoWriter(

// Process files
for _, file := range s.Files {
result, err := processSource(ctx, file, selectors, s, simplifier)
result, err := processSource(ctx, file, selectors, s)
if err != nil {
return fmt.Errorf("failed to process file %s: %w", file, err)
}
Expand All @@ -310,15 +324,14 @@ func (c *HTMLSelectorCommand) RunIntoWriter(

// Process URLs
for _, url := range s.URLs {
result, err := processSource(ctx, url, selectors, s, simplifier)
result, err := processSource(ctx, url, selectors, s)
if err != nil {
return fmt.Errorf("failed to process URL %s: %w", url, err)
}
sourceResults = append(sourceResults, result)
}

// If using extract or extract-template, process all matches without sample limit
if s.ExtractData || s.ExtractTemplate != "" || (config != nil && config.Template != "") {
if s.ShouldTemplate() {
// clear the selector results
for _, sourceResult := range sourceResults {
sourceResult.SelectorResults = []SelectorResult{}
Expand Down Expand Up @@ -415,21 +428,11 @@ func (c *HTMLSelectorCommand) RunIntoWriter(
return yaml.NewEncoder(w).Encode(newResults)
}

// findSelectorByName returns the first entry in selectors whose Name equals
// name. If no entry matches, it returns the zero-value Selector.
func findSelectorByName(selectors []Selector, name string) Selector {
	var found Selector
	for i := range selectors {
		if selectors[i].Name != name {
			continue
		}
		// First match wins; later entries with the same name are ignored.
		found = selectors[i]
		break
	}
	return found
}

func processSource(
ctx context.Context,
source string,
selectors []Selector,
s *HTMLSelectorSettings,
simplifier *htmlsimplifier.Simplifier,
) (*SourceResult, error) {
result := &SourceResult{
Source: source,
Expand All @@ -454,7 +457,7 @@ func processSource(
}

sampleCount := s.SampleCount
if s.Extract || s.ExtractTemplate != "" {
if s.ShouldTemplate() {
sampleCount = 0
}

Expand Down Expand Up @@ -524,14 +527,25 @@ func loadConfig(path string) (*Config, error) {

// executeTemplate handles template execution and provides a subset of data on error
func executeTemplate(w io.Writer, tmpl *template.Template, sourceResults []*SourceResult) error {
// First try executing the template with all source results
err := tmpl.Execute(w, sourceResults)
if err != nil {
// Create a subset of the data for error reporting
subset := make([]*SourceResult, 0)
for i, sr := range sourceResults {
if i >= 3 {
break
}
if err == nil {
return nil
}

// If that fails, try executing individually for each source
fmt.Fprintf(os.Stderr, "Error executing combined template: %v\n", err)
fmt.Fprintf(os.Stderr, "Trying individual execution...\n")

for i, sr := range sourceResults {
if i > 0 {
fmt.Fprintf(w, "\n---\n")
}
fmt.Fprintf(w, "# Source: %s\n", sr.Source)

err := tmpl.Execute(w, sr)
if err != nil {
// Create subset of failed source result for error reporting
subsetResult := &SourceResult{
Source: sr.Source,
Data: make(map[string][]interface{}),
Expand All @@ -545,19 +559,19 @@ func executeTemplate(w io.Writer, tmpl *template.Template, sourceResults []*Sour
subsetResult.Data[name] = matches
}
}
subset = append(subset, subsetResult)
}

// Print the error and data subset
fmt.Fprintf(os.Stderr, "Error executing template: %v\n", err)
fmt.Fprintf(os.Stderr, "Here is a subset of the input data:\n")
enc := yaml.NewEncoder(os.Stderr)
enc.SetIndent(2)
if err := enc.Encode(subset); err != nil {
fmt.Fprintf(os.Stderr, "Error encoding data subset: %v\n", err)
// Print the error and data subset
fmt.Fprintf(os.Stderr, "Error executing template for source %s: %v\n", sr.Source, err)
fmt.Fprintf(os.Stderr, "Here is a subset of the input data:\n")
enc := yaml.NewEncoder(os.Stderr)
enc.SetIndent(2)
if err := enc.Encode(subsetResult); err != nil {
fmt.Fprintf(os.Stderr, "Error encoding data subset: %v\n", err)
}
return fmt.Errorf("template execution failed for source %s: %w", sr.Source, err)
}
return fmt.Errorf("template execution failed: %w", err)
}

return nil
}

Expand Down

0 comments on commit 3f08927

Please sign in to comment.