Skip to content

Commit

Permalink
♻️ Convert test-html-selector to use glazed framework
Browse files Browse the repository at this point in the history
  • Loading branch information
wesen committed Jan 26, 2025
1 parent c6e8d48 commit 6ceaa93
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 78 deletions.
11 changes: 10 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -530,4 +530,13 @@ Enhanced the HTML simplifier to include id and class attributes in the tag name

- Modified tag format to include id and classes (e.g. div#myid.class1.class2)
- Removed id and class from regular attributes list
- Improved readability of HTML structure in YAML output
- Improved readability of HTML structure in YAML output

# Convert test-html-selector to glazed framework

Converted the test-html-selector tool to use the glazed framework for better CLI integration and consistency with other tools. The `file` field has been removed from the config file; the HTML input file must now be passed on the command line via the required `--input` flag.

- Converted test-html-selector to use glazed framework
- Removed file field from config file
- Added --sample-count and --context-chars command line flags
- Updated documentation to reflect new command structure
53 changes: 27 additions & 26 deletions cmd/tools/test-html-selector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@ A command-line tool for testing CSS and XPath selectors against HTML documents.

- Support for both CSS and XPath selectors
- Configurable sample count and context size
- YAML configuration and output format
- YAML configuration for selectors
- DOM path visualization for matched elements
- Parent context for each match
- Command-line override for input file
- Extract and print all matches for each selector

## Installation
Expand All @@ -23,7 +22,6 @@ go install ./cmd/tools/test-html-selector
1. Create a YAML configuration file:

```yaml
file: path/to/your.html # Optional if using --input flag
selectors:
- name: product_titles
selector: .product-card h2
Expand All @@ -39,16 +37,36 @@ config:
2. Run the tool:
```bash
# Using config file only
test-html-selector -c config.yaml
# Basic usage
test-html-selector --config config.yaml --input path/to/input.html

# Override input file from command line
test-html-selector -c config.yaml -i path/to/different.html
# Override sample count and context size
test-html-selector --config config.yaml --input path/to/input.html --sample-count 10 --context-chars 200

# Extract and print all matches
test-html-selector -c config.yaml -e
test-html-selector --config config.yaml --input path/to/input.html --extract
```

## Configuration Options

### Command Line Flags

- `--config`: Path to YAML config file (required)
- `--input`: Path to HTML input file (required)
- `--extract`: Extract and print all matches for each selector
- `--sample-count`: Maximum number of examples to show (default: 5)
- `--context-chars`: Number of characters of context to include (default: 100)

### YAML Configuration

- `selectors`: List of selectors to test
- `name`: Friendly name for the selector
- `selector`: CSS or XPath selector string
- `type`: Either "css" or "xpath"
- `config`:
- `sample_count`: Maximum number of examples to show (the `--sample-count` flag, default 5, always takes precedence over this value)
- `context_chars`: Number of characters of context to include (the `--context-chars` flag, default 100, always takes precedence over this value)

## Example Output

```yaml
Expand All @@ -60,21 +78,4 @@ test-html-selector -c config.yaml -e
context: "<div class=\"info\"><h2>Awesome Product 1</h2><div class=\"price\">$19.99</div></div>"
path: "html > body > div > div > div > h2"
# ... more samples ...
```

## Configuration Options

- `file`: Path to the HTML file to analyze (can be overridden by --input flag)
- `selectors`: List of selectors to test
- `name`: Friendly name for the selector
- `selector`: CSS or XPath selector string
- `type`: Either "css" or "xpath"
- `config`:
- `sample_count`: Maximum number of examples to show
- `context_chars`: Number of characters of context to include

## Command Line Flags

- `-c, --config`: Path to YAML config file (required)
- `-i, --input`: Path to HTML input file (overrides config file)
- `-e, --extract`: Extract and print all matches for each selector
```
1 change: 0 additions & 1 deletion cmd/tools/test-html-selector/example.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
file: example.html
selectors:
- name: product_titles
selector: .product-card h2
Expand Down
171 changes: 121 additions & 50 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
package main

import (
"context"
"fmt"
"log"
"io"
"os"

"github.com/go-go-golems/glazed/pkg/cli"
"github.com/go-go-golems/glazed/pkg/cmds"
"github.com/go-go-golems/glazed/pkg/cmds/layers"
"github.com/go-go-golems/glazed/pkg/cmds/parameters"
"github.com/go-go-golems/glazed/pkg/help"
"github.com/spf13/cobra"
"gopkg.in/yaml.v3"
)
Expand All @@ -24,62 +30,112 @@ type Selector struct {
Type string `yaml:"type"` // "css" or "xpath"
}

var (
configFile string
inputFile string
extract bool
)
// TestHTMLSelectorCommand is the test-html-selector command. It embeds the
// glazed command description (name, help text, flag definitions) and provides
// the RunIntoWriter method that executes the selector tests.
type TestHTMLSelectorCommand struct {
*cmds.CommandDescription
}

var rootCmd = &cobra.Command{
Use: "test-html-selector",
Short: "Test HTML/XPath selectors against HTML documents",
Long: `A tool for testing CSS and XPath selectors against HTML documents.
It provides match counts and contextual examples to verify selector accuracy.`,
RunE: func(cmd *cobra.Command, args []string) error {
config, err := loadConfig(configFile)
if err != nil {
return fmt.Errorf("failed to load config: %w", err)
}
// TestHTMLSelectorSettings receives the parsed flag values for the command.
// Each glazed.parameter tag names the flag (as declared in
// NewTestHTMLSelectorCommand) that the field is filled from.
type TestHTMLSelectorSettings struct {
// ConfigFile is the value of --config: the YAML file that lists the selectors.
ConfigFile string `glazed.parameter:"config"`
// InputFile is the value of --input: the HTML file the selectors run against.
InputFile string `glazed.parameter:"input"`
// Extract is the value of --extract: print matches instead of YAML results.
Extract bool `glazed.parameter:"extract"`
// SampleCount is the value of --sample-count (declared default: 5).
SampleCount int `glazed.parameter:"sample-count"`
// ContextChars is the value of --context-chars (declared default: 100).
ContextChars int `glazed.parameter:"context-chars"`
}

// Override file from config if input file is provided
if inputFile != "" {
config.File = inputFile
}
// NewTestHTMLSelectorCommand builds the test-html-selector command description
// with its short/long help and five flags:
//
//   - config        (string, required)
//   - input         (string, required)
//   - extract       (bool, default false)
//   - sample-count  (integer, default 5)
//   - context-chars (integer, default 100)
//
// The returned error is always nil in this implementation.
func NewTestHTMLSelectorCommand() (*TestHTMLSelectorCommand, error) {
return &TestHTMLSelectorCommand{
CommandDescription: cmds.NewCommandDescription(
"test-html-selector",
cmds.WithShort("Test HTML/XPath selectors against HTML documents"),
cmds.WithLong(`A tool for testing CSS and XPath selectors against HTML documents.
It provides match counts and contextual examples to verify selector accuracy.`),
cmds.WithFlags(
// Required: YAML file listing the selectors to test.
parameters.NewParameterDefinition(
"config",
parameters.ParameterTypeString,
parameters.WithHelp("Path to YAML config file containing selectors"),
parameters.WithRequired(true),
),
// Required: HTML document the selectors are evaluated against.
parameters.NewParameterDefinition(
"input",
parameters.ParameterTypeString,
parameters.WithHelp("Path to HTML input file"),
parameters.WithRequired(true),
),
// Optional: switch from YAML results to plain printed matches.
parameters.NewParameterDefinition(
"extract",
parameters.ParameterTypeBool,
parameters.WithHelp("Extract and print all matches for each selector"),
parameters.WithDefault(false),
),
// Optional: upper bound on the samples reported per selector.
parameters.NewParameterDefinition(
"sample-count",
parameters.ParameterTypeInteger,
parameters.WithHelp("Maximum number of examples to show"),
parameters.WithDefault(5),
),
// Optional: amount of surrounding context included with each sample.
parameters.NewParameterDefinition(
"context-chars",
parameters.ParameterTypeInteger,
parameters.WithHelp("Number of characters of context to include"),
parameters.WithDefault(100),
),
),
),
}, nil
}

if config.File == "" {
return fmt.Errorf("HTML input file is required (either in config or via --input flag)")
}
// RunIntoWriter executes the command: it reads the flag values into a
// TestHTMLSelectorSettings, loads the selector list from the YAML config file,
// runs the selector tester against the input HTML file, and writes the outcome
// to w. With --extract each selector is printed followed by the HTML of its
// samples; otherwise the results are YAML-encoded.
//
// Errors from settings initialization are returned as-is; config loading,
// tester construction and the test run are wrapped with a "failed to ..."
// prefix.
func (c *TestHTMLSelectorCommand) RunIntoWriter(
ctx context.Context,
parsedLayers *layers.ParsedLayers,
w io.Writer,
) error {
// Populate the settings struct from the default parameter layer.
s := &TestHTMLSelectorSettings{}
if err := parsedLayers.InitializeStruct(layers.DefaultSlug, s); err != nil {
return err
}

tester, err := NewSelectorTester(config)
if err != nil {
return fmt.Errorf("failed to create tester: %w", err)
}
config, err := loadConfig(s.ConfigFile)
if err != nil {
return fmt.Errorf("failed to load config: %w", err)
}

results, err := tester.Run(cmd.Context())
if err != nil {
return fmt.Errorf("failed to run tests: %w", err)
}
// Replace the YAML-provided limits with the command line values.
// NOTE(review): these assignments are unconditional, and both flags declare
// defaults (5 and 100) in NewTestHTMLSelectorCommand, so the sample_count /
// context_chars values from the YAML file appear never to take effect.
// They are also redundant: the Config handed to NewSelectorTester below is
// rebuilt from s directly rather than from this config value.
config.Config.SampleCount = s.SampleCount
config.Config.ContextChars = s.ContextChars

// Build the tester's Config from scratch: the input path and both limits
// come from flags; only Selectors is taken from the YAML file.
tester, err := NewSelectorTester(&Config{
File: s.InputFile,
Selectors: config.Selectors,
Config: struct {
SampleCount int `yaml:"sample_count"`
ContextChars int `yaml:"context_chars"`
}{
SampleCount: s.SampleCount,
ContextChars: s.ContextChars,
},
})
if err != nil {
return fmt.Errorf("failed to create tester: %w", err)
}

results, err := tester.Run(ctx)
if err != nil {
return fmt.Errorf("failed to run tests: %w", err)
}

if extract {
for _, result := range results {
fmt.Printf("Selector: %s\n", result.Selector)
for _, sample := range result.Samples {
fmt.Println(sample.HTML)
}
// Extract mode: one "Selector:" header per result, then each sample's HTML.
if s.Extract {
for _, result := range results {
fmt.Fprintf(w, "Selector: %s\n", result.Selector)
for _, sample := range result.Samples {
fmt.Fprintln(w, sample.HTML)
}
} else {
return yaml.NewEncoder(os.Stdout).Encode(results)
}
} else {
// Default mode: emit the full result set as YAML on the supplied writer.
return yaml.NewEncoder(w).Encode(results)
}

return nil
},
}

func init() {
rootCmd.PersistentFlags().StringVarP(&configFile, "config", "c", "", "Path to YAML config file")
rootCmd.PersistentFlags().StringVarP(&inputFile, "input", "i", "", "Path to HTML input file (overrides config file)")
rootCmd.PersistentFlags().BoolVarP(&extract, "extract", "e", false, "Extract and print all matches for each selector")
rootCmd.MarkPersistentFlagRequired("config")
return nil
}

func loadConfig(path string) (*Config, error) {
Expand All @@ -98,7 +154,22 @@ func loadConfig(path string) (*Config, error) {
}

// main wires the CLI together: it creates a cobra root command, attaches the
// glazed help system to it, builds the test-html-selector glazed command,
// converts it to a cobra command, registers it on the root, and executes the
// root. Every error is passed to cobra.CheckErr.
//
// NOTE(review): the glazed command is added with AddCommand to a root whose
// Use is also "test-html-selector", so it is presumably invoked as
// `test-html-selector test-html-selector --config ...`; the README examples
// omit the subcommand name — confirm which form is intended.
func main() {
if err := rootCmd.Execute(); err != nil {
log.Fatal(err)
var rootCmd = &cobra.Command{
Use: "test-html-selector",
Short: "Test HTML/XPath selectors against HTML documents",
}

// Hook the glazed help system into the root command.
helpSystem := help.NewHelpSystem()
helpSystem.SetupCobraRootCommand(rootCmd)

cmd, err := NewTestHTMLSelectorCommand()
cobra.CheckErr(err)

// Adapt the writer-style glazed command to a cobra command.
cobraCmd, err := cli.BuildCobraCommandFromWriterCommand(cmd)
cobra.CheckErr(err)

rootCmd.AddCommand(cobraCmd)

err = rootCmd.Execute()
cobra.CheckErr(err)
}

0 comments on commit 6ceaa93

Please sign in to comment.