From 6ceaa93a9d4c4f56cfa9a5ff2aa497c41b1e7356 Mon Sep 17 00:00:00 2001 From: Manuel Odendahl Date: Sun, 26 Jan 2025 13:53:35 -0500 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Convert=20test-html-select?= =?UTF-8?q?or=20to=20use=20glazed=20framework?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- changelog.md | 11 +- cmd/tools/test-html-selector/README.md | 53 +++---- cmd/tools/test-html-selector/example.yaml | 1 - cmd/tools/test-html-selector/main.go | 171 +++++++++++++++------- 4 files changed, 158 insertions(+), 78 deletions(-) diff --git a/changelog.md b/changelog.md index f370065..fb8316d 100644 --- a/changelog.md +++ b/changelog.md @@ -530,4 +530,13 @@ Enhanced the HTML simplifier to include id and class attributes in the tag name - Modified tag format to include id and classes (e.g. div#myid.class1.class2) - Removed id and class from regular attributes list -- Improved readability of HTML structure in YAML output \ No newline at end of file +- Improved readability of HTML structure in YAML output + +# Convert test-html-selector to glazed framework + +Converted the test-html-selector tool to use the glazed framework for better CLI integration and consistency with other tools. The `file` field has been removed from the config file; the HTML input path must now be passed with the required `--input` command-line flag. + +- Converted test-html-selector to use glazed framework +- Removed file field from config file +- Added --sample-count and --context-chars command line flags +- Updated documentation to reflect new command structure \ No newline at end of file diff --git a/cmd/tools/test-html-selector/README.md b/cmd/tools/test-html-selector/README.md index b155945..5e39cdf 100644 --- a/cmd/tools/test-html-selector/README.md +++ b/cmd/tools/test-html-selector/README.md @@ -6,10 +6,9 @@ A command-line tool for testing CSS and XPath selectors against HTML documents. 
- Support for both CSS and XPath selectors - Configurable sample count and context size -- YAML configuration and output format +- YAML configuration for selectors - DOM path visualization for matched elements - Parent context for each match -- Command-line override for input file - Extract and print all matches for each selector ## Installation @@ -23,7 +22,6 @@ go install ./cmd/tools/test-html-selector 1. Create a YAML configuration file: ```yaml -file: path/to/your.html # Optional if using --input flag selectors: - name: product_titles selector: .product-card h2 @@ -39,16 +37,36 @@ config: 2. Run the tool: ```bash -# Using config file only -test-html-selector -c config.yaml +# Basic usage +test-html-selector --config config.yaml --input path/to/input.html -# Override input file from command line -test-html-selector -c config.yaml -i path/to/different.html +# Override sample count and context size +test-html-selector --config config.yaml --input path/to/input.html --sample-count 10 --context-chars 200 # Extract and print all matches -test-html-selector -c config.yaml -e +test-html-selector --config config.yaml --input path/to/input.html --extract ``` +## Configuration Options + +### Command Line Flags + +- `--config`: Path to YAML config file (required) +- `--input`: Path to HTML input file (required) +- `--extract`: Extract and print all matches for each selector +- `--sample-count`: Maximum number of examples to show (default: 5) +- `--context-chars`: Number of characters of context to include (default: 100) + +### YAML Configuration + +- `selectors`: List of selectors to test + - `name`: Friendly name for the selector + - `selector`: CSS or XPath selector string + - `type`: Either "css" or "xpath" +- `config`: + - `sample_count`: Maximum number of examples to show (the --sample-count flag value, default 5, always takes precedence) + - `context_chars`: Number of characters of context to include (the --context-chars flag value, default 100, always takes precedence) + ## Example Output ```yaml @@ -60,21 +78,4 @@ 
test-html-selector -c config.yaml -e context: "

Awesome Product 1

$19.99
" path: "html > body > div > div > div > h2" # ... more samples ... -``` - -## Configuration Options - -- `file`: Path to the HTML file to analyze (can be overridden by --input flag) -- `selectors`: List of selectors to test - - `name`: Friendly name for the selector - - `selector`: CSS or XPath selector string - - `type`: Either "css" or "xpath" -- `config`: - - `sample_count`: Maximum number of examples to show - - `context_chars`: Number of characters of context to include - -## Command Line Flags - -- `-c, --config`: Path to YAML config file (required) -- `-i, --input`: Path to HTML input file (overrides config file) -- `-e, --extract`: Extract and print all matches for each selector \ No newline at end of file +``` \ No newline at end of file diff --git a/cmd/tools/test-html-selector/example.yaml b/cmd/tools/test-html-selector/example.yaml index aca79d7..9447d21 100644 --- a/cmd/tools/test-html-selector/example.yaml +++ b/cmd/tools/test-html-selector/example.yaml @@ -1,4 +1,3 @@ -file: example.html selectors: - name: product_titles selector: .product-card h2 diff --git a/cmd/tools/test-html-selector/main.go b/cmd/tools/test-html-selector/main.go index 858dd27..c9e05b4 100644 --- a/cmd/tools/test-html-selector/main.go +++ b/cmd/tools/test-html-selector/main.go @@ -1,10 +1,16 @@ package main import ( + "context" "fmt" - "log" + "io" "os" + "github.com/go-go-golems/glazed/pkg/cli" + "github.com/go-go-golems/glazed/pkg/cmds" + "github.com/go-go-golems/glazed/pkg/cmds/layers" + "github.com/go-go-golems/glazed/pkg/cmds/parameters" + "github.com/go-go-golems/glazed/pkg/help" "github.com/spf13/cobra" "gopkg.in/yaml.v3" ) @@ -24,62 +30,112 @@ type Selector struct { Type string `yaml:"type"` // "css" or "xpath" } -var ( - configFile string - inputFile string - extract bool -) +type TestHTMLSelectorCommand struct { + *cmds.CommandDescription +} -var rootCmd = &cobra.Command{ - Use: "test-html-selector", - Short: "Test HTML/XPath selectors against HTML documents", - Long: 
`A tool for testing CSS and XPath selectors against HTML documents. -It provides match counts and contextual examples to verify selector accuracy.`, - RunE: func(cmd *cobra.Command, args []string) error { - config, err := loadConfig(configFile) - if err != nil { - return fmt.Errorf("failed to load config: %w", err) - } +type TestHTMLSelectorSettings struct { + ConfigFile string `glazed.parameter:"config"` + InputFile string `glazed.parameter:"input"` + Extract bool `glazed.parameter:"extract"` + SampleCount int `glazed.parameter:"sample-count"` + ContextChars int `glazed.parameter:"context-chars"` +} - // Override file from config if input file is provided - if inputFile != "" { - config.File = inputFile - } +func NewTestHTMLSelectorCommand() (*TestHTMLSelectorCommand, error) { + return &TestHTMLSelectorCommand{ + CommandDescription: cmds.NewCommandDescription( + "test-html-selector", + cmds.WithShort("Test HTML/XPath selectors against HTML documents"), + cmds.WithLong(`A tool for testing CSS and XPath selectors against HTML documents. 
+It provides match counts and contextual examples to verify selector accuracy.`), + cmds.WithFlags( + parameters.NewParameterDefinition( + "config", + parameters.ParameterTypeString, + parameters.WithHelp("Path to YAML config file containing selectors"), + parameters.WithRequired(true), + ), + parameters.NewParameterDefinition( + "input", + parameters.ParameterTypeString, + parameters.WithHelp("Path to HTML input file"), + parameters.WithRequired(true), + ), + parameters.NewParameterDefinition( + "extract", + parameters.ParameterTypeBool, + parameters.WithHelp("Extract and print all matches for each selector"), + parameters.WithDefault(false), + ), + parameters.NewParameterDefinition( + "sample-count", + parameters.ParameterTypeInteger, + parameters.WithHelp("Maximum number of examples to show"), + parameters.WithDefault(5), + ), + parameters.NewParameterDefinition( + "context-chars", + parameters.ParameterTypeInteger, + parameters.WithHelp("Number of characters of context to include"), + parameters.WithDefault(100), + ), + ), + ), + }, nil +} - if config.File == "" { - return fmt.Errorf("HTML input file is required (either in config or via --input flag)") - } +func (c *TestHTMLSelectorCommand) RunIntoWriter( + ctx context.Context, + parsedLayers *layers.ParsedLayers, + w io.Writer, +) error { + s := &TestHTMLSelectorSettings{} + if err := parsedLayers.InitializeStruct(layers.DefaultSlug, s); err != nil { + return err + } - tester, err := NewSelectorTester(config) - if err != nil { - return fmt.Errorf("failed to create tester: %w", err) - } + config, err := loadConfig(s.ConfigFile) + if err != nil { + return fmt.Errorf("failed to load config: %w", err) + } - results, err := tester.Run(cmd.Context()) - if err != nil { - return fmt.Errorf("failed to run tests: %w", err) - } + // Override config settings with command line parameters + config.Config.SampleCount = s.SampleCount + config.Config.ContextChars = s.ContextChars + + tester, err := NewSelectorTester(&Config{ + 
File: s.InputFile, + Selectors: config.Selectors, + Config: struct { + SampleCount int `yaml:"sample_count"` + ContextChars int `yaml:"context_chars"` + }{ + SampleCount: s.SampleCount, + ContextChars: s.ContextChars, + }, + }) + if err != nil { + return fmt.Errorf("failed to create tester: %w", err) + } + + results, err := tester.Run(ctx) + if err != nil { + return fmt.Errorf("failed to run tests: %w", err) + } - if extract { - for _, result := range results { - fmt.Printf("Selector: %s\n", result.Selector) - for _, sample := range result.Samples { - fmt.Println(sample.HTML) - } + if s.Extract { + for _, result := range results { + fmt.Fprintf(w, "Selector: %s\n", result.Selector) + for _, sample := range result.Samples { + fmt.Fprintln(w, sample.HTML) } - } else { - return yaml.NewEncoder(os.Stdout).Encode(results) } + } else { + return yaml.NewEncoder(w).Encode(results) + } - return nil - }, -} - -func init() { - rootCmd.PersistentFlags().StringVarP(&configFile, "config", "c", "", "Path to YAML config file") - rootCmd.PersistentFlags().StringVarP(&inputFile, "input", "i", "", "Path to HTML input file (overrides config file)") - rootCmd.PersistentFlags().BoolVarP(&extract, "extract", "e", false, "Extract and print all matches for each selector") - rootCmd.MarkPersistentFlagRequired("config") + return nil } func loadConfig(path string) (*Config, error) { @@ -98,7 +154,22 @@ func loadConfig(path string) (*Config, error) { } func main() { - if err := rootCmd.Execute(); err != nil { - log.Fatal(err) + var rootCmd = &cobra.Command{ + Use: "test-html-selector", + Short: "Test HTML/XPath selectors against HTML documents", } + + helpSystem := help.NewHelpSystem() + helpSystem.SetupCobraRootCommand(rootCmd) + + cmd, err := NewTestHTMLSelectorCommand() + cobra.CheckErr(err) + + cobraCmd, err := cli.BuildCobraCommandFromWriterCommand(cmd) + cobra.CheckErr(err) + + rootCmd.AddCommand(cobraCmd) + + err = rootCmd.Execute() + cobra.CheckErr(err) }