Skip to content

Commit

Permalink
🔊 add Clay logging initialization to HTML tools
Browse files Browse the repository at this point in the history
  • Loading branch information
wesen committed Jan 26, 2025
1 parent f88c384 commit d6bd34b
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 3 deletions.
6 changes: 5 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -557,4 +557,8 @@ Changed the HTML simplifier to return lists of documents instead of single docum
- Modified ProcessHTML to return []Document instead of Document
- Updated test-html-selector to handle document lists
- Changed output format to show arrays of documents for HTML and context
- Updated documentation with new output format examples
- Updated documentation with new output format examples

## Add Clay logging initialization to HTML tools

Added proper Clay logging initialization to test-html-selector and simplify-html tools to match mcp-server's logging setup.
12 changes: 12 additions & 0 deletions cmd/tools/simplify-html/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io"
"os"

clay "github.com/go-go-golems/clay/pkg"
"github.com/go-go-golems/glazed/pkg/cli"
"github.com/go-go-golems/glazed/pkg/cmds"
"github.com/go-go-golems/glazed/pkg/cmds/layers"
Expand Down Expand Up @@ -207,8 +208,19 @@ func main() {
var rootCmd = &cobra.Command{
Use: "simplify-html",
Short: "Simplify and minimize HTML documents",
PersistentPreRun: func(cmd *cobra.Command, args []string) {
// Reinitialize the logger here: by the time this hook runs, --log-level and
// the other logging flags are available from the command line.
err := clay.InitLogger()
cobra.CheckErr(err)
},
}

err := clay.InitViper("simplify-html", rootCmd)
cobra.CheckErr(err)
err = clay.InitLogger()
cobra.CheckErr(err)

helpSystem := help.NewHelpSystem()
helpSystem.SetupCobraRootCommand(rootCmd)

Expand Down
12 changes: 12 additions & 0 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io"
"os"

clay "github.com/go-go-golems/clay/pkg"
"github.com/go-go-golems/glazed/pkg/cli"
"github.com/go-go-golems/glazed/pkg/cmds"
"github.com/go-go-golems/glazed/pkg/cmds/layers"
Expand Down Expand Up @@ -286,8 +287,19 @@ func main() {
var rootCmd = &cobra.Command{
Use: "test-html-selector",
Short: "Test HTML/XPath selectors against HTML documents",
PersistentPreRun: func(cmd *cobra.Command, args []string) {
// Reinitialize the logger here: by the time this hook runs, --log-level and
// the other logging flags are available from the command line.
err := clay.InitLogger()
cobra.CheckErr(err)
},
}

err := clay.InitViper("test-html-selector", rootCmd)
cobra.CheckErr(err)
err = clay.InitLogger()
cobra.CheckErr(err)

helpSystem := help.NewHelpSystem()
helpSystem.SetupCobraRootCommand(rootCmd)

Expand Down
2 changes: 1 addition & 1 deletion pkg/htmlsimplifier/node_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func NewNodeHandler(opts Options) *NodeHandler {
// Configure default strategies
h.tagStrategies["html"] = StrategyUnwrap
h.tagStrategies["head"] = StrategyFilter
h.tagStrategies["body"] = StrategyUnwrap
h.tagStrategies["body"] = StrategyDefault

if opts.StripScripts {
h.tagStrategies["script"] = StrategyFilter
Expand Down
13 changes: 12 additions & 1 deletion pkg/htmlsimplifier/simplifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,18 @@ func (s *Simplifier) ProcessHTML(htmlContent string) ([]Document, error) {
doc.Find("[data-simplifier-keep]").RemoveAttr("data-simplifier-keep")
}

return s.processNode(doc.Get(0)), nil
docs := s.processNode(doc.Get(0))
if len(docs) == 0 {
return nil, fmt.Errorf("no documents found")
}
if len(docs) == 1 && docs[0].Tag == "body" {
if len(docs[0].Children) > 0 {
return docs[0].Children, nil
}
docs[0].Tag = ""
return []Document{docs[0]}, nil
}
return docs, nil
}

func (s *Simplifier) processNode(node *html.Node) []Document {
Expand Down

0 comments on commit d6bd34b

Please sign in to comment.