From d6bd34b413a514b840ccdae0bf60e453643eb8b1 Mon Sep 17 00:00:00 2001 From: Manuel Odendahl Date: Sun, 26 Jan 2025 14:06:50 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=8A=20add=20Clay=20logging=20initializ?= =?UTF-8?q?ation=20to=20HTML=20tools?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- changelog.md | 6 +++++- cmd/tools/simplify-html/main.go | 12 ++++++++++++ cmd/tools/test-html-selector/main.go | 12 ++++++++++++ pkg/htmlsimplifier/node_handler.go | 2 +- pkg/htmlsimplifier/simplifier.go | 13 ++++++++++++- 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/changelog.md b/changelog.md index 1ba5eae..0fcdfb2 100644 --- a/changelog.md +++ b/changelog.md @@ -557,4 +557,8 @@ Changed the HTML simplifier to return lists of documents instead of single docum - Modified ProcessHTML to return []Document instead of Document - Updated test-html-selector to handle document lists - Changed output format to show arrays of documents for HTML and context -- Updated documentation with new output format examples \ No newline at end of file +- Updated documentation with new output format examples + +## Add Clay logging initialization to HTML tools + +Added proper Clay logging initialization to test-html-selector and simplify-html tools to match mcp-server's logging setup. \ No newline at end of file diff --git a/cmd/tools/simplify-html/main.go b/cmd/tools/simplify-html/main.go index bc206f2..0e46f94 100644 --- a/cmd/tools/simplify-html/main.go +++ b/cmd/tools/simplify-html/main.go @@ -6,6 +6,7 @@ import ( "io" "os" + clay "github.com/go-go-golems/clay/pkg" "github.com/go-go-golems/glazed/pkg/cli" "github.com/go-go-golems/glazed/pkg/cmds" "github.com/go-go-golems/glazed/pkg/cmds/layers" @@ -207,8 +208,19 @@ func main() { var rootCmd = &cobra.Command{ Use: "simplify-html", Short: "Simplify and minimize HTML documents", + PersistentPreRun: func(cmd *cobra.Command, args []string) { + // reinitialize the logger because we can now parse --log-level and co + // from the command line flag + err := clay.InitLogger() + cobra.CheckErr(err) + }, } + err := clay.InitViper("simplify-html", rootCmd) + cobra.CheckErr(err) + err = clay.InitLogger() + cobra.CheckErr(err) + helpSystem := help.NewHelpSystem() helpSystem.SetupCobraRootCommand(rootCmd) diff --git a/cmd/tools/test-html-selector/main.go b/cmd/tools/test-html-selector/main.go index 34db104..6ecbaa9 100644 --- a/cmd/tools/test-html-selector/main.go +++ b/cmd/tools/test-html-selector/main.go @@ -6,6 +6,7 @@ import ( "io" "os" + clay "github.com/go-go-golems/clay/pkg" "github.com/go-go-golems/glazed/pkg/cli" "github.com/go-go-golems/glazed/pkg/cmds" "github.com/go-go-golems/glazed/pkg/cmds/layers" @@ -286,8 +287,19 @@ func main() { var rootCmd = &cobra.Command{ Use: "test-html-selector", Short: "Test HTML/XPath selectors against HTML documents", + PersistentPreRun: func(cmd *cobra.Command, args []string) { + // reinitialize the logger because we can now parse --log-level and co + // from the command line flag + err := clay.InitLogger() + cobra.CheckErr(err) + }, } + err := clay.InitViper("test-html-selector", rootCmd) + cobra.CheckErr(err) + err = clay.InitLogger() + cobra.CheckErr(err) + helpSystem := help.NewHelpSystem() helpSystem.SetupCobraRootCommand(rootCmd) diff --git a/pkg/htmlsimplifier/node_handler.go b/pkg/htmlsimplifier/node_handler.go index 149ade9..b6ab81e 100644 --- a/pkg/htmlsimplifier/node_handler.go +++ b/pkg/htmlsimplifier/node_handler.go @@ -50,7 +50,7 @@ func NewNodeHandler(opts Options) *NodeHandler { // Configure default strategies h.tagStrategies["html"] = StrategyUnwrap h.tagStrategies["head"] = StrategyFilter - h.tagStrategies["body"] = StrategyUnwrap + h.tagStrategies["body"] = StrategyDefault if opts.StripScripts { h.tagStrategies["script"] = StrategyFilter diff --git a/pkg/htmlsimplifier/simplifier.go b/pkg/htmlsimplifier/simplifier.go index a794abc..a795511 100644 --- a/pkg/htmlsimplifier/simplifier.go +++ b/pkg/htmlsimplifier/simplifier.go @@ -155,7 +155,18 @@ func (s *Simplifier) ProcessHTML(htmlContent string) ([]Document, error) { doc.Find("[data-simplifier-keep]").RemoveAttr("data-simplifier-keep") } - return s.processNode(doc.Get(0)), nil + docs := s.processNode(doc.Get(0)) + if len(docs) == 0 { + return nil, fmt.Errorf("no documents found") + } + if len(docs) == 1 && docs[0].Tag == "body" { + if len(docs[0].Children) > 0 { + return docs[0].Children, nil + } + docs[0].Tag = "" + return []Document{docs[0]}, nil + } + return docs, nil } func (s *Simplifier) processNode(node *html.Node) []Document {