Skip to content

Commit

Permalink
✨ Add raw and Markdown output
Browse files Browse the repository at this point in the history
  • Loading branch information
wesen committed Jan 26, 2025
1 parent 622cb27 commit 4c30b52
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 24 deletions.
17 changes: 17 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,23 @@
"td.title"
],
"cwd": "${workspaceFolder}"
},
{
"name": "HTML Selector - Config File",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/tools/test-html-selector",
"args": [
"select",
"--files",
"hn.html",
"--log-level",
"DEBUG",
"--config",
"/tmp/html-extraction-2025-01-26-17-42-34.yaml"
],
"cwd": "${workspaceFolder}"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -347,23 +347,24 @@ selectors:
description: |
Extracts the user's skills.
template: |
{{- range . }}
{{ $ := .Data }}
# User Profile
**Name**: {{ index $.user_name 0 }}
**Email**: {{ index $.user_email 0 }}
**Location**: {{ index $.user_location 0 }}
## Skills
{{- range $.user_skills }}
- {{ . }}
{{- end }}
{{ end }}
config:
sample_count: 5
context_chars: 100
template: |
{{- range . }}
{{ $ := .Data }}
# User Profile
**Name**: {{ index $.user_name 0 }}
**Email**: {{ index $.user_email 0 }}
**Location**: {{ index $.user_location 0 }}
## Skills
{{- range $.user_skills }}
- {{ . }}
{{- end }}
{{ end }}
```
## Best Practices
Expand Down
27 changes: 22 additions & 5 deletions cmd/tools/test-html-selector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"github.com/go-go-golems/go-go-mcp/pkg/htmlsimplifier"
"github.com/spf13/cobra"
"gopkg.in/yaml.v3"

md "github.com/JohannesKaufmann/html-to-markdown"
)

type Config struct {
Expand All @@ -40,9 +42,12 @@ type Selector struct {
}

// SimplifiedSample is one sample entry in the converted ("normal") output
// assembled by RunIntoWriter. Every field carries a yaml tag with omitempty.
//
// NOTE(review): this span is a rendered diff, so HTML, Context and Path each
// appear twice. The first three declarations look like the pre-change
// versions superseded by the later ones — confirm against the real file.
type SimplifiedSample struct {
// NOTE(review): presumably the removed (pre-change) field declarations.
HTML []htmlsimplifier.Document `yaml:"html,omitempty"`
Context []htmlsimplifier.Document `yaml:"context,omitempty"`
Path string `yaml:"path,omitempty"`
// SimplifiedHTML receives the htmlDocs value built in RunIntoWriter
// (presumably the simplifier's output for the sample's HTML — TODO confirm).
SimplifiedHTML []htmlsimplifier.Document `yaml:"simplified_html,omitempty"`
// HTML is the sample's raw HTML string, copied from selectorSample.HTML.
HTML string `yaml:"html,omitempty"`
// SimplifiedContext receives the htmlDocs value built in the context branch
// of RunIntoWriter; it is left unset otherwise.
SimplifiedContext []htmlsimplifier.Document `yaml:"simplified_context,omitempty"`
// Context is the raw context string, copied from selectorSample.Context in
// the same branch that sets SimplifiedContext.
Context string `yaml:"context,omitempty"`
// Markdown is the result of converter.ConvertString on the sample's HTML.
Markdown string `yaml:"markdown,omitempty"`
// Path is presumably the element path, filled when path display is
// requested — the assignment is not visible here; verify.
Path string `yaml:"path,omitempty"`
}

type SimplifiedResult struct {
Expand Down Expand Up @@ -340,6 +345,9 @@ func (c *HTMLSelectorCommand) RunIntoWriter(
return yaml.NewEncoder(w).Encode(sourceResults)
}

// Create markdown converter
converter := md.NewConverter("", true, nil)

// Convert results to use Document structure for normal output
newResults := make(map[string]*SimplifiedResult)
for _, sourceResult := range sourceResults {
Expand All @@ -360,8 +368,14 @@ func (c *HTMLSelectorCommand) RunIntoWriter(
return fmt.Errorf("failed to process HTML: %w", err)
}

markdown, err := converter.ConvertString(selectorSample.HTML)
if err == nil {
}

sample := SimplifiedSample{
HTML: htmlDocs,
SimplifiedHTML: htmlDocs,
HTML: selectorSample.HTML,
Markdown: markdown,
}

if s.ShowPath {
Expand All @@ -372,7 +386,8 @@ func (c *HTMLSelectorCommand) RunIntoWriter(
if err != nil {
return fmt.Errorf("failed to process HTML: %w", err)
}
sample.Context = htmlDocs
sample.SimplifiedContext = htmlDocs
sample.Context = selectorSample.Context
}
newResults[selectorResult.Name].Samples = append(newResults[selectorResult.Name].Samples, sample)
}
Expand Down Expand Up @@ -449,12 +464,14 @@ func processSource(

result.Data = make(map[string][]interface{})
result.SelectorResults = results

for _, r := range results {
var matches []interface{}
for _, selectorSample := range r.Samples {
// Process HTML content
htmlDocs, err := simplifier.ProcessHTML(selectorSample.HTML)
if err == nil {

for _, doc := range htmlDocs {
if doc.Text != "" {
matches = append(matches, doc.Text)
Expand Down
6 changes: 1 addition & 5 deletions examples/html-extract/html-extraction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ flags:
type: bool
help: Show element path in results
default: false
- name: extract_template
type: string
help: Optional template file for formatting results

shell-script: |
CONFIG_FILE=$(mktemp)
Expand All @@ -39,5 +36,4 @@ shell-script: |
--urls {{ range .Args.urls }}{{ . }} {{ end }} \
--config $CONFIG_FILE \
{{ if .Args.show_context }}--show-context{{ end }} \
{{ if .Args.show_path }}--show-path{{ end }} \
{{ if .Args.extract_template }}--extract --extract-template {{ .Args.extract_template }}{{ end }}
{{ if .Args.show_path }}--show-path{{ end }} \

0 comments on commit 4c30b52

Please sign in to comment.