Files
ai-mailer/internal/fetcher/fetcher.go
2025-03-01 05:11:04 +01:00

75 lines
1.4 KiB
Go

package fetcher
import (
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"golang.org/x/net/html"
)
// Fetcher retrieves web pages over HTTP and reduces them to plain text.
type Fetcher struct {
// client performs the HTTP requests; New configures it with a timeout.
client *http.Client
}
// New returns a Fetcher whose HTTP client abandons any request that takes
// longer than 30 seconds.
func New() *Fetcher {
	httpClient := &http.Client{Timeout: 30 * time.Second}
	return &Fetcher{client: httpClient}
}
// extractText parses htmlContent as HTML and returns the contents of its text
// nodes, each trimmed of surrounding whitespace and separated by single
// spaces.
//
// Subtrees rooted at <script>, <style> and <noscript> elements are skipped:
// their children are JavaScript, CSS, or (with the parser's default scripting
// setting) unparsed fallback markup, none of which is readable page text.
//
// If the document cannot be parsed, htmlContent is returned unchanged.
func (f *Fetcher) extractText(htmlContent string) string {
	doc, err := html.Parse(strings.NewReader(htmlContent))
	if err != nil {
		return htmlContent // fall back to the raw content if parsing fails
	}

	var result strings.Builder
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		switch n.Type {
		case html.ElementNode:
			// The parser reports element names in lower case.
			switch n.Data {
			case "script", "style", "noscript":
				return // do not descend: nothing below is human-readable text
			}
		case html.TextNode:
			if text := strings.TrimSpace(n.Data); text != "" {
				result.WriteString(text)
				result.WriteString(" ")
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	// Drop the separator written after the final text node.
	return strings.TrimSpace(result.String())
}
// FetchContent issues an HTTP GET for url and returns the text that
// extractText produces from the response body.
//
// It returns an error if the request fails, if the server responds with a
// non-2xx status, or if the body cannot be read. The status check keeps error
// pages (404, 500, ...) from being handed back as if they were real content.
func (f *Fetcher) FetchContent(url string) (string, error) {
	resp, err := f.client.Get(url)
	if err != nil {
		// Errors from Client.Get already name the method and URL, so the
		// error is returned as-is rather than wrapped a second time.
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("fetching %q: unexpected status %s", url, resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %q: %w", url, err)
	}
	return f.extractText(string(body)), nil
}
// FetchAllURLs fetches every entry of urls, in order, through FetchContent and
// returns a map from each URL to its extracted text.
//
// It stops at the first failure and returns a nil map together with that
// error. A URL listed more than once is fetched each time; the last result
// is the one kept in the map.
func (f *Fetcher) FetchAllURLs(urls []string) (map[string]string, error) {
	fetched := map[string]string{}
	for i := range urls {
		target := urls[i]
		text, err := f.FetchContent(target)
		if err != nil {
			return nil, err
		}
		fetched[target] = text
	}
	return fetched, nil
}