Files
root 832512469a backend: expand bookmark API with bulk ops and metadata fetcher
- bulk create/delete/move, reorder, rename-category endpoints
- /bookmarks/meta with SSRF-safe fetcher (blocks private/loopback IPs,
  8s timeout, 1 MiB body cap)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-02 22:52:43 +00:00

211 lines
4.8 KiB
Go

package service
import (
"context"
"errors"
"io"
"net"
"net/http"
"net/url"
"strings"
"syscall"
"time"
"golang.org/x/net/html"
)
// FetchedMeta is the metadata scraped from a remote HTML page. It is
// serialized to JSON with lowercase keys.
type FetchedMeta struct {
	// Title is the page title, taken from <title> or the og:title meta tag.
	Title string `json:"title"`

	// Description is taken from the description or og:description meta tag.
	Description string `json:"description"`

	// Icon is the URL of the page icon.
	Icon string `json:"icon"`
}
// Limits applied to every metadata fetch.
const (
	// metaTimeout bounds the whole HTTP exchange, including redirects and
	// reading the body.
	metaTimeout = time.Second * 8

	// metaMaxBodyBytes caps how much of the response body is parsed (1 MiB).
	metaMaxBodyBytes = 1024 * 1024
)
var safeHTTPClient = &http.Client{
Timeout: metaTimeout,
Transport: &http.Transport{
DialContext: (&net.Dialer{
Timeout: 5 * time.Second,
KeepAlive: 30 * time.Second,
Control: restrictAddress,
}).DialContext,
ForceAttemptHTTP2: true,
MaxIdleConns: 10,
IdleConnTimeout: 30 * time.Second,
TLSHandshakeTimeout: 5 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
},
CheckRedirect: func(req *http.Request, via []*http.Request) error {
if len(via) >= 5 {
return errors.New("too many redirects")
}
return nil
},
}
// FetchURLMeta downloads the HTML page at raw and extracts its title,
// description and icon.
//
// raw is normalized with normalizeURL first, so a bare host such as
// "example.com" is accepted. The request is issued through safeHTTPClient
// and is additionally bound to ctx. At most metaMaxBodyBytes of the body are
// parsed; a longer document is parsed as if it ended there.
//
// An error is returned when the URL is invalid, the request fails, the
// upstream answers with a status of 400 or above, or the HTML cannot be
// parsed.
func FetchURLMeta(ctx context.Context, raw string) (*FetchedMeta, error) {
	target, err := normalizeURL(raw)
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.String(), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; EvanPageBot/1.0)")
	req.Header.Set("Accept", "text/html,application/xhtml+xml")

	resp, err := safeHTTPClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		return nil, errors.New("upstream returned " + resp.Status)
	}

	doc, err := html.Parse(io.LimitReader(resp.Body, metaMaxBodyBytes))
	if err != nil {
		return nil, err
	}

	// Relative icon hrefs (and the /favicon.ico fallback) must be resolved
	// against the URL that actually served the document. After redirects
	// that is the URL of the last request, not the one we started with.
	base := target
	if resp.Request != nil && resp.Request.URL != nil {
		base = resp.Request.URL
	}

	meta := extractMeta(doc)
	meta.Icon = resolveIcon(base, meta.Icon)
	return meta, nil
}
// normalizeURL turns user input into an absolute http or https URL.
//
// Surrounding whitespace is trimmed. Input without a scheme (for example
// "example.com/page") is assumed to be https. An error is returned when the
// input is empty, cannot be parsed, uses a scheme other than http/https, or
// has no host name.
func normalizeURL(raw string) (*url.URL, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil, errors.New("url is required")
	}

	// "://" only marks a scheme when it comes before any path, query or
	// fragment. Otherwise it belongs to something like "?next=http://..."
	// on a scheme-less URL, which still needs the default scheme.
	if i := strings.Index(raw, "://"); i < 0 || strings.ContainsAny(raw[:i], "/?#") {
		raw = "https://" + raw
	}

	u, err := url.Parse(raw)
	if err != nil {
		return nil, errors.New("invalid url")
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return nil, errors.New("only http(s) urls are allowed")
	}
	// Hostname strips the port, so a port-only authority such as ":80" is
	// rejected here as well.
	if u.Hostname() == "" {
		return nil, errors.New("invalid url")
	}
	return u, nil
}
// restrictAddress has the signature of a net.Dialer Control hook. It allows
// a connection only when network is a TCP variant and address is a literal
// "ip:port" whose IP is not rejected by isBlockedIP; otherwise it returns an
// error. The raw connection argument is not used.
func restrictAddress(network, address string, _ syscall.RawConn) error {
	switch network {
	case "tcp", "tcp4", "tcp6":
		// allowed
	default:
		return errors.New("disallowed network")
	}

	host, _, splitErr := net.SplitHostPort(address)
	if splitErr != nil {
		return splitErr
	}

	target := net.ParseIP(host)
	switch {
	case target == nil:
		return errors.New("address is not an ip")
	case isBlockedIP(target):
		return errors.New("blocked ip range")
	}
	return nil
}
// blockedIPv4Nets lists special-purpose IPv4 ranges that must never be
// dialed and that the net.IP classification methods do not cover.
var blockedIPv4Nets = mustParseCIDRs(
	"0.0.0.0/8",       // "this network" (RFC 1122)
	"100.64.0.0/10",   // carrier-grade NAT shared address space (RFC 6598)
	"192.0.0.0/24",    // IETF protocol assignments (RFC 6890)
	"192.0.2.0/24",    // TEST-NET-1 documentation range (RFC 5737)
	"198.18.0.0/15",   // benchmarking (RFC 2544)
	"198.51.100.0/24", // TEST-NET-2 documentation range (RFC 5737)
	"203.0.113.0/24",  // TEST-NET-3 documentation range (RFC 5737)
	"240.0.0.0/4",     // reserved, including the limited broadcast address
)

// nat64Net is the well-known NAT64 prefix (RFC 6052). Addresses inside it
// carry an IPv4 address in their last four bytes.
var nat64Net = mustParseCIDRs("64:ff9b::/96")[0]

// mustParseCIDRs parses compile-time constant CIDR strings and panics on a
// malformed entry, which can only be a programming error.
func mustParseCIDRs(cidrs ...string) []*net.IPNet {
	nets := make([]*net.IPNet, 0, len(cidrs))
	for _, c := range cidrs {
		_, n, err := net.ParseCIDR(c)
		if err != nil {
			panic("invalid blocked CIDR " + c + ": " + err.Error())
		}
		nets = append(nets, n)
	}
	return nets
}

// isBlockedIP reports whether ip must not be contacted by the metadata
// fetcher.
//
// Blocked are: invalid addresses, loopback, private (RFC 1918 and IPv6 ULA),
// link-local (which includes 169.254.0.0/16), multicast and unspecified
// addresses, the ranges in blockedIPv4Nets, and NAT64 addresses whose
// embedded IPv4 address is itself blocked. IPv4-mapped IPv6 addresses are
// judged as the IPv4 address they carry.
func isBlockedIP(ip net.IP) bool {
	ip16 := ip.To16()
	if ip16 == nil {
		// Not a valid IPv4 or IPv6 address: fail closed.
		return true
	}

	// IsMulticast also covers the link-local and interface-local multicast
	// scopes, so those need no separate checks.
	if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() ||
		ip.IsMulticast() || ip.IsUnspecified() {
		return true
	}

	if v4 := ip.To4(); v4 != nil {
		for _, n := range blockedIPv4Nets {
			if n.Contains(v4) {
				return true
			}
		}
		return false
	}

	// A NAT64 gateway forwards these to the embedded IPv4 address, so judge
	// that address rather than the IPv6 wrapper.
	if nat64Net.Contains(ip16) {
		return isBlockedIP(net.IP(ip16[12:16]))
	}
	return false
}
// extractMeta walks the tree rooted at n in document order and collects the
// page title, description and icon href. For every field the first non-empty
// candidate wins:
//
//   - Title: text of a <title> element, or the og:title meta property.
//   - Description: the description meta name or og:description meta property.
//   - Icon: href of the first <link> whose rel contains "icon"; the href is
//     returned as written, without resolving it.
func extractMeta(n *html.Node) *FetchedMeta {
	out := new(FetchedMeta)

	var visit func(cur *html.Node)
	visit = func(cur *html.Node) {
		if cur.Type == html.ElementNode {
			switch strings.ToLower(cur.Data) {
			case "title":
				if out.Title == "" && cur.FirstChild != nil {
					out.Title = strings.TrimSpace(textOf(cur))
				}

			case "meta":
				name := strings.ToLower(attr(cur, "name"))
				property := strings.ToLower(attr(cur, "property"))
				value := strings.TrimSpace(attr(cur, "content"))

				describes := name == "description" || property == "og:description"
				if describes && out.Description == "" {
					out.Description = value
				} else if property == "og:title" && out.Title == "" {
					out.Title = value
				}

			case "link":
				href := attr(cur, "href")
				if href == "" {
					// A link without href is skipped together with anything
					// nested under it.
					return
				}
				if out.Icon == "" && strings.Contains(strings.ToLower(attr(cur, "rel")), "icon") {
					out.Icon = href
				}
			}
		}

		for child := cur.FirstChild; child != nil; child = child.NextSibling {
			visit(child)
		}
	}

	visit(n)
	return out
}
// attr returns the value of the first attribute of n whose key matches key
// case-insensitively, or "" when n has no such attribute.
func attr(n *html.Node, key string) string {
	for i := range n.Attr {
		if candidate := n.Attr[i]; strings.EqualFold(candidate.Key, key) {
			return candidate.Val
		}
	}
	return ""
}
// textOf concatenates, in document order, the data of every text node found
// anywhere below n. n itself is not inspected, only its descendants.
func textOf(n *html.Node) string {
	var sb strings.Builder

	var collect func(parent *html.Node)
	collect = func(parent *html.Node) {
		for child := parent.FirstChild; child != nil; child = child.NextSibling {
			if child.Type != html.TextNode {
				collect(child)
				continue
			}
			sb.WriteString(child.Data)
		}
	}

	collect(n)
	return sb.String()
}
// resolveIcon turns the icon href found in a page into an absolute URL.
//
// base is the URL of the page the href came from. Whitespace around icon is
// ignored, as HTML authors frequently wrap attribute values. When icon is
// empty or cannot be parsed as a URL, the conventional /favicon.ico at the
// root of base's host is returned. An absolute href is returned as is; any
// other href (path-relative, root-relative or scheme-relative) is resolved
// against base.
func resolveIcon(base *url.URL, icon string) string {
	fallback := base.Scheme + "://" + base.Host + "/favicon.ico"

	icon = strings.TrimSpace(icon)
	if icon == "" {
		return fallback
	}

	ref, err := url.Parse(icon)
	if err != nil {
		// A malformed href is no more useful than a missing one.
		return fallback
	}
	if ref.IsAbs() {
		return ref.String()
	}
	return base.ResolveReference(ref).String()
}