- bulk create/delete/move, reorder, rename-category endpoints - /bookmarks/meta with SSRF-safe fetcher (blocks private/loopback IPs, 8s timeout, 1 MiB body cap) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
211 lines
4.8 KiB
Go
211 lines
4.8 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// FetchedMeta is the page metadata extracted from a fetched HTML document.
// It is serialized to JSON with the keys "title", "description" and "icon".
type FetchedMeta struct {
	// Title is the page title (from <title> or og:title).
	Title string `json:"title"`
	// Description is the page description (from the description or
	// og:description meta tag).
	Description string `json:"description"`
	// Icon is the URL of the page icon.
	Icon string `json:"icon"`
}
|
|
|
|
// Limits applied to every metadata fetch.
const (
	// metaTimeout bounds a whole fetch: it is used as the http.Client
	// Timeout, which covers connecting, redirects and reading the body.
	metaTimeout = 8 * time.Second
	// metaMaxBodyBytes caps how much of the response body is parsed.
	metaMaxBodyBytes = 1 << 20 // 1 MiB
)
|
|
|
|
var safeHTTPClient = &http.Client{
|
|
Timeout: metaTimeout,
|
|
Transport: &http.Transport{
|
|
DialContext: (&net.Dialer{
|
|
Timeout: 5 * time.Second,
|
|
KeepAlive: 30 * time.Second,
|
|
Control: restrictAddress,
|
|
}).DialContext,
|
|
ForceAttemptHTTP2: true,
|
|
MaxIdleConns: 10,
|
|
IdleConnTimeout: 30 * time.Second,
|
|
TLSHandshakeTimeout: 5 * time.Second,
|
|
ExpectContinueTimeout: 1 * time.Second,
|
|
},
|
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= 5 {
|
|
return errors.New("too many redirects")
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
func FetchURLMeta(ctx context.Context, raw string) (*FetchedMeta, error) {
|
|
u, err := normalizeURL(raw)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; EvanPageBot/1.0)")
|
|
req.Header.Set("Accept", "text/html,application/xhtml+xml")
|
|
|
|
resp, err := safeHTTPClient.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 400 {
|
|
return nil, errors.New("upstream returned " + resp.Status)
|
|
}
|
|
|
|
doc, err := html.Parse(io.LimitReader(resp.Body, metaMaxBodyBytes))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
meta := extractMeta(doc)
|
|
meta.Icon = resolveIcon(u, meta.Icon)
|
|
return meta, nil
|
|
}
|
|
|
|
// normalizeURL trims raw, prepends "https://" when no "://" is present, and
// parses the result.
//
// It returns an error when the input is empty, cannot be parsed, uses a scheme
// other than http or https, or has no hostname (for example "https://:8080",
// whose authority consists of a port only).
func normalizeURL(raw string) (*url.URL, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil, errors.New("url is required")
	}
	if !strings.Contains(raw, "://") {
		raw = "https://" + raw
	}

	u, err := url.Parse(raw)
	if err != nil {
		return nil, errors.New("invalid url")
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return nil, errors.New("only http(s) urls are allowed")
	}
	// Check Hostname rather than Host: Host still contains ":port" when the
	// actual host part is missing.
	if u.Hostname() == "" {
		return nil, errors.New("invalid url")
	}
	return u, nil
}
|
|
|
|
func restrictAddress(network, address string, _ syscall.RawConn) error {
|
|
if network != "tcp" && network != "tcp4" && network != "tcp6" {
|
|
return errors.New("disallowed network")
|
|
}
|
|
host, _, err := net.SplitHostPort(address)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ip := net.ParseIP(host)
|
|
if ip == nil {
|
|
return errors.New("address is not an ip")
|
|
}
|
|
if isBlockedIP(ip) {
|
|
return errors.New("blocked ip range")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// isBlockedIP reports whether ip must not be contacted by the metadata
// fetcher.
//
// Blocked are: malformed addresses, loopback, private (RFC 1918 / fc00::/7),
// link-local unicast (which includes 169.254.0.0/16), all multicast, the
// unspecified address, and the non-globally-routable IPv4 ranges listed below.
// IPv4-mapped IPv6 addresses are evaluated as their IPv4 form.
func isBlockedIP(ip net.IP) bool {
	// Fail closed on anything that is not a well-formed 4- or 16-byte IP.
	if ip.To16() == nil {
		return true
	}
	// IsMulticast already covers the link-local and interface-local
	// multicast scopes.
	if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() ||
		ip.IsMulticast() || ip.IsUnspecified() {
		return true
	}

	v4 := ip.To4()
	if v4 == nil {
		return false
	}
	reserved := [...]net.IPNet{
		{IP: net.IPv4(0, 0, 0, 0), Mask: net.CIDRMask(8, 32)},    // "this network"
		{IP: net.IPv4(100, 64, 0, 0), Mask: net.CIDRMask(10, 32)}, // CGNAT shared address space
		{IP: net.IPv4(192, 0, 0, 0), Mask: net.CIDRMask(24, 32)},  // IETF protocol assignments
		{IP: net.IPv4(198, 18, 0, 0), Mask: net.CIDRMask(15, 32)}, // benchmarking
		{IP: net.IPv4(240, 0, 0, 0), Mask: net.CIDRMask(4, 32)},   // reserved, incl. 255.255.255.255
	}
	for i := range reserved {
		if reserved[i].Contains(v4) {
			return true
		}
	}
	return false
}
|
|
|
|
func extractMeta(n *html.Node) *FetchedMeta {
|
|
m := &FetchedMeta{}
|
|
var walk func(*html.Node)
|
|
walk = func(node *html.Node) {
|
|
if node.Type == html.ElementNode {
|
|
switch strings.ToLower(node.Data) {
|
|
case "title":
|
|
if m.Title == "" && node.FirstChild != nil {
|
|
m.Title = strings.TrimSpace(textOf(node))
|
|
}
|
|
case "meta":
|
|
name := strings.ToLower(attr(node, "name"))
|
|
prop := strings.ToLower(attr(node, "property"))
|
|
content := attr(node, "content")
|
|
switch {
|
|
case name == "description" && m.Description == "":
|
|
m.Description = strings.TrimSpace(content)
|
|
case prop == "og:description" && m.Description == "":
|
|
m.Description = strings.TrimSpace(content)
|
|
case prop == "og:title" && m.Title == "":
|
|
m.Title = strings.TrimSpace(content)
|
|
}
|
|
case "link":
|
|
rel := strings.ToLower(attr(node, "rel"))
|
|
href := attr(node, "href")
|
|
if href == "" {
|
|
return
|
|
}
|
|
if strings.Contains(rel, "icon") && m.Icon == "" {
|
|
m.Icon = href
|
|
}
|
|
}
|
|
}
|
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
|
walk(c)
|
|
}
|
|
}
|
|
walk(n)
|
|
return m
|
|
}
|
|
|
|
func attr(n *html.Node, key string) string {
|
|
for _, a := range n.Attr {
|
|
if strings.EqualFold(a.Key, key) {
|
|
return a.Val
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func textOf(n *html.Node) string {
|
|
var b strings.Builder
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
if c.Type == html.TextNode {
|
|
b.WriteString(c.Data)
|
|
} else {
|
|
b.WriteString(textOf(c))
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
// resolveIcon turns the icon href found in a document into an absolute URL.
//
// Surrounding whitespace in icon is ignored. An absolute href is returned
// unchanged (as re-serialized by net/url); a relative or scheme-relative href
// is resolved against base. When icon is empty or cannot be parsed, the
// conventional "<scheme>://<host>/favicon.ico" location on base is returned.
func resolveIcon(base *url.URL, icon string) string {
	fallback := base.Scheme + "://" + base.Host + "/favicon.ico"

	icon = strings.TrimSpace(icon)
	if icon == "" {
		return fallback
	}
	ref, err := url.Parse(icon)
	if err != nil {
		// A malformed href is no more useful than a missing one.
		return fallback
	}
	if ref.IsAbs() {
		return ref.String()
	}
	return base.ResolveReference(ref).String()
}
|