package service import ( "context" "errors" "io" "net" "net/http" "net/url" "strings" "syscall" "time" "golang.org/x/net/html" ) type FetchedMeta struct { Title string `json:"title"` Description string `json:"description"` Icon string `json:"icon"` } const ( metaTimeout = 8 * time.Second metaMaxBodyBytes = 1 << 20 // 1 MiB ) var safeHTTPClient = &http.Client{ Timeout: metaTimeout, Transport: &http.Transport{ DialContext: (&net.Dialer{ Timeout: 5 * time.Second, KeepAlive: 30 * time.Second, Control: restrictAddress, }).DialContext, ForceAttemptHTTP2: true, MaxIdleConns: 10, IdleConnTimeout: 30 * time.Second, TLSHandshakeTimeout: 5 * time.Second, ExpectContinueTimeout: 1 * time.Second, }, CheckRedirect: func(req *http.Request, via []*http.Request) error { if len(via) >= 5 { return errors.New("too many redirects") } return nil }, } func FetchURLMeta(ctx context.Context, raw string) (*FetchedMeta, error) { u, err := normalizeURL(raw) if err != nil { return nil, err } req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) if err != nil { return nil, err } req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; EvanPageBot/1.0)") req.Header.Set("Accept", "text/html,application/xhtml+xml") resp, err := safeHTTPClient.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode >= 400 { return nil, errors.New("upstream returned " + resp.Status) } doc, err := html.Parse(io.LimitReader(resp.Body, metaMaxBodyBytes)) if err != nil { return nil, err } meta := extractMeta(doc) meta.Icon = resolveIcon(u, meta.Icon) return meta, nil } func normalizeURL(raw string) (*url.URL, error) { raw = strings.TrimSpace(raw) if raw == "" { return nil, errors.New("url is required") } if !strings.Contains(raw, "://") { raw = "https://" + raw } u, err := url.Parse(raw) if err != nil { return nil, errors.New("invalid url") } if u.Scheme != "http" && u.Scheme != "https" { return nil, errors.New("only http(s) urls are allowed") } if u.Host == "" { return nil, errors.New("invalid url") } return u, nil } func restrictAddress(network, address string, _ syscall.RawConn) error { if network != "tcp" && network != "tcp4" && network != "tcp6" { return errors.New("disallowed network") } host, _, err := net.SplitHostPort(address) if err != nil { return err } ip := net.ParseIP(host) if ip == nil { return errors.New("address is not an ip") } if isBlockedIP(ip) { return errors.New("blocked ip range") } return nil } func isBlockedIP(ip net.IP) bool { if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsInterfaceLocalMulticast() || ip.IsMulticast() || ip.IsUnspecified() { return true } // Block 100.64.0.0/10 (CGNAT) and 169.254.0.0/16 (link-local) explicitly cgnat := net.IPNet{IP: net.IPv4(100, 64, 0, 0), Mask: net.CIDRMask(10, 32)} if v4 := ip.To4(); v4 != nil && cgnat.Contains(v4) { return true } return false } func extractMeta(n *html.Node) *FetchedMeta { m := &FetchedMeta{} var walk func(*html.Node) walk = func(node *html.Node) { if node.Type == html.ElementNode { switch strings.ToLower(node.Data) { case "title": if m.Title == "" && node.FirstChild != nil { m.Title = strings.TrimSpace(textOf(node)) } case "meta": name := strings.ToLower(attr(node, "name")) prop := strings.ToLower(attr(node, "property")) content := attr(node, "content") switch { case name == "description" && m.Description == "": m.Description = strings.TrimSpace(content) case prop == "og:description" && m.Description == "": m.Description = strings.TrimSpace(content) case prop == "og:title" && m.Title == "": m.Title = strings.TrimSpace(content) } case "link": rel := strings.ToLower(attr(node, "rel")) href := attr(node, "href") if href == "" { return } if strings.Contains(rel, "icon") && m.Icon == "" { m.Icon = href } } } for c := node.FirstChild; c != nil; c = c.NextSibling { walk(c) } } walk(n) return m } func attr(n *html.Node, key string) string { for _, a := range n.Attr { if strings.EqualFold(a.Key, key) { return a.Val } } return "" } func textOf(n *html.Node) string { var b strings.Builder for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.TextNode { b.WriteString(c.Data) } else { b.WriteString(textOf(c)) } } return b.String() } func resolveIcon(base *url.URL, icon string) string { if icon == "" { return base.Scheme + "://" + base.Host + "/favicon.ico" } ref, err := url.Parse(icon) if err != nil { return "" } if ref.IsAbs() { return ref.String() } return base.ResolveReference(ref).String() }