apps/og/og.go 3.9 K raw
1
package main
2
3
import (
4
	"context"
5
	"errors"
6
	"fmt"
7
	"io"
8
	"net/http"
9
	"net/url"
10
	"strings"
11
	"time"
12
13
	"golang.org/x/net/html"
14
)
15
16
// userAgent is the User-Agent header value sent with the outgoing page request.
const userAgent = "Mozilla/5.0 (compatible; OGPreview/1.0)"
17
18
type ogResult struct {
19
	OGTags   map[string]string
20
	OGOrder  []string
21
	Favicon  string
22
	LinkTags []linkTag
23
}
24
25
// fetchOGData downloads target and extracts its Open Graph metadata, favicon
// and <link> tags.
//
// The request is bound to ctx and additionally capped by a 10-second client
// timeout. Only 2xx responses whose Content-Type mentions text/html or
// application/xhtml (compared case-insensitively) are accepted, and at most
// 4 MiB of the body is read; anything past that limit is ignored.
//
// For every meta element whose property (or, failing that, name) attribute
// starts with "og:", the first occurrence of each key is stored in OGTags and
// the key is appended to OGOrder. Relative URLs found in the document are
// resolved against the URL that actually served the page, i.e. the final URL
// after any redirects, rather than against target.
//
// On any failure the returned *ogResult is nil and the error describes the
// failing step.
func fetchOGData(ctx context.Context, target string) (*ogResult, error) {
	parsed, err := url.Parse(target)
	if err != nil {
		return nil, fmt.Errorf("Invalid URL: %w", err)
	}

	// A Client with a nil Transport uses http.DefaultTransport, so building a
	// Client per call does not give up connection reuse.
	client := &http.Client{Timeout: 10 * time.Second}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if err != nil {
		return nil, fmt.Errorf("Failed to create HTTP client: %w", err)
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("Failed to fetch URL: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("HTTP error: %s", resp.Status)
	}

	// Media types are case-insensitive, so match on a lowered copy but report
	// the header exactly as the server sent it.
	ct := resp.Header.Get("Content-Type")
	ctLower := strings.ToLower(ct)
	if !strings.Contains(ctLower, "text/html") && !strings.Contains(ctLower, "application/xhtml") {
		return nil, fmt.Errorf("Not an HTML page (Content-Type: %s)", ct)
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, 4<<20))
	if err != nil {
		return nil, fmt.Errorf("Failed to read response body: %w", err)
	}
	doc, err := html.Parse(strings.NewReader(string(body)))
	if err != nil {
		return nil, errors.New("Failed to parse HTML")
	}

	// After redirects resp.Request is the last request that was sent; its URL
	// is the correct base for relative references inside the document.
	base := parsed
	if resp.Request != nil && resp.Request.URL != nil {
		base = resp.Request.URL
	}

	res := &ogResult{OGTags: map[string]string{}}
	walk(doc, func(n *html.Node) {
		if n.Type != html.ElementNode || !strings.EqualFold(n.Data, "meta") {
			return
		}
		attrs := attrsOf(n)
		key := attrs["property"]
		if key == "" {
			key = attrs["name"]
		}
		if !strings.HasPrefix(key, "og:") {
			return
		}
		// First occurrence wins; later duplicates are ignored.
		if _, exists := res.OGTags[key]; !exists {
			res.OGTags[key] = attrs["content"]
			res.OGOrder = append(res.OGOrder, key)
		}
	})

	// Resolving an empty reference yields the base URL itself, which would
	// report the page as its own image; leave an empty og:image untouched.
	if image := res.OGTags["og:image"]; image != "" {
		if u, err := base.Parse(image); err == nil {
			res.OGTags["og:image"] = u.String()
		}
	}

	res.Favicon = extractFavicon(doc, base)
	res.LinkTags = extractLinkTags(doc, base)
	return res, nil
}
87
88
// extractFavicon returns the absolute URL of the page's favicon.
//
// <link> elements are matched on their rel attribute, case-insensitively, in
// this order of preference: "icon", "shortcut icon", "apple-touch-icon".
// Within one preference level the first element in document order that has a
// non-empty href which resolves against base wins. When nothing matches, the
// result is "/favicon.ico" resolved against base, or "" if even that fails.
//
// The document is traversed once; candidates for every preference level are
// collected during that single pass.
func extractFavicon(doc *html.Node, base *url.URL) string {
	rels := []string{"icon", "shortcut icon", "apple-touch-icon"}
	found := make([]string, len(rels))

	walk(doc, func(n *html.Node) {
		// Nothing can outrank a hit for the most preferred rel.
		if found[0] != "" {
			return
		}
		if n.Type != html.ElementNode || !strings.EqualFold(n.Data, "link") {
			return
		}
		attrs := attrsOf(n)
		href := attrs["href"]
		if href == "" {
			return
		}
		// rel is a whitespace-separated token list: collapse any run of
		// whitespace so that e.g. "shortcut   icon" still matches.
		rel := strings.Join(strings.Fields(attrs["rel"]), " ")
		for i, want := range rels {
			if found[i] != "" || !strings.EqualFold(rel, want) {
				continue
			}
			if u, err := base.Parse(href); err == nil {
				found[i] = u.String()
			}
		}
	})

	for _, candidate := range found {
		if candidate != "" {
			return candidate
		}
	}
	if fb, err := base.Parse("/favicon.ico"); err == nil {
		return fb.String()
	}
	return ""
}
114
115
// extractLinkTags collects the <link> elements found under the document's
// first <head> element, in document order.
//
// For each element, Rel is the rel attribute, Href is the href attribute
// resolved against base (left as written when it is empty or cannot be
// parsed), and Extra is every other attribute rendered as key="value" and
// joined with single spaces. Values in Extra are not escaped. The result is
// nil when the document has no <head> or the head holds no <link>.
func extractLinkTags(doc *html.Node, base *url.URL) []linkTag {
	var headNode *html.Node
	walk(doc, func(n *html.Node) {
		if headNode == nil && n.Type == html.ElementNode && strings.EqualFold(n.Data, "head") {
			headNode = n
		}
	})
	if headNode == nil {
		return nil
	}

	var tags []linkTag
	walk(headNode, func(n *html.Node) {
		isLink := n.Type == html.ElementNode && strings.EqualFold(n.Data, "link")
		if !isLink {
			return
		}

		attrs := attrsOf(n)
		target := attrs["href"]
		if target != "" {
			if abs, err := base.Parse(target); err == nil {
				target = abs.String()
			}
		}

		// rel and href have dedicated fields; everything else goes to Extra.
		var extras []string
		for _, attr := range n.Attr {
			switch attr.Key {
			case "rel", "href":
				continue
			}
			extras = append(extras, fmt.Sprintf(`%s="%s"`, attr.Key, attr.Val))
		}

		tags = append(tags, linkTag{
			Rel:   attrs["rel"],
			Href:  target,
			Extra: strings.Join(extras, " "),
		})
	})
	return tags
}
151
152
// attrsOf returns the attributes of n as a map from lower-cased attribute
// name to value.
//
// If n carries the same attribute name more than once, the first occurrence
// is kept and later ones are ignored. The HTML Standard drops duplicate
// attributes during tokenization, so this is the value a browser would see.
func attrsOf(n *html.Node) map[string]string {
	out := make(map[string]string, len(n.Attr))
	for _, a := range n.Attr {
		key := strings.ToLower(a.Key)
		if _, dup := out[key]; dup {
			continue
		}
		out[key] = a.Val
	}
	return out
}
159
160
// walk performs a depth-first, pre-order traversal of the tree rooted at n,
// calling visit on n itself first and then on every descendant, with siblings
// handled in document order.
func walk(n *html.Node, visit func(*html.Node)) {
	visit(n)
	child := n.FirstChild
	for child != nil {
		walk(child, visit)
		child = child.NextSibling
	}
}