apps/easel/aic.go 6.4 K raw
1
package main
2
3
import (
	"bytes"
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"math/rand/v2"
	"net/http"
	"net/url"
	"strings"
	"time"
)
14
15
// aicSearchURL is the artwork search endpoint that both aicTotalMatching and
// aicFetchArtworkAt send their GET requests to.
const aicSearchURL = "https://api.artic.edu/api/v1/artworks/search"
16
17
// aicFields lists the artwork attributes requested via the "fields" query
// parameter in aicFetchArtworkAt. Each name matches a JSON tag on rawArtwork.
var aicFields = []string{
	"id",
	"title",
	"artist_display",
	"artist_title",
	"date_display",
	"medium_display",
	"dimensions",
	"place_of_origin",
	"credit_line",
	"description",
	"short_description",
	"image_id",
}
22
23
// aicExcludeFields names the document fields that every exclusion phrase is
// matched against in the must_not clauses built by buildAICParams.
var aicExcludeFields = []string{
	"title",
	"description",
	"short_description",
	"term_titles",
	"subject_titles",
	"category_titles",
	"classification_titles",
}
27
28
// rawArtwork is one artwork record as decoded from the search response.
//
// Every textual attribute is a pointer so that a JSON null (or an absent key)
// stays distinguishable from an empty string: such fields decode to nil.
type rawArtwork struct {
	// ID is the numeric artwork identifier.
	ID int64 `json:"id"`

	// Title is the artwork title; rawToDaily substitutes "Untitled" when it
	// is nil or empty.
	Title *string `json:"title"`

	// Artist attribution.
	ArtistDisplay *string `json:"artist_display"`
	ArtistTitle   *string `json:"artist_title"`

	// Catalogue details.
	DateDisplay   *string `json:"date_display"`
	MediumDisplay *string `json:"medium_display"`
	Dimensions    *string `json:"dimensions"`
	PlaceOfOrigin *string `json:"place_of_origin"`
	CreditLine    *string `json:"credit_line"`

	// Long and short descriptive text.
	Description      *string `json:"description"`
	ShortDescription *string `json:"short_description"`

	// ImageID identifies the artwork's image; records where it is nil or
	// empty are skipped by pickUnique and rejected by rawToDaily.
	ImageID *string `json:"image_id"`
}
42
43
type searchResponse struct {
44
	Pagination struct {
45
		Total uint64 `json:"total"`
46
	} `json:"pagination"`
47
	Data []rawArtwork `json:"data"`
48
}
49
50
// buildHTTPClient returns a fresh HTTP client whose only non-default setting
// is a 20-second overall timeout per request.
func buildHTTPClient() *http.Client {
	const requestTimeout = 20 * time.Second

	client := new(http.Client)
	client.Timeout = requestTimeout
	return client
}
53
54
func buildAICParams(classifications, excludeTerms []string) string {
55
	terms := make([]string, 0, len(classifications))
56
	for _, c := range classifications {
57
		terms = append(terms, lower(c))
58
	}
59
	mustNot := make([]map[string]any, 0, len(excludeTerms))
60
	for _, t := range excludeTerms {
61
		mustNot = append(mustNot, map[string]any{
62
			"multi_match": map[string]any{
63
				"query":  t,
64
				"fields": aicExcludeFields,
65
				"type":   "phrase",
66
			},
67
		})
68
	}
69
	body := map[string]any{
70
		"query": map[string]any{
71
			"bool": map[string]any{
72
				"must": []any{
73
					map[string]any{"term": map[string]any{"is_public_domain": true}},
74
					map[string]any{"terms": map[string]any{"classification_title.keyword": terms}},
75
					map[string]any{"exists": map[string]any{"field": "image_id"}},
76
				},
77
				"must_not": mustNot,
78
			},
79
		},
80
	}
81
	buf, _ := json.Marshal(body)
82
	return string(buf)
83
}
84
85
// lower returns s with every letter mapped to its lower-case form.
//
// It delegates to strings.ToLower so that non-ASCII capitals (for example
// "É") are folded as well; a byte-wise ASCII loop would leave them untouched
// and produce a mixed-case term.
func lower(s string) string {
	return strings.ToLower(s)
}
94
95
func aicTotalMatching(ctx context.Context, client *http.Client, classifications, excludeTerms []string) (uint64, error) {
96
	params := buildAICParams(classifications, excludeTerms)
97
	u := fmt.Sprintf("%s?params=%s&limit=1&fields=id", aicSearchURL, url.QueryEscape(params))
98
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
99
	if err != nil {
100
		return 0, err
101
	}
102
	req.Header.Set("User-Agent", "andromeda-easel/0.1 (+https://github.com/stevedylandev/andromeda)")
103
	resp, err := client.Do(req)
104
	if err != nil {
105
		return 0, fmt.Errorf("count fetch failed: %w", err)
106
	}
107
	defer resp.Body.Close()
108
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
109
		return 0, fmt.Errorf("count status %s", resp.Status)
110
	}
111
	var sr searchResponse
112
	if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
113
		return 0, err
114
	}
115
	return sr.Pagination.Total, nil
116
}
117
118
func aicFetchArtworkAt(ctx context.Context, client *http.Client, classifications, excludeTerms []string, page uint64) (*rawArtwork, error) {
119
	params := buildAICParams(classifications, excludeTerms)
120
	u := fmt.Sprintf("%s?params=%s&limit=1&page=%d&fields=%s",
121
		aicSearchURL, url.QueryEscape(params), page, joinFields(aicFields))
122
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
123
	if err != nil {
124
		return nil, err
125
	}
126
	req.Header.Set("User-Agent", "andromeda-easel/0.1")
127
	resp, err := client.Do(req)
128
	if err != nil {
129
		return nil, fmt.Errorf("artwork fetch failed: %w", err)
130
	}
131
	defer resp.Body.Close()
132
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
133
		return nil, fmt.Errorf("artwork status %s", resp.Status)
134
	}
135
	body := &bytes.Buffer{}
136
	if _, err := body.ReadFrom(resp.Body); err != nil {
137
		return nil, err
138
	}
139
	var sr searchResponse
140
	if err := json.Unmarshal(body.Bytes(), &sr); err != nil {
141
		return nil, err
142
	}
143
	if len(sr.Data) == 0 {
144
		return nil, nil
145
	}
146
	return &sr.Data[len(sr.Data)-1], nil
147
}
148
149
// joinFields returns the given names joined with commas, the form used for the
// "fields" query parameter. A nil or empty slice yields the empty string.
func joinFields(fs []string) string {
	return strings.Join(fs, ",")
}
159
160
// pickUnique selects a random matching artwork that has an image and whose id
// is not already known to artworkIDExists.
//
// It first obtains the number of matches, then makes up to maxRetries+1
// attempts. Each attempt draws a uniformly random page in 1..total (one
// artwork per page) and fetches it. Candidates that are missing, lack an image
// id, or are already recorded are skipped. Any fetch or dedup-lookup error
// aborts immediately; exhausting all attempts is reported as an error too.
//
// NOTE(review): the drawn page can be as large as the full match count; if the
// search API caps how deep results can be paged, draws beyond that cap would
// surface here as fetch errors — confirm against the API documentation.
func pickUnique(ctx context.Context, client *http.Client, db *sql.DB, classifications, excludeTerms []string, maxRetries int) (*rawArtwork, error) {
	matchCount, err := aicTotalMatching(ctx, client, classifications, excludeTerms)
	switch {
	case err != nil:
		return nil, err
	case matchCount == 0:
		return nil, fmt.Errorf("AIC search returned zero matches")
	}

	for attempt := 0; attempt <= maxRetries; attempt++ {
		// Uint64N yields 0..matchCount-1; shift into the 1..matchCount page range.
		pageNo := rand.Uint64N(matchCount) + 1

		candidate, err := aicFetchArtworkAt(ctx, client, classifications, excludeTerms, pageNo)
		if err != nil {
			return nil, err
		}
		if candidate == nil {
			continue
		}
		if img := candidate.ImageID; img == nil || *img == "" {
			continue
		}

		seen, err := artworkIDExists(db, candidate.ID)
		if err != nil {
			return nil, fmt.Errorf("dedup check: %w", err)
		}
		if !seen {
			return candidate, nil
		}
	}

	return nil, fmt.Errorf("failed to pick non-duplicate artwork after %d retries", maxRetries+1)
}
188
189
func rawToDaily(r *rawArtwork, date, fetchedAt string) *DailyArtwork {
190
	if r.ImageID == nil || *r.ImageID == "" {
191
		return nil
192
	}
193
	title := "Untitled"
194
	if r.Title != nil && *r.Title != "" {
195
		title = *r.Title
196
	}
197
	d := &DailyArtwork{
198
		Date:      date,
199
		ArtworkID: r.ID,
200
		Title:     title,
201
		ImageID:   *r.ImageID,
202
		FetchedAt: fetchedAt,
203
	}
204
	if r.ArtistDisplay != nil {
205
		d.ArtistDisplay = sql.NullString{String: *r.ArtistDisplay, Valid: true}
206
	}
207
	if r.ArtistTitle != nil {
208
		d.ArtistTitle = sql.NullString{String: *r.ArtistTitle, Valid: true}
209
	}
210
	if r.DateDisplay != nil {
211
		d.DateDisplay = sql.NullString{String: *r.DateDisplay, Valid: true}
212
	}
213
	if r.MediumDisplay != nil {
214
		d.MediumDisplay = sql.NullString{String: *r.MediumDisplay, Valid: true}
215
	}
216
	if r.Dimensions != nil {
217
		d.Dimensions = sql.NullString{String: *r.Dimensions, Valid: true}
218
	}
219
	if r.PlaceOfOrigin != nil {
220
		d.PlaceOfOrigin = sql.NullString{String: *r.PlaceOfOrigin, Valid: true}
221
	}
222
	if r.CreditLine != nil {
223
		d.CreditLine = sql.NullString{String: *r.CreditLine, Valid: true}
224
	}
225
	if r.Description != nil {
226
		d.Description = sql.NullString{String: *r.Description, Valid: true}
227
	}
228
	if r.ShortDescription != nil {
229
		d.ShortDescription = sql.NullString{String: *r.ShortDescription, Valid: true}
230
	}
231
	return d
232
}