| 1 | package main |
| 2 | |
import (
	"bytes"
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"math/rand/v2"
	"net/http"
	"net/url"
	"strings"
	"time"
)
| 14 | |
// aicSearchURL is the artwork search endpoint (api.artic.edu) that every
// request built in this file is sent to.
const aicSearchURL = "https://api.artic.edu/api/v1/artworks/search"
| 16 | |
// aicFields lists the attributes requested from the search API when a full
// artwork record is fetched. The names correspond to the JSON tags on
// rawArtwork.
var aicFields = []string{
	"id",
	"title",
	"artist_display",
	"artist_title",
	"date_display",
	"medium_display",
	"dimensions",
	"place_of_origin",
	"credit_line",
	"description",
	"short_description",
	"image_id",
}
| 22 | |
// aicExcludeFields names the document fields that the must_not phrase queries
// built by buildAICParams are matched against.
var aicExcludeFields = []string{
	"title",
	"description",
	"short_description",
	"term_titles",
	"subject_titles",
	"category_titles",
	"classification_titles",
}
| 27 | |
// rawArtwork is a single artwork record as decoded from a search response.
//
// Every attribute except ID is a *string, so a field that is absent or JSON
// null in the payload stays nil and can be told apart from an empty string.
type rawArtwork struct {
	ID               int64   `json:"id"`
	Title            *string `json:"title"`
	ArtistDisplay    *string `json:"artist_display"`
	ArtistTitle      *string `json:"artist_title"`
	DateDisplay      *string `json:"date_display"`
	MediumDisplay    *string `json:"medium_display"`
	Dimensions       *string `json:"dimensions"`
	PlaceOfOrigin    *string `json:"place_of_origin"`
	CreditLine       *string `json:"credit_line"`
	Description      *string `json:"description"`
	ShortDescription *string `json:"short_description"`
	ImageID          *string `json:"image_id"`
}
| 42 | |
| 43 | type searchResponse struct { |
| 44 | Pagination struct { |
| 45 | Total uint64 `json:"total"` |
| 46 | } `json:"pagination"` |
| 47 | Data []rawArtwork `json:"data"` |
| 48 | } |
| 49 | |
// buildHTTPClient returns a fresh HTTP client whose requests are bounded by a
// 20-second overall timeout; all other client settings keep their zero value.
func buildHTTPClient() *http.Client {
	const requestTimeout = 20 * time.Second

	client := new(http.Client)
	client.Timeout = requestTimeout
	return client
}
| 53 | |
| 54 | func buildAICParams(classifications, excludeTerms []string) string { |
| 55 | terms := make([]string, 0, len(classifications)) |
| 56 | for _, c := range classifications { |
| 57 | terms = append(terms, lower(c)) |
| 58 | } |
| 59 | mustNot := make([]map[string]any, 0, len(excludeTerms)) |
| 60 | for _, t := range excludeTerms { |
| 61 | mustNot = append(mustNot, map[string]any{ |
| 62 | "multi_match": map[string]any{ |
| 63 | "query": t, |
| 64 | "fields": aicExcludeFields, |
| 65 | "type": "phrase", |
| 66 | }, |
| 67 | }) |
| 68 | } |
| 69 | body := map[string]any{ |
| 70 | "query": map[string]any{ |
| 71 | "bool": map[string]any{ |
| 72 | "must": []any{ |
| 73 | map[string]any{"term": map[string]any{"is_public_domain": true}}, |
| 74 | map[string]any{"terms": map[string]any{"classification_title.keyword": terms}}, |
| 75 | map[string]any{"exists": map[string]any{"field": "image_id"}}, |
| 76 | }, |
| 77 | "must_not": mustNot, |
| 78 | }, |
| 79 | }, |
| 80 | } |
| 81 | buf, _ := json.Marshal(body) |
| 82 | return string(buf) |
| 83 | } |
| 84 | |
// lower returns s with the ASCII letters A-Z folded to a-z. It works byte by
// byte, so every other byte, including those of multi-byte UTF-8 characters,
// is copied through unchanged.
func lower(s string) string {
	const caseGap = 'a' - 'A'

	out := make([]byte, len(s))
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if 'A' <= ch && ch <= 'Z' {
			ch += caseGap
		}
		out[i] = ch
	}
	return string(out)
}
| 94 | |
| 95 | func aicTotalMatching(ctx context.Context, client *http.Client, classifications, excludeTerms []string) (uint64, error) { |
| 96 | params := buildAICParams(classifications, excludeTerms) |
| 97 | u := fmt.Sprintf("%s?params=%s&limit=1&fields=id", aicSearchURL, url.QueryEscape(params)) |
| 98 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) |
| 99 | if err != nil { |
| 100 | return 0, err |
| 101 | } |
| 102 | req.Header.Set("User-Agent", "andromeda-easel/0.1 (+https://github.com/stevedylandev/andromeda)") |
| 103 | resp, err := client.Do(req) |
| 104 | if err != nil { |
| 105 | return 0, fmt.Errorf("count fetch failed: %w", err) |
| 106 | } |
| 107 | defer resp.Body.Close() |
| 108 | if resp.StatusCode < 200 || resp.StatusCode >= 300 { |
| 109 | return 0, fmt.Errorf("count status %s", resp.Status) |
| 110 | } |
| 111 | var sr searchResponse |
| 112 | if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil { |
| 113 | return 0, err |
| 114 | } |
| 115 | return sr.Pagination.Total, nil |
| 116 | } |
| 117 | |
| 118 | func aicFetchArtworkAt(ctx context.Context, client *http.Client, classifications, excludeTerms []string, page uint64) (*rawArtwork, error) { |
| 119 | params := buildAICParams(classifications, excludeTerms) |
| 120 | u := fmt.Sprintf("%s?params=%s&limit=1&page=%d&fields=%s", |
| 121 | aicSearchURL, url.QueryEscape(params), page, joinFields(aicFields)) |
| 122 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) |
| 123 | if err != nil { |
| 124 | return nil, err |
| 125 | } |
| 126 | req.Header.Set("User-Agent", "andromeda-easel/0.1") |
| 127 | resp, err := client.Do(req) |
| 128 | if err != nil { |
| 129 | return nil, fmt.Errorf("artwork fetch failed: %w", err) |
| 130 | } |
| 131 | defer resp.Body.Close() |
| 132 | if resp.StatusCode < 200 || resp.StatusCode >= 300 { |
| 133 | return nil, fmt.Errorf("artwork status %s", resp.Status) |
| 134 | } |
| 135 | body := &bytes.Buffer{} |
| 136 | if _, err := body.ReadFrom(resp.Body); err != nil { |
| 137 | return nil, err |
| 138 | } |
| 139 | var sr searchResponse |
| 140 | if err := json.Unmarshal(body.Bytes(), &sr); err != nil { |
| 141 | return nil, err |
| 142 | } |
| 143 | if len(sr.Data) == 0 { |
| 144 | return nil, nil |
| 145 | } |
| 146 | return &sr.Data[len(sr.Data)-1], nil |
| 147 | } |
| 148 | |
// joinFields returns the entries of fs joined with commas, in order. A nil or
// empty slice yields the empty string.
func joinFields(fs []string) string {
	// strings.Join sizes the result once instead of re-allocating on every
	// concatenation.
	return strings.Join(fs, ",")
}
| 159 | |
| 160 | func pickUnique(ctx context.Context, client *http.Client, db *sql.DB, classifications, excludeTerms []string, maxRetries int) (*rawArtwork, error) { |
| 161 | total, err := aicTotalMatching(ctx, client, classifications, excludeTerms) |
| 162 | if err != nil { |
| 163 | return nil, err |
| 164 | } |
| 165 | if total == 0 { |
| 166 | return nil, fmt.Errorf("AIC search returned zero matches") |
| 167 | } |
| 168 | for attempt := 0; attempt <= maxRetries; attempt++ { |
| 169 | page := rand.Uint64N(total) + 1 |
| 170 | art, err := aicFetchArtworkAt(ctx, client, classifications, excludeTerms, page) |
| 171 | if err != nil { |
| 172 | return nil, err |
| 173 | } |
| 174 | if art == nil || art.ImageID == nil || *art.ImageID == "" { |
| 175 | continue |
| 176 | } |
| 177 | exists, err := artworkIDExists(db, art.ID) |
| 178 | if err != nil { |
| 179 | return nil, fmt.Errorf("dedup check: %w", err) |
| 180 | } |
| 181 | if exists { |
| 182 | continue |
| 183 | } |
| 184 | return art, nil |
| 185 | } |
| 186 | return nil, fmt.Errorf("failed to pick non-duplicate artwork after %d retries", maxRetries+1) |
| 187 | } |
| 188 | |
| 189 | func rawToDaily(r *rawArtwork, date, fetchedAt string) *DailyArtwork { |
| 190 | if r.ImageID == nil || *r.ImageID == "" { |
| 191 | return nil |
| 192 | } |
| 193 | title := "Untitled" |
| 194 | if r.Title != nil && *r.Title != "" { |
| 195 | title = *r.Title |
| 196 | } |
| 197 | d := &DailyArtwork{ |
| 198 | Date: date, |
| 199 | ArtworkID: r.ID, |
| 200 | Title: title, |
| 201 | ImageID: *r.ImageID, |
| 202 | FetchedAt: fetchedAt, |
| 203 | } |
| 204 | if r.ArtistDisplay != nil { |
| 205 | d.ArtistDisplay = sql.NullString{String: *r.ArtistDisplay, Valid: true} |
| 206 | } |
| 207 | if r.ArtistTitle != nil { |
| 208 | d.ArtistTitle = sql.NullString{String: *r.ArtistTitle, Valid: true} |
| 209 | } |
| 210 | if r.DateDisplay != nil { |
| 211 | d.DateDisplay = sql.NullString{String: *r.DateDisplay, Valid: true} |
| 212 | } |
| 213 | if r.MediumDisplay != nil { |
| 214 | d.MediumDisplay = sql.NullString{String: *r.MediumDisplay, Valid: true} |
| 215 | } |
| 216 | if r.Dimensions != nil { |
| 217 | d.Dimensions = sql.NullString{String: *r.Dimensions, Valid: true} |
| 218 | } |
| 219 | if r.PlaceOfOrigin != nil { |
| 220 | d.PlaceOfOrigin = sql.NullString{String: *r.PlaceOfOrigin, Valid: true} |
| 221 | } |
| 222 | if r.CreditLine != nil { |
| 223 | d.CreditLine = sql.NullString{String: *r.CreditLine, Valid: true} |
| 224 | } |
| 225 | if r.Description != nil { |
| 226 | d.Description = sql.NullString{String: *r.Description, Valid: true} |
| 227 | } |
| 228 | if r.ShortDescription != nil { |
| 229 | d.ShortDescription = sql.NullString{String: *r.ShortDescription, Valid: true} |
| 230 | } |
| 231 | return d |
| 232 | } |