chore: improved data indexing and added more data to the client
654390cd
14 file(s) · +586 −110
| 11 | 11 | "dev:client": "cd packages/client && npm run dev", |
|
| 12 | 12 | "deploy": "cd packages/server && npm run deploy", |
|
| 13 | 13 | "deploy:client": "cd packages/client && npm run pages:deploy", |
|
| 14 | - | "db:create": "cd packages/server && npm run db:create", |
|
| 14 | + | "db:init": "cd packages/server && npm run db:init", |
|
| 15 | + | "db:init:prod": "cd packages/server && npm run db:init:prod", |
|
| 15 | 16 | "db:migrate": "cd packages/server && npm run db:migrate", |
|
| 16 | 17 | "db:migrate:prod": "cd packages/server && npm run db:migrate:prod", |
|
| 17 | 18 | "secret:set": "cd packages/server && npm run secret:set" |
| 6 | 6 | "dev": "vite", |
|
| 7 | 7 | "build": "tsc && vite build", |
|
| 8 | 8 | "preview": "vite preview", |
|
| 9 | - | "pages:deploy": "vite build && wrangler pages deploy dist --project-name=atfeeds" |
|
| 9 | + | "deploy": "vite build && wrangler pages deploy dist" |
|
| 10 | 10 | }, |
|
| 11 | 11 | "dependencies": { |
|
| 12 | 12 | "react": "^18.2.0", |
| 3 | 3 | // API base URL - empty for same-origin (local dev), or set via env var for production |
|
| 4 | 4 | const API_URL = "https://atfeeds-api.stevedsimkins.workers.dev"; |
|
| 5 | 5 | ||
| 6 | + | interface BskyPostRef { |
|
| 7 | + | uri: string; |
|
| 8 | + | cid: string; |
|
| 9 | + | } |
|
| 10 | + | ||
| 11 | + | interface Publication { |
|
| 12 | + | url: string; |
|
| 13 | + | name: string; |
|
| 14 | + | description?: string; |
|
| 15 | + | iconCid?: string; |
|
| 16 | + | iconUrl?: string; |
|
| 17 | + | } |
|
| 18 | + | ||
| 6 | 19 | interface Document { |
|
| 7 | 20 | uri: string; |
|
| 8 | 21 | did: string; |
|
| 9 | 22 | rkey: string; |
|
| 10 | 23 | title: string; |
|
| 11 | - | path: string | null; |
|
| 12 | - | site: string | null; |
|
| 13 | - | content: { |
|
| 24 | + | description?: string; |
|
| 25 | + | path?: string; |
|
| 26 | + | site?: string; |
|
| 27 | + | content?: { |
|
| 14 | 28 | $type: string; |
|
| 15 | 29 | markdown?: string; |
|
| 16 | - | } | null; |
|
| 17 | - | textContent: string | null; |
|
| 18 | - | publishedAt: string | null; |
|
| 19 | - | viewUrl: string | null; |
|
| 30 | + | }; |
|
| 31 | + | textContent?: string; |
|
| 32 | + | coverImageCid?: string; |
|
| 33 | + | coverImageUrl?: string; |
|
| 34 | + | bskyPostRef?: BskyPostRef; |
|
| 35 | + | tags?: string[]; |
|
| 36 | + | publishedAt?: string; |
|
| 37 | + | updatedAt?: string; |
|
| 38 | + | publication?: Publication; |
|
| 39 | + | viewUrl?: string; |
|
| 40 | + | pdsEndpoint?: string; |
|
| 20 | 41 | } |
|
| 21 | 42 | ||
| 22 | 43 | interface FeedResponse { |
|
| 52 | 73 | fetchFeed(); |
|
| 53 | 74 | }, []); |
|
| 54 | 75 | ||
| 55 | - | const formatDate = (dateString: string | null) => { |
|
| 76 | + | const formatDate = (dateString?: string) => { |
|
| 56 | 77 | if (!dateString) return "Unknown date"; |
|
| 57 | 78 | return new Date(dateString).toLocaleDateString("en-US", { |
|
| 58 | 79 | year: "numeric", |
|
| 61 | 82 | }); |
|
| 62 | 83 | }; |
|
| 63 | 84 | ||
| 64 | - | const truncateText = (text: string | null, maxLength: number = 200) => { |
|
| 85 | + | const truncateText = (text?: string, maxLength: number = 200) => { |
|
| 65 | 86 | if (!text) return ""; |
|
| 66 | 87 | if (text.length <= maxLength) return text; |
|
| 67 | 88 | return text.slice(0, maxLength) + "..."; |
|
| 89 | + | }; |
|
| 90 | + | ||
| 91 | + | const getDescription = (doc: Document) => { |
|
| 92 | + | return doc.description || doc.textContent || ""; |
|
| 68 | 93 | }; |
|
| 69 | 94 | ||
| 70 | 95 | ||
| 130 | 155 | )} |
|
| 131 | 156 | </legend> |
|
| 132 | 157 | <div style={{ padding: "8px" }}> |
|
| 158 | + | {/* Publication info */} |
|
| 159 | + | {doc.publication && ( |
|
| 160 | + | <div |
|
| 161 | + | style={{ |
|
| 162 | + | display: "flex", |
|
| 163 | + | alignItems: "center", |
|
| 164 | + | gap: "8px", |
|
| 165 | + | marginBottom: "8px", |
|
| 166 | + | fontSize: "0.85em", |
|
| 167 | + | }} |
|
| 168 | + | > |
|
| 169 | + | {doc.publication.iconUrl && ( |
|
| 170 | + | <img |
|
| 171 | + | src={doc.publication.iconUrl} |
|
| 172 | + | alt={doc.publication.name} |
|
| 173 | + | style={{ |
|
| 174 | + | width: "16px", |
|
| 175 | + | height: "16px", |
|
| 176 | + | objectFit: "cover", |
|
| 177 | + | }} |
|
| 178 | + | /> |
|
| 179 | + | )} |
|
| 180 | + | <span style={{ fontWeight: "bold" }}> |
|
| 181 | + | {doc.publication.name} |
|
| 182 | + | </span> |
|
| 183 | + | </div> |
|
| 184 | + | )} |
|
| 185 | + | ||
| 186 | + | {/* Cover image */} |
|
| 187 | + | {doc.coverImageUrl && ( |
|
| 188 | + | <div style={{ marginBottom: "8px" }}> |
|
| 189 | + | <img |
|
| 190 | + | src={doc.coverImageUrl} |
|
| 191 | + | alt={doc.title} |
|
| 192 | + | style={{ |
|
| 193 | + | maxWidth: "100%", |
|
| 194 | + | maxHeight: "200px", |
|
| 195 | + | objectFit: "cover", |
|
| 196 | + | border: "1px solid #888", |
|
| 197 | + | }} |
|
| 198 | + | /> |
|
| 199 | + | </div> |
|
| 200 | + | )} |
|
| 201 | + | ||
| 202 | + | {/* Date */} |
|
| 133 | 203 | <div |
|
| 134 | 204 | style={{ |
|
| 135 | 205 | marginBottom: "8px", |
|
| 138 | 208 | }} |
|
| 139 | 209 | > |
|
| 140 | 210 | Published: {formatDate(doc.publishedAt)} |
|
| 211 | + | {doc.updatedAt && doc.updatedAt !== doc.publishedAt && ( |
|
| 212 | + | <> | Updated: {formatDate(doc.updatedAt)}</> |
|
| 213 | + | )} |
|
| 141 | 214 | </div> |
|
| 142 | - | {doc.textContent && ( |
|
| 215 | + | ||
| 216 | + | {/* Description */} |
|
| 217 | + | {getDescription(doc) && ( |
|
| 143 | 218 | <p style={{ marginBottom: "12px" }}> |
|
| 144 | - | {truncateText(doc.textContent)} |
|
| 219 | + | {truncateText(getDescription(doc))} |
|
| 145 | 220 | </p> |
|
| 146 | 221 | )} |
|
| 147 | - | {doc.viewUrl && ( |
|
| 148 | - | <div style={{ textAlign: "right" }}> |
|
| 222 | + | ||
| 223 | + | {/* Tags */} |
|
| 224 | + | {doc.tags && doc.tags.length > 0 && ( |
|
| 225 | + | <div |
|
| 226 | + | style={{ |
|
| 227 | + | display: "flex", |
|
| 228 | + | flexWrap: "wrap", |
|
| 229 | + | gap: "4px", |
|
| 230 | + | marginBottom: "12px", |
|
| 231 | + | }} |
|
| 232 | + | > |
|
| 233 | + | {doc.tags.map((tag) => ( |
|
| 234 | + | <span |
|
| 235 | + | key={tag} |
|
| 236 | + | style={{ |
|
| 237 | + | background: "#c0c0c0", |
|
| 238 | + | padding: "2px 6px", |
|
| 239 | + | fontSize: "0.75em", |
|
| 240 | + | border: "1px solid #808080", |
|
| 241 | + | }} |
|
| 242 | + | > |
|
| 243 | + | {tag} |
|
| 244 | + | </span> |
|
| 245 | + | ))} |
|
| 246 | + | </div> |
|
| 247 | + | )} |
|
| 248 | + | ||
| 249 | + | {/* Actions */} |
|
| 250 | + | <div style={{ display: "flex", gap: "8px", justifyContent: "flex-end" }}> |
|
| 251 | + | {doc.bskyPostRef && ( |
|
| 252 | + | <button |
|
| 253 | + | onClick={() => |
|
| 254 | + | window.open( |
|
| 255 | + | `https://bsky.app/profile/${doc.did}/post/${doc.bskyPostRef!.uri.split("/").pop()}`, |
|
| 256 | + | "_blank" |
|
| 257 | + | ) |
|
| 258 | + | } |
|
| 259 | + | > |
|
| 260 | + | View on Bluesky |
|
| 261 | + | </button> |
|
| 262 | + | )} |
|
| 263 | + | {doc.viewUrl && ( |
|
| 149 | 264 | <button |
|
| 150 | 265 | onClick={() => |
|
| 151 | 266 | window.open(doc.viewUrl || "", "_blank") |
|
| 153 | 268 | > |
|
| 154 | 269 | Read More |
|
| 155 | 270 | </button> |
|
| 156 | - | </div> |
|
| 157 | - | )} |
|
| 271 | + | )} |
|
| 272 | + | </div> |
|
| 158 | 273 | </div> |
|
| 159 | 274 | </fieldset> |
|
| 160 | 275 | ))} |
|
| 1 | + | -- Migration: Add full Document and Publication fields to resolved_documents |
|
| 2 | + | -- Run with: wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --remote |
|
| 3 | + | ||
| 4 | + | -- Document fields |
|
| 5 | + | ALTER TABLE resolved_documents ADD COLUMN description TEXT; |
|
| 6 | + | ALTER TABLE resolved_documents ADD COLUMN cover_image_cid TEXT; |
|
| 7 | + | ALTER TABLE resolved_documents ADD COLUMN cover_image_url TEXT; |
|
| 8 | + | ALTER TABLE resolved_documents ADD COLUMN bsky_post_ref TEXT; |
|
| 9 | + | ALTER TABLE resolved_documents ADD COLUMN tags TEXT; |
|
| 10 | + | ALTER TABLE resolved_documents ADD COLUMN updated_at TEXT; |
|
| 11 | + | ||
| 12 | + | -- Publication fields |
|
| 13 | + | ALTER TABLE resolved_documents ADD COLUMN pub_url TEXT; |
|
| 14 | + | ALTER TABLE resolved_documents ADD COLUMN pub_name TEXT; |
|
| 15 | + | ALTER TABLE resolved_documents ADD COLUMN pub_description TEXT; |
|
| 16 | + | ALTER TABLE resolved_documents ADD COLUMN pub_icon_cid TEXT; |
|
| 17 | + | ALTER TABLE resolved_documents ADD COLUMN pub_icon_url TEXT; |
|
| 18 | + | ||
| 19 | + | -- Metadata |
|
| 20 | + | ALTER TABLE resolved_documents ADD COLUMN pds_endpoint TEXT; |
|
| 21 | + | ||
| 22 | + | -- Index for publication queries |
|
| 23 | + | CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url); |
| 4 | 4 | "private": true, |
|
| 5 | 5 | "scripts": { |
|
| 6 | 6 | "dev": "wrangler dev", |
|
| 7 | - | "deploy": "wrangler deploy" |
|
| 7 | + | "deploy": "wrangler deploy", |
|
| 8 | + | "db:init": "wrangler d1 execute atfeeds-db --file=schema.sql --local", |
|
| 9 | + | "db:init:prod": "wrangler d1 execute atfeeds-db --file=schema.sql --remote", |
|
| 10 | + | "db:migrate": "wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --local", |
|
| 11 | + | "db:migrate:prod": "wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --remote" |
|
| 8 | 12 | }, |
|
| 9 | 13 | "dependencies": { |
|
| 10 | 14 | "hono": "^4.0.0" |
| 32 | 32 | uri TEXT PRIMARY KEY, |
|
| 33 | 33 | did TEXT NOT NULL, |
|
| 34 | 34 | rkey TEXT NOT NULL, |
|
| 35 | + | -- Document fields |
|
| 35 | 36 | title TEXT, |
|
| 37 | + | description TEXT, |
|
| 36 | 38 | path TEXT, |
|
| 37 | 39 | site TEXT, |
|
| 38 | - | content TEXT, -- JSON blob |
|
| 40 | + | content TEXT, -- JSON blob for content union |
|
| 39 | 41 | text_content TEXT, |
|
| 42 | + | cover_image_cid TEXT, -- CID for cover image blob |
|
| 43 | + | cover_image_url TEXT, -- Full URL: {pds}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid} |
|
| 44 | + | bsky_post_ref TEXT, -- JSON blob for strong reference {uri, cid} |
|
| 45 | + | tags TEXT, -- JSON array of strings |
|
| 40 | 46 | published_at TEXT, |
|
| 41 | - | view_url TEXT, |
|
| 47 | + | updated_at TEXT, |
|
| 48 | + | -- Publication fields (resolved from site at:// URI) |
|
| 49 | + | pub_url TEXT, -- Publication base URL |
|
| 50 | + | pub_name TEXT, |
|
| 51 | + | pub_description TEXT, |
|
| 52 | + | pub_icon_cid TEXT, -- CID for publication icon blob |
|
| 53 | + | pub_icon_url TEXT, -- Full URL to publication icon |
|
| 54 | + | -- Metadata |
|
| 55 | + | view_url TEXT, -- Constructed canonical URL (pub_url + path) |
|
| 56 | + | pds_endpoint TEXT, -- Cached PDS endpoint for this DID |
|
| 42 | 57 | resolved_at TEXT DEFAULT (datetime('now')), |
|
| 43 | 58 | stale_at TEXT -- When this record should be re-resolved |
|
| 44 | 59 | ); |
|
| 45 | 60 | ||
| 46 | 61 | CREATE INDEX IF NOT EXISTS idx_resolved_documents_rkey ON resolved_documents(rkey DESC); |
|
| 47 | 62 | CREATE INDEX IF NOT EXISTS idx_resolved_documents_stale ON resolved_documents(stale_at); |
|
| 63 | + | CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url); |
| 1 | 1 | import { Hono } from "hono"; |
|
| 2 | 2 | import { cors } from "hono/cors"; |
|
| 3 | 3 | import type { Bindings } from "./types"; |
|
| 4 | - | import { health, webhook, feed, stats, records } from "./routes"; |
|
| 4 | + | import { health, webhook, feed, stats, records, admin } from "./routes"; |
|
| 5 | 5 | import { processDocument } from "./utils"; |
|
| 6 | 6 | ||
| 7 | 7 | const app = new Hono<{ Bindings: Bindings }>(); |
|
| 15 | 15 | app.route("/feed", feed); |
|
| 16 | 16 | app.route("/stats", stats); |
|
| 17 | 17 | app.route("/records", records); |
|
| 18 | + | app.route("/admin", admin); |
|
| 18 | 19 | ||
| 19 | 20 | // Legacy alias: /feed-raw -> /feed/raw |
|
| 20 | 21 | app.get("/feed-raw", async (c) => { |
|
| 1 | + | import { Hono } from "hono"; |
|
| 2 | + | import type { Bindings } from "../types"; |
|
| 3 | + | ||
| 4 | + | const admin = new Hono<{ Bindings: Bindings }>(); |
|
| 5 | + | ||
| 6 | + | // Queue all documents for re-processing |
|
| 7 | + | admin.post("/resolve-all", async (c) => { |
|
| 8 | + | try { |
|
| 9 | + | const db = c.env.DB; |
|
| 10 | + | const queue = c.env.RESOLUTION_QUEUE; |
|
| 11 | + | ||
| 12 | + | // Get all records from repo_records |
|
| 13 | + | const { results } = await db |
|
| 14 | + | .prepare( |
|
| 15 | + | `SELECT did, rkey FROM repo_records |
|
| 16 | + | WHERE collection = 'site.standard.document'` |
|
| 17 | + | ) |
|
| 18 | + | .all<{ did: string; rkey: string }>(); |
|
| 19 | + | ||
| 20 | + | if (!results || results.length === 0) { |
|
| 21 | + | return c.json({ message: "No documents to process", queued: 0 }); |
|
| 22 | + | } |
|
| 23 | + | ||
| 24 | + | // Queue in batches of 100 (Cloudflare Queue limit) |
|
| 25 | + | const batchSize = 100; |
|
| 26 | + | let queued = 0; |
|
| 27 | + | ||
| 28 | + | for (let i = 0; i < results.length; i += batchSize) { |
|
| 29 | + | const batch = results.slice(i, i + batchSize); |
|
| 30 | + | const messages = batch.map((row) => ({ |
|
| 31 | + | body: { |
|
| 32 | + | did: row.did, |
|
| 33 | + | collection: "site.standard.document", |
|
| 34 | + | rkey: row.rkey, |
|
| 35 | + | }, |
|
| 36 | + | })); |
|
| 37 | + | ||
| 38 | + | await queue.sendBatch(messages); |
|
| 39 | + | queued += messages.length; |
|
| 40 | + | } |
|
| 41 | + | ||
| 42 | + | return c.json({ |
|
| 43 | + | message: "Documents queued for re-processing", |
|
| 44 | + | queued, |
|
| 45 | + | }); |
|
| 46 | + | } catch (error) { |
|
| 47 | + | return c.json( |
|
| 48 | + | { error: "Failed to queue documents", details: String(error) }, |
|
| 49 | + | 500 |
|
| 50 | + | ); |
|
| 51 | + | } |
|
| 52 | + | }); |
|
| 53 | + | ||
| 54 | + | // Mark all documents as stale (alternative - lets cron handle it) |
|
| 55 | + | admin.post("/mark-stale", async (c) => { |
|
| 56 | + | try { |
|
| 57 | + | const db = c.env.DB; |
|
| 58 | + | ||
| 59 | + | const result = await db |
|
| 60 | + | .prepare( |
|
| 61 | + | `UPDATE resolved_documents SET stale_at = datetime('now', '-1 hour')` |
|
| 62 | + | ) |
|
| 63 | + | .run(); |
|
| 64 | + | ||
| 65 | + | return c.json({ |
|
| 66 | + | message: "All documents marked as stale", |
|
| 67 | + | affected: result.meta.changes, |
|
| 68 | + | }); |
|
| 69 | + | } catch (error) { |
|
| 70 | + | return c.json( |
|
| 71 | + | { error: "Failed to mark documents as stale", details: String(error) }, |
|
| 72 | + | 500 |
|
| 73 | + | ); |
|
| 74 | + | } |
|
| 75 | + | }); |
|
| 76 | + | ||
| 77 | + | export default admin; |
| 1 | 1 | import { Hono } from "hono"; |
|
| 2 | - | import type { Bindings } from "../types"; |
|
| 2 | + | import type { Bindings, ResolvedDocumentRow, Document, Publication, BskyPostRef } from "../types"; |
|
| 3 | 3 | ||
| 4 | 4 | const feed = new Hono<{ Bindings: Bindings }>(); |
|
| 5 | 5 | ||
| 6 | + | /** |
|
| 7 | + | * Transforms a database row into a Document object for the API response. |
|
| 8 | + | */ |
|
| 9 | + | function rowToDocument(row: ResolvedDocumentRow): Document { |
|
| 10 | + | // Build publication object if we have publication data |
|
| 11 | + | let publication: Publication | undefined; |
|
| 12 | + | if (row.pub_url && row.pub_name) { |
|
| 13 | + | publication = { |
|
| 14 | + | url: row.pub_url, |
|
| 15 | + | name: row.pub_name, |
|
| 16 | + | description: row.pub_description || undefined, |
|
| 17 | + | iconCid: row.pub_icon_cid || undefined, |
|
| 18 | + | iconUrl: row.pub_icon_url || undefined, |
|
| 19 | + | }; |
|
| 20 | + | } |
|
| 21 | + | ||
| 22 | + | // Parse bskyPostRef if present |
|
| 23 | + | let bskyPostRef: BskyPostRef | undefined; |
|
| 24 | + | if (row.bsky_post_ref) { |
|
| 25 | + | try { |
|
| 26 | + | bskyPostRef = JSON.parse(row.bsky_post_ref); |
|
| 27 | + | } catch { |
|
| 28 | + | // Ignore parse errors |
|
| 29 | + | } |
|
| 30 | + | } |
|
| 31 | + | ||
| 32 | + | // Parse tags if present |
|
| 33 | + | let tags: string[] | undefined; |
|
| 34 | + | if (row.tags) { |
|
| 35 | + | try { |
|
| 36 | + | tags = JSON.parse(row.tags); |
|
| 37 | + | } catch { |
|
| 38 | + | // Ignore parse errors |
|
| 39 | + | } |
|
| 40 | + | } |
|
| 41 | + | ||
| 42 | + | // Parse content if present |
|
| 43 | + | let content: unknown | undefined; |
|
| 44 | + | if (row.content) { |
|
| 45 | + | try { |
|
| 46 | + | content = JSON.parse(row.content); |
|
| 47 | + | } catch { |
|
| 48 | + | // Ignore parse errors |
|
| 49 | + | } |
|
| 50 | + | } |
|
| 51 | + | ||
| 52 | + | return { |
|
| 53 | + | uri: row.uri, |
|
| 54 | + | did: row.did, |
|
| 55 | + | rkey: row.rkey, |
|
| 56 | + | title: row.title || "Untitled", |
|
| 57 | + | description: row.description || undefined, |
|
| 58 | + | path: row.path || undefined, |
|
| 59 | + | site: row.site || undefined, |
|
| 60 | + | content, |
|
| 61 | + | textContent: row.text_content || undefined, |
|
| 62 | + | coverImageCid: row.cover_image_cid || undefined, |
|
| 63 | + | coverImageUrl: row.cover_image_url || undefined, |
|
| 64 | + | bskyPostRef, |
|
| 65 | + | tags, |
|
| 66 | + | publishedAt: row.published_at || undefined, |
|
| 67 | + | updatedAt: row.updated_at || undefined, |
|
| 68 | + | publication, |
|
| 69 | + | viewUrl: row.view_url || undefined, |
|
| 70 | + | pdsEndpoint: row.pds_endpoint || undefined, |
|
| 71 | + | }; |
|
| 72 | + | } |
|
| 73 | + | ||
| 6 | 74 | // Get raw feed data (for client-side fetching) |
|
| 7 | 75 | // Accessible at both /feed/raw and /feed-raw (via alias in index.ts) |
|
| 8 | 76 | feed.get("/raw", async (c) => { |
|
| 44 | 112 | ||
| 45 | 113 | const { results } = await db |
|
| 46 | 114 | .prepare( |
|
| 47 | - | `SELECT uri, did, rkey, title, path, site, content, text_content, published_at, view_url |
|
| 115 | + | `SELECT uri, did, rkey, title, description, path, site, content, text_content, |
|
| 116 | + | cover_image_cid, cover_image_url, bsky_post_ref, tags, |
|
| 117 | + | published_at, updated_at, pub_url, pub_name, pub_description, |
|
| 118 | + | pub_icon_cid, pub_icon_url, view_url, pds_endpoint, |
|
| 119 | + | resolved_at, stale_at |
|
| 48 | 120 | FROM resolved_documents |
|
| 49 | 121 | ORDER BY rkey DESC |
|
| 50 | 122 | LIMIT ? OFFSET ?` |
|
| 51 | 123 | ) |
|
| 52 | 124 | .bind(limit, offset) |
|
| 53 | - | .all<{ |
|
| 54 | - | uri: string; |
|
| 55 | - | did: string; |
|
| 56 | - | rkey: string; |
|
| 57 | - | title: string | null; |
|
| 58 | - | path: string | null; |
|
| 59 | - | site: string | null; |
|
| 60 | - | content: string | null; |
|
| 61 | - | text_content: string | null; |
|
| 62 | - | published_at: string | null; |
|
| 63 | - | view_url: string | null; |
|
| 64 | - | }>(); |
|
| 125 | + | .all<ResolvedDocumentRow>(); |
|
| 65 | 126 | ||
| 66 | - | const documents = (results || []).map((doc) => ({ |
|
| 67 | - | uri: doc.uri, |
|
| 68 | - | did: doc.did, |
|
| 69 | - | rkey: doc.rkey, |
|
| 70 | - | title: doc.title || "Untitled", |
|
| 71 | - | path: doc.path, |
|
| 72 | - | site: doc.site, |
|
| 73 | - | content: doc.content ? JSON.parse(doc.content) : null, |
|
| 74 | - | textContent: doc.text_content, |
|
| 75 | - | publishedAt: doc.published_at, |
|
| 76 | - | viewUrl: doc.view_url, |
|
| 77 | - | })); |
|
| 127 | + | const documents = (results || []).map(rowToDocument); |
|
| 78 | 128 | ||
| 79 | 129 | return c.json({ |
|
| 80 | 130 | count: documents.length, |
|
| 3 | 3 | export { default as feed } from "./feed"; |
|
| 4 | 4 | export { default as stats } from "./stats"; |
|
| 5 | 5 | export { default as records } from "./records"; |
|
| 6 | + | export { default as admin } from "./admin"; |
| 32 | 32 | ||
| 33 | 33 | export type TapEvent = TapRecordEvent | TapIdentityEvent; |
|
| 34 | 34 | ||
| 35 | + | // Strong reference to a Bluesky post |
|
| 36 | + | export interface BskyPostRef { |
|
| 37 | + | uri: string; |
|
| 38 | + | cid: string; |
|
| 39 | + | } |
|
| 40 | + | ||
| 41 | + | // Publication record from site.standard.publication |
|
| 42 | + | export interface Publication { |
|
| 43 | + | url: string; // Base publication URL |
|
| 44 | + | name: string; |
|
| 45 | + | description?: string; |
|
| 46 | + | iconCid?: string; // CID for icon blob |
|
| 47 | + | iconUrl?: string; // Resolved full URL to icon |
|
| 48 | + | } |
|
| 49 | + | ||
| 50 | + | // Document record from site.standard.document |
|
| 35 | 51 | export interface Document { |
|
| 36 | 52 | uri: string; |
|
| 37 | 53 | did: string; |
|
| 38 | 54 | rkey: string; |
|
| 55 | + | // Document fields |
|
| 39 | 56 | title: string; |
|
| 57 | + | description?: string; |
|
| 58 | + | path?: string; |
|
| 59 | + | site?: string; // at:// URI to publication or https:// URL |
|
| 60 | + | content?: unknown; |
|
| 61 | + | textContent?: string; |
|
| 62 | + | coverImageCid?: string; // CID for cover image blob |
|
| 63 | + | coverImageUrl?: string; // Resolved full URL to cover image |
|
| 64 | + | bskyPostRef?: BskyPostRef; |
|
| 65 | + | tags?: string[]; |
|
| 66 | + | publishedAt?: string; |
|
| 67 | + | updatedAt?: string; |
|
| 68 | + | // Resolved publication data |
|
| 69 | + | publication?: Publication; |
|
| 70 | + | // Metadata |
|
| 71 | + | viewUrl?: string; // Canonical URL (publication.url + path) |
|
| 72 | + | pdsEndpoint?: string; // PDS endpoint used for blob URLs |
|
| 73 | + | } |
|
| 74 | + | ||
| 75 | + | // Database row for resolved_documents table |
|
| 76 | + | export interface ResolvedDocumentRow { |
|
| 77 | + | uri: string; |
|
| 78 | + | did: string; |
|
| 79 | + | rkey: string; |
|
| 80 | + | title: string | null; |
|
| 81 | + | description: string | null; |
|
| 40 | 82 | path: string | null; |
|
| 41 | 83 | site: string | null; |
|
| 42 | - | content: unknown; |
|
| 43 | - | textContent: string | null; |
|
| 44 | - | publishedAt: string | null; |
|
| 45 | - | viewUrl: string | null; |
|
| 84 | + | content: string | null; |
|
| 85 | + | text_content: string | null; |
|
| 86 | + | cover_image_cid: string | null; |
|
| 87 | + | cover_image_url: string | null; |
|
| 88 | + | bsky_post_ref: string | null; |
|
| 89 | + | tags: string | null; |
|
| 90 | + | published_at: string | null; |
|
| 91 | + | updated_at: string | null; |
|
| 92 | + | pub_url: string | null; |
|
| 93 | + | pub_name: string | null; |
|
| 94 | + | pub_description: string | null; |
|
| 95 | + | pub_icon_cid: string | null; |
|
| 96 | + | pub_icon_url: string | null; |
|
| 97 | + | view_url: string | null; |
|
| 98 | + | pds_endpoint: string | null; |
|
| 99 | + | resolved_at: string | null; |
|
| 100 | + | stale_at: string | null; |
|
| 46 | 101 | } |
| 1 | + | /** |
|
| 2 | + | * Constructs a full URL to fetch a blob from a PDS. |
|
| 3 | + | * Format: {pds}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid} |
|
| 4 | + | */ |
|
| 5 | + | export function buildBlobUrl(pds: string, did: string, cid: string): string { |
|
| 6 | + | const baseUrl = pds.endsWith("/") ? pds.slice(0, -1) : pds; |
|
| 7 | + | return `${baseUrl}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`; |
|
| 8 | + | } |
|
| 9 | + | ||
| 10 | + | /** |
|
| 11 | + | * Extracts the CID from a blob reference object. |
|
| 12 | + | * Blob refs can be in different formats: |
|
| 13 | + | * - { $link: "cid" } (legacy) |
|
| 14 | + | * - { ref: { $link: "cid" } } (current) |
|
| 15 | + | * - { cid: "cid" } (simple) |
|
| 16 | + | */ |
|
| 17 | + | export function extractBlobCid(blob: unknown): string | null { |
|
| 18 | + | if (!blob || typeof blob !== "object") return null; |
|
| 19 | + | ||
| 20 | + | const b = blob as Record<string, unknown>; |
|
| 21 | + | ||
| 22 | + | // Current format: { ref: { $link: "cid" } } |
|
| 23 | + | if (b.ref && typeof b.ref === "object") { |
|
| 24 | + | const ref = b.ref as Record<string, unknown>; |
|
| 25 | + | if (typeof ref.$link === "string") return ref.$link; |
|
| 26 | + | } |
|
| 27 | + | ||
| 28 | + | // Legacy format: { $link: "cid" } |
|
| 29 | + | if (typeof b.$link === "string") return b.$link; |
|
| 30 | + | ||
| 31 | + | // Simple format: { cid: "cid" } |
|
| 32 | + | if (typeof b.cid === "string") return b.cid; |
|
| 33 | + | ||
| 34 | + | return null; |
|
| 35 | + | } |
| 1 | 1 | import { resolvePds } from "./resolver"; |
|
| 2 | 2 | import { parseAtUri } from "./at-uri"; |
|
| 3 | + | import { buildBlobUrl, extractBlobCid } from "./blob"; |
|
| 3 | 4 | ||
| 4 | - | export async function resolveViewUrl( |
|
| 5 | + | // Raw document record from PDS |
|
| 6 | + | interface DocumentRecord { |
|
| 7 | + | site?: string; |
|
| 8 | + | path?: string; |
|
| 9 | + | title?: string; |
|
| 10 | + | description?: string; |
|
| 11 | + | coverImage?: unknown; |
|
| 12 | + | content?: unknown; |
|
| 13 | + | textContent?: string; |
|
| 14 | + | bskyPostRef?: { uri: string; cid: string }; |
|
| 15 | + | tags?: string[]; |
|
| 16 | + | publishedAt?: string; |
|
| 17 | + | updatedAt?: string; |
|
| 18 | + | } |
|
| 19 | + | ||
| 20 | + | // Raw publication record from PDS |
|
| 21 | + | interface PublicationRecord { |
|
| 22 | + | url?: string; |
|
| 23 | + | name?: string; |
|
| 24 | + | description?: string; |
|
| 25 | + | icon?: unknown; |
|
| 26 | + | } |
|
| 27 | + | ||
| 28 | + | // Resolved publication data |
|
| 29 | + | interface ResolvedPublication { |
|
| 30 | + | url: string; |
|
| 31 | + | name: string; |
|
| 32 | + | description: string | null; |
|
| 33 | + | iconCid: string | null; |
|
| 34 | + | iconUrl: string | null; |
|
| 35 | + | } |
|
| 36 | + | ||
| 37 | + | /** |
|
| 38 | + | * Fetches a publication record from an at:// URI |
|
| 39 | + | */ |
|
| 40 | + | async function fetchPublication( |
|
| 5 | 41 | db: D1Database, |
|
| 6 | - | siteUri: string, |
|
| 7 | - | path: string |
|
| 8 | - | ): Promise<string | null> { |
|
| 42 | + | siteUri: string |
|
| 43 | + | ): Promise<ResolvedPublication | null> { |
|
| 9 | 44 | const parsed = parseAtUri(siteUri); |
|
| 10 | 45 | if (!parsed) return null; |
|
| 11 | 46 | ||
| 18 | 53 | )}&collection=${encodeURIComponent(parsed.collection)}&rkey=${encodeURIComponent( |
|
| 19 | 54 | parsed.rkey |
|
| 20 | 55 | )}`; |
|
| 56 | + | ||
| 21 | 57 | const response = await fetch(url); |
|
| 22 | 58 | if (!response.ok) return null; |
|
| 23 | 59 | ||
| 24 | - | const data = (await response.json()) as { |
|
| 25 | - | value?: { url?: string; domain?: string }; |
|
| 26 | - | }; |
|
| 27 | - | const siteUrl = data.value?.url || data.value?.domain; |
|
| 28 | - | if (!siteUrl) return null; |
|
| 60 | + | const data = (await response.json()) as { value?: PublicationRecord }; |
|
| 61 | + | const pub = data.value; |
|
| 62 | + | if (!pub?.url || !pub?.name) return null; |
|
| 29 | 63 | ||
| 30 | - | const baseUrl = siteUrl.startsWith("http") ? siteUrl : `https://${siteUrl}`; |
|
| 31 | - | return `${baseUrl}${path}`; |
|
| 64 | + | const iconCid = extractBlobCid(pub.icon); |
|
| 65 | + | const iconUrl = iconCid ? buildBlobUrl(pds, parsed.did, iconCid) : null; |
|
| 66 | + | ||
| 67 | + | return { |
|
| 68 | + | url: pub.url, |
|
| 69 | + | name: pub.name, |
|
| 70 | + | description: pub.description || null, |
|
| 71 | + | iconCid, |
|
| 72 | + | iconUrl, |
|
| 73 | + | }; |
|
| 32 | 74 | } catch { |
|
| 33 | 75 | return null; |
|
| 34 | 76 | } |
|
| 35 | 77 | } |
|
| 36 | 78 | ||
| 79 | + | /** |
|
| 80 | + | * Resolves the view URL for a document. |
|
| 81 | + | * If site is an at:// URI, fetches the publication to get the base URL. |
|
| 82 | + | * If site is an https:// URL, uses it directly. |
|
| 83 | + | */ |
|
| 84 | + | export async function resolveViewUrl( |
|
| 85 | + | db: D1Database, |
|
| 86 | + | siteUri: string, |
|
| 87 | + | path: string |
|
| 88 | + | ): Promise<string | null> { |
|
| 89 | + | // Check if site is an at:// URI or direct URL |
|
| 90 | + | if (siteUri.startsWith("at://")) { |
|
| 91 | + | const pub = await fetchPublication(db, siteUri); |
|
| 92 | + | if (!pub?.url) return null; |
|
| 93 | + | const baseUrl = pub.url.startsWith("http") ? pub.url : `https://${pub.url}`; |
|
| 94 | + | return `${baseUrl.replace(/\/$/, "")}${path}`; |
|
| 95 | + | } |
|
| 96 | + | ||
| 97 | + | // Direct URL |
|
| 98 | + | const baseUrl = siteUri.startsWith("http") ? siteUri : `https://${siteUri}`; |
|
| 99 | + | return `${baseUrl.replace(/\/$/, "")}${path}`; |
|
| 100 | + | } |
|
| 101 | + | ||
| 102 | + | /** |
|
| 103 | + | * Processes a document record: fetches from PDS, resolves publication, |
|
| 104 | + | * and stores all fields in resolved_documents table. |
|
| 105 | + | */ |
|
| 37 | 106 | export async function processDocument( |
|
| 38 | 107 | db: D1Database, |
|
| 39 | 108 | did: string, |
|
| 48 | 117 | return; |
|
| 49 | 118 | } |
|
| 50 | 119 | ||
| 51 | - | // 2. Fetch Record |
|
| 120 | + | // 2. Fetch Document Record |
|
| 52 | 121 | const url = `${pds}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent( |
|
| 53 | 122 | did |
|
| 54 | 123 | )}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`; |
|
| 55 | - | ||
| 124 | + | ||
| 56 | 125 | const response = await fetch(url); |
|
| 57 | 126 | if (!response.ok) { |
|
| 58 | 127 | if (response.status === 404) { |
|
| 59 | - | // Record deleted? |
|
| 60 | - | console.warn(`Record not found: ${did}/${collection}/${rkey}`); |
|
| 128 | + | console.warn(`Record not found: ${did}/${collection}/${rkey}`); |
|
| 61 | 129 | } |
|
| 62 | 130 | return; |
|
| 63 | 131 | } |
|
| 65 | 133 | const data = (await response.json()) as { |
|
| 66 | 134 | uri: string; |
|
| 67 | 135 | cid?: string; |
|
| 68 | - | value: { |
|
| 69 | - | title?: string; |
|
| 70 | - | path?: string; |
|
| 71 | - | site?: string; |
|
| 72 | - | content?: unknown; |
|
| 73 | - | textContent?: string; |
|
| 74 | - | publishedAt?: string; |
|
| 75 | - | [key: string]: unknown; |
|
| 76 | - | }; |
|
| 136 | + | value: DocumentRecord; |
|
| 77 | 137 | }; |
|
| 78 | 138 | ||
| 79 | 139 | const { value, cid } = data; |
|
| 90 | 150 | .bind(did, rkey, collection, cid || null, cid || null) |
|
| 91 | 151 | .run(); |
|
| 92 | 152 | ||
| 93 | - | // 4. Resolve View URL and Update resolved_documents |
|
| 94 | - | const uri = `at://${did}/${collection}/${rkey}`; |
|
| 153 | + | // 4. Extract document fields |
|
| 154 | + | const title = value.title || null; |
|
| 155 | + | const description = value.description || null; |
|
| 156 | + | const path = value.path || null; |
|
| 157 | + | const site = value.site || null; |
|
| 158 | + | const content = value.content ? JSON.stringify(value.content) : null; |
|
| 159 | + | const textContent = value.textContent || null; |
|
| 160 | + | const coverImageCid = extractBlobCid(value.coverImage); |
|
| 161 | + | const coverImageUrl = coverImageCid ? buildBlobUrl(pds, did, coverImageCid) : null; |
|
| 162 | + | const bskyPostRef = value.bskyPostRef ? JSON.stringify(value.bskyPostRef) : null; |
|
| 163 | + | const tags = value.tags ? JSON.stringify(value.tags) : null; |
|
| 164 | + | const publishedAt = value.publishedAt || null; |
|
| 165 | + | const updatedAt = value.updatedAt || null; |
|
| 166 | + | ||
| 167 | + | // 5. Resolve publication if site is at:// URI |
|
| 168 | + | let pubUrl: string | null = null; |
|
| 169 | + | let pubName: string | null = null; |
|
| 170 | + | let pubDescription: string | null = null; |
|
| 171 | + | let pubIconCid: string | null = null; |
|
| 172 | + | let pubIconUrl: string | null = null; |
|
| 95 | 173 | let viewUrl: string | null = null; |
|
| 96 | - | if (value.site && value.path) { |
|
| 97 | - | viewUrl = await resolveViewUrl(db, value.site, value.path); |
|
| 174 | + | ||
| 175 | + | if (site) { |
|
| 176 | + | if (site.startsWith("at://")) { |
|
| 177 | + | // Fetch publication record |
|
| 178 | + | const pub = await fetchPublication(db, site); |
|
| 179 | + | if (pub) { |
|
| 180 | + | pubUrl = pub.url; |
|
| 181 | + | pubName = pub.name; |
|
| 182 | + | pubDescription = pub.description; |
|
| 183 | + | pubIconCid = pub.iconCid; |
|
| 184 | + | pubIconUrl = pub.iconUrl; |
|
| 185 | + | // Construct view URL |
|
| 186 | + | if (pubUrl && path) { |
|
| 187 | + | const baseUrl = pubUrl.startsWith("http") ? pubUrl : `https://${pubUrl}`; |
|
| 188 | + | viewUrl = `${baseUrl.replace(/\/$/, "")}${path}`; |
|
| 189 | + | } |
|
| 190 | + | } |
|
| 191 | + | } else { |
|
| 192 | + | // Site is a direct URL (loose document) |
|
| 193 | + | pubUrl = site; |
|
| 194 | + | if (path) { |
|
| 195 | + | const baseUrl = site.startsWith("http") ? site : `https://${site}`; |
|
| 196 | + | viewUrl = `${baseUrl.replace(/\/$/, "")}${path}`; |
|
| 197 | + | } |
|
| 198 | + | } |
|
| 98 | 199 | } |
|
| 99 | 200 | ||
| 100 | - | // Set stale_at to 12 hours from now |
|
| 201 | + | // 6. Insert/update resolved_documents |
|
| 202 | + | const uri = `at://${did}/${collection}/${rkey}`; |
|
| 101 | 203 | const STALE_OFFSET_HOURS = 12; |
|
| 102 | 204 | ||
| 103 | 205 | await db |
|
| 104 | 206 | .prepare( |
|
| 105 | - | `INSERT INTO resolved_documents (uri, did, rkey, title, path, site, content, text_content, published_at, view_url, resolved_at, stale_at) |
|
| 106 | - | VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now', '+${STALE_OFFSET_HOURS} hours')) |
|
| 107 | - | ON CONFLICT(uri) DO UPDATE SET |
|
| 108 | - | title = ?, |
|
| 109 | - | path = ?, |
|
| 110 | - | site = ?, |
|
| 111 | - | content = ?, |
|
| 112 | - | text_content = ?, |
|
| 113 | - | published_at = ?, |
|
| 114 | - | view_url = ?, |
|
| 115 | - | resolved_at = datetime('now'), |
|
| 116 | - | stale_at = datetime('now', '+${STALE_OFFSET_HOURS} hours')` |
|
| 207 | + | `INSERT INTO resolved_documents ( |
|
| 208 | + | uri, did, rkey, title, description, path, site, content, text_content, |
|
| 209 | + | cover_image_cid, cover_image_url, bsky_post_ref, tags, |
|
| 210 | + | published_at, updated_at, pub_url, pub_name, pub_description, |
|
| 211 | + | pub_icon_cid, pub_icon_url, view_url, pds_endpoint, |
|
| 212 | + | resolved_at, stale_at |
|
| 213 | + | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now', '+${STALE_OFFSET_HOURS} hours')) |
|
| 214 | + | ON CONFLICT(uri) DO UPDATE SET |
|
| 215 | + | title = ?, description = ?, path = ?, site = ?, content = ?, text_content = ?, |
|
| 216 | + | cover_image_cid = ?, cover_image_url = ?, bsky_post_ref = ?, tags = ?, |
|
| 217 | + | published_at = ?, updated_at = ?, pub_url = ?, pub_name = ?, pub_description = ?, |
|
| 218 | + | pub_icon_cid = ?, pub_icon_url = ?, view_url = ?, pds_endpoint = ?, |
|
| 219 | + | resolved_at = datetime('now'), stale_at = datetime('now', '+${STALE_OFFSET_HOURS} hours')` |
|
| 117 | 220 | ) |
|
| 118 | 221 | .bind( |
|
| 119 | - | uri, |
|
| 120 | - | did, |
|
| 121 | - | rkey, |
|
| 122 | - | value.title || null, |
|
| 123 | - | value.path || null, |
|
| 124 | - | value.site || null, |
|
| 125 | - | value.content ? JSON.stringify(value.content) : null, |
|
| 126 | - | value.textContent || null, |
|
| 127 | - | value.publishedAt || null, |
|
| 128 | - | viewUrl, |
|
| 129 | - | // Update bindings |
|
| 130 | - | value.title || null, |
|
| 131 | - | value.path || null, |
|
| 132 | - | value.site || null, |
|
| 133 | - | value.content ? JSON.stringify(value.content) : null, |
|
| 134 | - | value.textContent || null, |
|
| 135 | - | value.publishedAt || null, |
|
| 136 | - | viewUrl |
|
| 222 | + | // INSERT values |
|
| 223 | + | uri, did, rkey, title, description, path, site, content, textContent, |
|
| 224 | + | coverImageCid, coverImageUrl, bskyPostRef, tags, |
|
| 225 | + | publishedAt, updatedAt, pubUrl, pubName, pubDescription, |
|
| 226 | + | pubIconCid, pubIconUrl, viewUrl, pds, |
|
| 227 | + | // UPDATE values |
|
| 228 | + | title, description, path, site, content, textContent, |
|
| 229 | + | coverImageCid, coverImageUrl, bskyPostRef, tags, |
|
| 230 | + | publishedAt, updatedAt, pubUrl, pubName, pubDescription, |
|
| 231 | + | pubIconCid, pubIconUrl, viewUrl, pds |
|
| 137 | 232 | ) |
|
| 138 | 233 | .run(); |
|
| 234 | + | ||
| 235 | + | console.log(`Processed document: ${uri}`); |
|
| 139 | 236 | } catch (error) { |
|
| 140 | 237 | console.error(`Error processing document ${did}/${collection}/${rkey}:`, error); |
|
| 141 | 238 | } |
|
| 1 | 1 | export { parseAtUri, buildAtUri, type AtUriComponents } from "./at-uri"; |
|
| 2 | 2 | export { resolvePds } from "./resolver"; |
|
| 3 | 3 | export { resolveViewUrl, processDocument } from "./document"; |
|
| 4 | + | export { buildBlobUrl, extractBlobCid } from "./blob"; |