chore: improved data indexing and added more data to the client 654390cd
Steve · 2026-01-13 09:29 14 file(s) · +586 −110
package.json +2 −1
11 11
		"dev:client": "cd packages/client && npm run dev",
12 12
		"deploy": "cd packages/server && npm run deploy",
13 13
		"deploy:client": "cd packages/client && npm run deploy",
14 -
		"db:create": "cd packages/server && npm run db:create",
14 +
		"db:init": "cd packages/server && npm run db:init",
15 +
		"db:init:prod": "cd packages/server && npm run db:init:prod",
15 16
		"db:migrate": "cd packages/server && npm run db:migrate",
16 17
		"db:migrate:prod": "cd packages/server && npm run db:migrate:prod",
17 18
		"secret:set": "cd packages/server && npm run secret:set"
packages/client/package.json +1 −1
6 6
		"dev": "vite",
7 7
		"build": "tsc && vite build",
8 8
		"preview": "vite preview",
9 -
		"pages:deploy": "vite build && wrangler pages deploy dist --project-name=atfeeds"
9 +
		"deploy": "vite build && wrangler pages deploy dist"
10 10
	},
11 11
	"dependencies": {
12 12
		"react": "^18.2.0",
packages/client/src/App.tsx +130 −15
3 3
// API base URL - currently hard-coded to the deployed Worker. TODO: read it from an env var so local dev can use same-origin ("").
4 4
const API_URL = "https://atfeeds-api.stevedsimkins.workers.dev";
5 5
6 +
interface BskyPostRef {
7 +
	uri: string;
8 +
	cid: string;
9 +
}
10 +
11 +
interface Publication {
12 +
	url: string;
13 +
	name: string;
14 +
	description?: string;
15 +
	iconCid?: string;
16 +
	iconUrl?: string;
17 +
}
18 +
6 19
interface Document {
7 20
	uri: string;
8 21
	did: string;
9 22
	rkey: string;
10 23
	title: string;
11 -
	path: string | null;
12 -
	site: string | null;
13 -
	content: {
24 +
	description?: string;
25 +
	path?: string;
26 +
	site?: string;
27 +
	content?: {
14 28
		$type: string;
15 29
		markdown?: string;
16 -
	} | null;
17 -
	textContent: string | null;
18 -
	publishedAt: string | null;
19 -
	viewUrl: string | null;
30 +
	};
31 +
	textContent?: string;
32 +
	coverImageCid?: string;
33 +
	coverImageUrl?: string;
34 +
	bskyPostRef?: BskyPostRef;
35 +
	tags?: string[];
36 +
	publishedAt?: string;
37 +
	updatedAt?: string;
38 +
	publication?: Publication;
39 +
	viewUrl?: string;
40 +
	pdsEndpoint?: string;
20 41
}
21 42
22 43
interface FeedResponse {
52 73
		fetchFeed();
53 74
	}, []);
54 75
55 -
	const formatDate = (dateString: string | null) => {
76 +
	const formatDate = (dateString?: string) => {
56 77
		if (!dateString) return "Unknown date";
57 78
		return new Date(dateString).toLocaleDateString("en-US", {
58 79
			year: "numeric",
61 82
		});
62 83
	};
63 84
64 -
	const truncateText = (text: string | null, maxLength: number = 200) => {
85 +
	/**
	 * Shortens `text` to at most `maxLength` UTF-16 code units and appends "..."
	 * when something was cut off. Missing or empty text yields "".
	 *
	 * The cut point is moved back by one unit when it would otherwise land in the
	 * middle of a surrogate pair, so an emoji (or any other astral character) is
	 * never rendered as a broken half-character right before the ellipsis.
	 */
	const truncateText = (text?: string, maxLength: number = 200) => {
		if (!text) return "";
		if (text.length <= maxLength) return text;

		let end = maxLength;
		// A high surrogate (0xD800-0xDBFF) as the last kept unit means its low
		// surrogate would be sliced away; drop the whole pair instead.
		const lastKept = text.charCodeAt(end - 1);
		if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
			end -= 1;
		}
		return text.slice(0, end) + "...";
	};
90 +
91 +
	const getDescription = (doc: Document) => {
92 +
		return doc.description || doc.textContent || "";
68 93
	};
69 94
70 95
130 155
									)}
131 156
								</legend>
132 157
								<div style={{ padding: "8px" }}>
158 +
									{/* Publication info */}
159 +
									{doc.publication && (
160 +
										<div
161 +
											style={{
162 +
												display: "flex",
163 +
												alignItems: "center",
164 +
												gap: "8px",
165 +
												marginBottom: "8px",
166 +
												fontSize: "0.85em",
167 +
											}}
168 +
										>
169 +
											{doc.publication.iconUrl && (
170 +
												<img
171 +
													src={doc.publication.iconUrl}
172 +
													alt={doc.publication.name}
173 +
													style={{
174 +
														width: "16px",
175 +
														height: "16px",
176 +
														objectFit: "cover",
177 +
													}}
178 +
												/>
179 +
											)}
180 +
											<span style={{ fontWeight: "bold" }}>
181 +
												{doc.publication.name}
182 +
											</span>
183 +
										</div>
184 +
									)}
185 +
186 +
									{/* Cover image */}
187 +
									{doc.coverImageUrl && (
188 +
										<div style={{ marginBottom: "8px" }}>
189 +
											<img
190 +
												src={doc.coverImageUrl}
191 +
												alt={doc.title}
192 +
												style={{
193 +
													maxWidth: "100%",
194 +
													maxHeight: "200px",
195 +
													objectFit: "cover",
196 +
													border: "1px solid #888",
197 +
												}}
198 +
											/>
199 +
										</div>
200 +
									)}
201 +
202 +
									{/* Date */}
133 203
									<div
134 204
										style={{
135 205
											marginBottom: "8px",
138 208
										}}
139 209
									>
140 210
										Published: {formatDate(doc.publishedAt)}
211 +
										{doc.updatedAt && doc.updatedAt !== doc.publishedAt && (
212 +
											<> | Updated: {formatDate(doc.updatedAt)}</>
213 +
										)}
141 214
									</div>
142 -
									{doc.textContent && (
215 +
216 +
									{/* Description */}
217 +
									{getDescription(doc) && (
143 218
										<p style={{ marginBottom: "12px" }}>
144 -
											{truncateText(doc.textContent)}
219 +
											{truncateText(getDescription(doc))}
145 220
										</p>
146 221
									)}
147 -
									{doc.viewUrl && (
148 -
										<div style={{ textAlign: "right" }}>
222 +
223 +
									{/* Tags */}
224 +
									{doc.tags && doc.tags.length > 0 && (
225 +
										<div
226 +
											style={{
227 +
												display: "flex",
228 +
												flexWrap: "wrap",
229 +
												gap: "4px",
230 +
												marginBottom: "12px",
231 +
											}}
232 +
										>
233 +
											{doc.tags.map((tag) => (
234 +
												<span
235 +
													key={tag}
236 +
													style={{
237 +
														background: "#c0c0c0",
238 +
														padding: "2px 6px",
239 +
														fontSize: "0.75em",
240 +
														border: "1px solid #808080",
241 +
													}}
242 +
												>
243 +
													{tag}
244 +
												</span>
245 +
											))}
246 +
										</div>
247 +
									)}
248 +
249 +
									{/* Actions */}
250 +
									<div style={{ display: "flex", gap: "8px", justifyContent: "flex-end" }}>
251 +
										{doc.bskyPostRef && (
252 +
											<button
253 +
												onClick={() =>
254 +
													window.open(
255 +
														`https://bsky.app/profile/${doc.did}/post/${doc.bskyPostRef!.uri.split("/").pop()}`,
256 +
														"_blank"
257 +
													)
258 +
												}
259 +
											>
260 +
												View on Bluesky
261 +
											</button>
262 +
										)}
263 +
										{doc.viewUrl && (
149 264
											<button
150 265
												onClick={() =>
151 266
													window.open(doc.viewUrl || "", "_blank")
153 268
											>
154 269
												Read More
155 270
											</button>
156 -
										</div>
157 -
									)}
271 +
										)}
272 +
									</div>
158 273
								</div>
159 274
							</fieldset>
160 275
						))}
packages/server/migrations/001_add_document_fields.sql (added) +23 −0
1 +
-- Migration: Add full Document and Publication fields to resolved_documents
2 +
-- Run with: wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --remote
3 +
4 +
-- Document fields
5 +
ALTER TABLE resolved_documents ADD COLUMN description TEXT;
6 +
ALTER TABLE resolved_documents ADD COLUMN cover_image_cid TEXT;
7 +
ALTER TABLE resolved_documents ADD COLUMN cover_image_url TEXT;
8 +
ALTER TABLE resolved_documents ADD COLUMN bsky_post_ref TEXT;
9 +
ALTER TABLE resolved_documents ADD COLUMN tags TEXT;
10 +
ALTER TABLE resolved_documents ADD COLUMN updated_at TEXT;
11 +
12 +
-- Publication fields
13 +
ALTER TABLE resolved_documents ADD COLUMN pub_url TEXT;
14 +
ALTER TABLE resolved_documents ADD COLUMN pub_name TEXT;
15 +
ALTER TABLE resolved_documents ADD COLUMN pub_description TEXT;
16 +
ALTER TABLE resolved_documents ADD COLUMN pub_icon_cid TEXT;
17 +
ALTER TABLE resolved_documents ADD COLUMN pub_icon_url TEXT;
18 +
19 +
-- Metadata
20 +
ALTER TABLE resolved_documents ADD COLUMN pds_endpoint TEXT;
21 +
22 +
-- Index for publication queries
23 +
CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url);
packages/server/package.json +5 −1
4 4
	"private": true,
5 5
	"scripts": {
6 6
		"dev": "wrangler dev",
7 -
		"deploy": "wrangler deploy"
7 +
		"deploy": "wrangler deploy",
8 +
		"db:init": "wrangler d1 execute atfeeds-db --file=schema.sql --local",
9 +
		"db:init:prod": "wrangler d1 execute atfeeds-db --file=schema.sql --remote",
10 +
		"db:migrate": "wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --local",
11 +
		"db:migrate:prod": "wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --remote"
8 12
	},
9 13
	"dependencies": {
10 14
		"hono": "^4.0.0"
packages/server/schema.sql +18 −2
32 32
  uri TEXT PRIMARY KEY,
33 33
  did TEXT NOT NULL,
34 34
  rkey TEXT NOT NULL,
35 +
  -- Document fields
35 36
  title TEXT,
37 +
  description TEXT,
36 38
  path TEXT,
37 39
  site TEXT,
38 -
  content TEXT,  -- JSON blob
40 +
  content TEXT,  -- JSON blob for content union
39 41
  text_content TEXT,
42 +
  cover_image_cid TEXT,  -- CID for cover image blob
43 +
  cover_image_url TEXT,  -- Full URL: {pds}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid}
44 +
  bsky_post_ref TEXT,  -- JSON blob for strong reference {uri, cid}
45 +
  tags TEXT,  -- JSON array of strings
40 46
  published_at TEXT,
41 -
  view_url TEXT,
47 +
  updated_at TEXT,
48 +
  -- Publication fields (resolved from site at:// URI)
49 +
  pub_url TEXT,  -- Publication base URL
50 +
  pub_name TEXT,
51 +
  pub_description TEXT,
52 +
  pub_icon_cid TEXT,  -- CID for publication icon blob
53 +
  pub_icon_url TEXT,  -- Full URL to publication icon
54 +
  -- Metadata
55 +
  view_url TEXT,  -- Constructed canonical URL (pub_url + path)
56 +
  pds_endpoint TEXT,  -- Cached PDS endpoint for this DID
42 57
  resolved_at TEXT DEFAULT (datetime('now')),
43 58
  stale_at TEXT  -- When this record should be re-resolved
44 59
);
45 60
46 61
CREATE INDEX IF NOT EXISTS idx_resolved_documents_rkey ON resolved_documents(rkey DESC);
47 62
CREATE INDEX IF NOT EXISTS idx_resolved_documents_stale ON resolved_documents(stale_at);
63 +
CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url);
packages/server/src/index.ts +2 −1
1 1
import { Hono } from "hono";
2 2
import { cors } from "hono/cors";
3 3
import type { Bindings } from "./types";
4 -
import { health, webhook, feed, stats, records } from "./routes";
4 +
import { health, webhook, feed, stats, records, admin } from "./routes";
5 5
import { processDocument } from "./utils";
6 6
7 7
const app = new Hono<{ Bindings: Bindings }>();
15 15
app.route("/feed", feed);
16 16
app.route("/stats", stats);
17 17
app.route("/records", records);
18 +
app.route("/admin", admin);
18 19
19 20
// Legacy alias: /feed-raw -> /feed/raw
20 21
app.get("/feed-raw", async (c) => {
packages/server/src/routes/admin.ts (added) +77 −0
1 +
import { Hono } from "hono";
2 +
import type { Bindings } from "../types";
3 +
4 +
const admin = new Hono<{ Bindings: Bindings }>();

/** Collection whose records are re-queued by POST /resolve-all. */
const DOCUMENT_COLLECTION = "site.standard.document";

/** Maximum number of messages sent per Queue sendBatch() call. */
const QUEUE_BATCH_SIZE = 100;

/**
 * Compares two strings without returning early on the first differing
 * character, so the response time does not reveal how much of a guessed
 * token was correct.
 */
function constantTimeEqual(a: string, b: string): boolean {
  const length = Math.max(a.length, b.length);
  let diff = a.length ^ b.length;
  for (let i = 0; i < length; i++) {
    // charCodeAt() is NaN past the end of a string; treat that as 0.
    diff |= (a.charCodeAt(i) || 0) ^ (b.charCodeAt(i) || 0);
  }
  return diff === 0;
}

/**
 * Authentication gate for every admin route.
 *
 * These endpoints mutate state for the whole dataset (re-queue every document,
 * mark every row stale), so they must not be callable anonymously. Callers
 * send `Authorization: Bearer <token>`, where the token equals the
 * `ADMIN_TOKEN` secret of the Worker.
 *
 * The gate fails closed: when `ADMIN_TOKEN` is not configured, every request
 * is rejected with 503 instead of being let through.
 * NOTE(review): `ADMIN_TOKEN` is a new secret name introduced here; add it to
 * `Bindings` and set it (e.g. via the existing `secret:set` script) before deploying.
 */
admin.use("*", async (c, next) => {
  // Reflect.get avoids a type assertion in case Bindings does not declare the secret yet.
  const expected: unknown = Reflect.get(c.env, "ADMIN_TOKEN");
  if (typeof expected !== "string" || expected.length === 0) {
    return c.json({ error: "Admin API is not configured" }, 503);
  }

  const header = c.req.header("Authorization") ?? "";
  const presented = header.startsWith("Bearer ") ? header.slice("Bearer ".length) : "";
  if (presented.length === 0 || !constantTimeEqual(presented, expected)) {
    return c.json({ error: "Unauthorized" }, 401);
  }

  await next();
});

/**
 * POST /resolve-all
 *
 * Reads every (did, rkey) pair of the document collection from `repo_records`
 * and enqueues one message per record on RESOLUTION_QUEUE.
 * Responds with the number of messages queued, or 500 with error details.
 */
admin.post("/resolve-all", async (c) => {
  try {
    const db = c.env.DB;
    const queue = c.env.RESOLUTION_QUEUE;

    const { results } = await db
      .prepare(`SELECT did, rkey FROM repo_records WHERE collection = ?`)
      .bind(DOCUMENT_COLLECTION)
      .all<{ did: string; rkey: string }>();

    if (!results || results.length === 0) {
      return c.json({ message: "No documents to process", queued: 0 });
    }

    let queued = 0;
    // Send in fixed-size chunks; batches are awaited one at a time so that a
    // failure stops the run instead of leaving an unknown number in flight.
    for (let start = 0; start < results.length; start += QUEUE_BATCH_SIZE) {
      const messages = results.slice(start, start + QUEUE_BATCH_SIZE).map((row) => ({
        body: {
          did: row.did,
          collection: DOCUMENT_COLLECTION,
          rkey: row.rkey,
        },
      }));

      await queue.sendBatch(messages);
      queued += messages.length;
    }

    return c.json({
      message: "Documents queued for re-processing",
      queued,
    });
  } catch (error) {
    console.error("admin/resolve-all failed:", error);
    return c.json(
      { error: "Failed to queue documents", details: String(error) },
      500
    );
  }
});

/**
 * POST /mark-stale
 *
 * Sets `stale_at` of every row in `resolved_documents` to one hour in the
 * past. Responds with the number of affected rows, or 500 with error details.
 */
admin.post("/mark-stale", async (c) => {
  try {
    const result = await c.env.DB
      .prepare(
        `UPDATE resolved_documents SET stale_at = datetime('now', '-1 hour')`
      )
      .run();

    return c.json({
      message: "All documents marked as stale",
      affected: result.meta.changes,
    });
  } catch (error) {
    console.error("admin/mark-stale failed:", error);
    return c.json(
      { error: "Failed to mark documents as stale", details: String(error) },
      500
    );
  }
});

export default admin;
packages/server/src/routes/feed.ts +76 −26
1 1
import { Hono } from "hono";
2 -
import type { Bindings } from "../types";
2 +
import type { Bindings, ResolvedDocumentRow, Document, Publication, BskyPostRef } from "../types";
3 3
4 4
const feed = new Hono<{ Bindings: Bindings }>();
5 5
6 +
/**
 * Parses a JSON text column.
 * NULL, empty, or malformed input yields `undefined` instead of throwing, so a
 * single bad row cannot break the whole feed response.
 */
function parseJsonColumn(raw: string | null): unknown {
  if (!raw) return undefined;
  try {
    return JSON.parse(raw);
  } catch {
    return undefined;
  }
}

/** Type guard: true only for objects carrying string `uri` and `cid` fields. */
function isBskyPostRef(value: unknown): value is BskyPostRef {
  if (!value || typeof value !== "object") return false;
  const candidate = value as Record<string, unknown>;
  return typeof candidate.uri === "string" && typeof candidate.cid === "string";
}

/**
 * Returns the string members of `value` when it is an array, otherwise
 * `undefined`. Non-string members are dropped.
 */
function toStringArray(value: unknown): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  return value.filter((item): item is string => typeof item === "string");
}

/**
 * Transforms a database row into a Document object for the API response.
 *
 * - NULL / empty text columns become `undefined` (omitted from the JSON).
 * - A missing title falls back to "Untitled".
 * - `publication` is only built when both `pub_url` and `pub_name` are set.
 * - JSON columns (`content`, `tags`, `bsky_post_ref`) are parsed defensively:
 *   malformed JSON or a value of the wrong shape is dropped rather than passed
 *   on, because the client calls `tags.map(...)` and `bskyPostRef.uri.split(...)`
 *   without further checks.
 */
function rowToDocument(row: ResolvedDocumentRow): Document {
  let publication: Publication | undefined;
  if (row.pub_url && row.pub_name) {
    publication = {
      url: row.pub_url,
      name: row.pub_name,
      description: row.pub_description || undefined,
      iconCid: row.pub_icon_cid || undefined,
      iconUrl: row.pub_icon_url || undefined,
    };
  }

  const parsedPostRef = parseJsonColumn(row.bsky_post_ref);
  const bskyPostRef = isBskyPostRef(parsedPostRef) ? parsedPostRef : undefined;

  const tags = toStringArray(parseJsonColumn(row.tags));

  // A stored JSON `null` is treated like an absent value.
  const content = parseJsonColumn(row.content) ?? undefined;

  return {
    uri: row.uri,
    did: row.did,
    rkey: row.rkey,
    title: row.title || "Untitled",
    description: row.description || undefined,
    path: row.path || undefined,
    site: row.site || undefined,
    content,
    textContent: row.text_content || undefined,
    coverImageCid: row.cover_image_cid || undefined,
    coverImageUrl: row.cover_image_url || undefined,
    bskyPostRef,
    tags,
    publishedAt: row.published_at || undefined,
    updatedAt: row.updated_at || undefined,
    publication,
    viewUrl: row.view_url || undefined,
    pdsEndpoint: row.pds_endpoint || undefined,
  };
}
73 +
6 74
// Get raw feed data (for client-side fetching)
7 75
// Accessible at both /feed/raw and /feed-raw (via alias in index.ts)
8 76
feed.get("/raw", async (c) => {
44 112
45 113
    const { results } = await db
46 114
      .prepare(
47 -
        `SELECT uri, did, rkey, title, path, site, content, text_content, published_at, view_url
115 +
        `SELECT uri, did, rkey, title, description, path, site, content, text_content,
116 +
                cover_image_cid, cover_image_url, bsky_post_ref, tags,
117 +
                published_at, updated_at, pub_url, pub_name, pub_description,
118 +
                pub_icon_cid, pub_icon_url, view_url, pds_endpoint,
119 +
                resolved_at, stale_at
48 120
         FROM resolved_documents
49 121
         ORDER BY rkey DESC
50 122
         LIMIT ? OFFSET ?`
51 123
      )
52 124
      .bind(limit, offset)
53 -
      .all<{
54 -
        uri: string;
55 -
        did: string;
56 -
        rkey: string;
57 -
        title: string | null;
58 -
        path: string | null;
59 -
        site: string | null;
60 -
        content: string | null;
61 -
        text_content: string | null;
62 -
        published_at: string | null;
63 -
        view_url: string | null;
64 -
      }>();
125 +
      .all<ResolvedDocumentRow>();
65 126
66 -
    const documents = (results || []).map((doc) => ({
67 -
      uri: doc.uri,
68 -
      did: doc.did,
69 -
      rkey: doc.rkey,
70 -
      title: doc.title || "Untitled",
71 -
      path: doc.path,
72 -
      site: doc.site,
73 -
      content: doc.content ? JSON.parse(doc.content) : null,
74 -
      textContent: doc.text_content,
75 -
      publishedAt: doc.published_at,
76 -
      viewUrl: doc.view_url,
77 -
    }));
127 +
    const documents = (results || []).map(rowToDocument);
78 128
79 129
    return c.json({
80 130
      count: documents.length,
packages/server/src/routes/index.ts +1 −0
3 3
export { default as feed } from "./feed";
4 4
export { default as stats } from "./stats";
5 5
export { default as records } from "./records";
6 +
export { default as admin } from "./admin";
packages/server/src/types/index.ts +59 −4
32 32
33 33
export type TapEvent = TapRecordEvent | TapIdentityEvent;
34 34
35 +
// Strong reference to a Bluesky post
36 +
export interface BskyPostRef {
37 +
  uri: string;
38 +
  cid: string;
39 +
}
40 +
41 +
// Publication record from site.standard.publication
42 +
export interface Publication {
43 +
  url: string;  // Base publication URL
44 +
  name: string;
45 +
  description?: string;
46 +
  iconCid?: string;  // CID for icon blob
47 +
  iconUrl?: string;  // Resolved full URL to icon
48 +
}
49 +
50 +
// Document record from site.standard.document
35 51
export interface Document {
36 52
  uri: string;
37 53
  did: string;
38 54
  rkey: string;
55 +
  // Document fields
39 56
  title: string;
57 +
  description?: string;
58 +
  path?: string;
59 +
  site?: string;  // at:// URI to publication or https:// URL
60 +
  content?: unknown;
61 +
  textContent?: string;
62 +
  coverImageCid?: string;  // CID for cover image blob
63 +
  coverImageUrl?: string;  // Resolved full URL to cover image
64 +
  bskyPostRef?: BskyPostRef;
65 +
  tags?: string[];
66 +
  publishedAt?: string;
67 +
  updatedAt?: string;
68 +
  // Resolved publication data
69 +
  publication?: Publication;
70 +
  // Metadata
71 +
  viewUrl?: string;  // Canonical URL (publication.url + path)
72 +
  pdsEndpoint?: string;  // PDS endpoint used for blob URLs
73 +
}
74 +
75 +
// Database row for resolved_documents table
76 +
export interface ResolvedDocumentRow {
77 +
  uri: string;
78 +
  did: string;
79 +
  rkey: string;
80 +
  title: string | null;
81 +
  description: string | null;
40 82
  path: string | null;
41 83
  site: string | null;
42 -
  content: unknown;
43 -
  textContent: string | null;
44 -
  publishedAt: string | null;
45 -
  viewUrl: string | null;
84 +
  content: string | null;
85 +
  text_content: string | null;
86 +
  cover_image_cid: string | null;
87 +
  cover_image_url: string | null;
88 +
  bsky_post_ref: string | null;
89 +
  tags: string | null;
90 +
  published_at: string | null;
91 +
  updated_at: string | null;
92 +
  pub_url: string | null;
93 +
  pub_name: string | null;
94 +
  pub_description: string | null;
95 +
  pub_icon_cid: string | null;
96 +
  pub_icon_url: string | null;
97 +
  view_url: string | null;
98 +
  pds_endpoint: string | null;
99 +
  resolved_at: string | null;
100 +
  stale_at: string | null;
46 101
}
packages/server/src/utils/blob.ts (added) +35 −0
1 +
/**
 * Constructs a full URL to fetch a blob from a PDS.
 * Format: {pds}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid}
 *
 * @param pds - Base URL of the PDS; any number of trailing slashes is ignored.
 * @param did - DID of the repo that owns the blob (URL-encoded into the query).
 * @param cid - CID of the blob (URL-encoded into the query).
 * @returns The absolute `com.atproto.sync.getBlob` URL.
 */
export function buildBlobUrl(pds: string, did: string, cid: string): string {
  // Strip every trailing slash, not just one, so "https://pds//" cannot
  // produce a "//xrpc/..." path.
  const baseUrl = pds.replace(/\/+$/, "");
  const query = `did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;
  return `${baseUrl}/xrpc/com.atproto.sync.getBlob?${query}`;
}
9 +
10 +
/**
 * Extracts the CID from a blob reference object.
 *
 * Recognized shapes, checked in this order:
 * - { ref: { $link: "cid" } } (current)
 * - { $link: "cid" } (legacy)
 * - { cid: "cid" } (simple)
 *
 * Only non-empty strings count as a CID. An empty value in one shape does not
 * stop the lookup, so a usable CID in a later shape is still found, and ""
 * is never returned (callers store the result as-is and would otherwise
 * persist an empty string instead of NULL).
 *
 * @param blob - Untrusted blob reference taken from a record.
 * @returns The CID string, or `null` when none of the shapes match.
 */
export function extractBlobCid(blob: unknown): string | null {
  if (!blob || typeof blob !== "object") return null;

  const record = blob as Record<string, unknown>;
  const isCid = (value: unknown): value is string =>
    typeof value === "string" && value.length > 0;

  // Current format: { ref: { $link: "cid" } }
  if (record.ref && typeof record.ref === "object") {
    const link = (record.ref as Record<string, unknown>).$link;
    if (isCid(link)) return link;
  }

  // Legacy format: { $link: "cid" }
  if (isCid(record.$link)) return record.$link;

  // Simple format: { cid: "cid" }
  if (isCid(record.cid)) return record.cid;

  return null;
}
packages/server/src/utils/document.ts +156 −59
1 1
import { resolvePds } from "./resolver";
2 2
import { parseAtUri } from "./at-uri";
3 +
import { buildBlobUrl, extractBlobCid } from "./blob";
3 4
4 -
export async function resolveViewUrl(
5 +
// Raw document record from PDS
6 +
interface DocumentRecord {
7 +
  site?: string;
8 +
  path?: string;
9 +
  title?: string;
10 +
  description?: string;
11 +
  coverImage?: unknown;
12 +
  content?: unknown;
13 +
  textContent?: string;
14 +
  bskyPostRef?: { uri: string; cid: string };
15 +
  tags?: string[];
16 +
  publishedAt?: string;
17 +
  updatedAt?: string;
18 +
}
19 +
20 +
// Raw publication record from PDS
21 +
interface PublicationRecord {
22 +
  url?: string;
23 +
  name?: string;
24 +
  description?: string;
25 +
  icon?: unknown;
26 +
}
27 +
28 +
// Resolved publication data
29 +
interface ResolvedPublication {
30 +
  url: string;
31 +
  name: string;
32 +
  description: string | null;
33 +
  iconCid: string | null;
34 +
  iconUrl: string | null;
35 +
}
36 +
37 +
/**
38 +
 * Fetches a publication record from an at:// URI
39 +
 */
40 +
async function fetchPublication(
5 41
  db: D1Database,
6 -
  siteUri: string,
7 -
  path: string
8 -
): Promise<string | null> {
42 +
  siteUri: string
43 +
): Promise<ResolvedPublication | null> {
9 44
  const parsed = parseAtUri(siteUri);
10 45
  if (!parsed) return null;
11 46
18 53
    )}&collection=${encodeURIComponent(parsed.collection)}&rkey=${encodeURIComponent(
19 54
      parsed.rkey
20 55
    )}`;
56 +
21 57
    const response = await fetch(url);
22 58
    if (!response.ok) return null;
23 59
24 -
    const data = (await response.json()) as {
25 -
      value?: { url?: string; domain?: string };
26 -
    };
27 -
    const siteUrl = data.value?.url || data.value?.domain;
28 -
    if (!siteUrl) return null;
60 +
    const data = (await response.json()) as { value?: PublicationRecord };
61 +
    const pub = data.value;
62 +
    if (!pub?.url || !pub?.name) return null;
29 63
30 -
    const baseUrl = siteUrl.startsWith("http") ? siteUrl : `https://${siteUrl}`;
31 -
    return `${baseUrl}${path}`;
64 +
    const iconCid = extractBlobCid(pub.icon);
65 +
    const iconUrl = iconCid ? buildBlobUrl(pds, parsed.did, iconCid) : null;
66 +
67 +
    return {
68 +
      url: pub.url,
69 +
      name: pub.name,
70 +
      description: pub.description || null,
71 +
      iconCid,
72 +
      iconUrl,
73 +
    };
32 74
  } catch {
33 75
    return null;
34 76
  }
35 77
}
36 78
79 +
/**
 * Joins a site base and a document path into an absolute URL.
 *
 * - A base without an explicit http:// or https:// scheme gets "https://"
 *   prepended. The scheme is matched exactly, so a bare host such as
 *   "httpbin.org" is not mistaken for an absolute URL.
 * - All trailing slashes of the base are removed.
 * - A non-empty path that lacks a leading "/" gets one, so it cannot be glued
 *   onto the hostname ("example.com" + "post" must not become "example.compost").
 */
function joinBaseAndPath(base: string, path: string): string {
  const absoluteBase = /^https?:\/\//i.test(base) ? base : `https://${base}`;
  const trimmedBase = absoluteBase.replace(/\/+$/, "");
  const normalizedPath = path === "" || path.startsWith("/") ? path : `/${path}`;
  return `${trimmedBase}${normalizedPath}`;
}

/**
 * Resolves the view URL for a document.
 * If site is an at:// URI, fetches the publication to get the base URL.
 * Otherwise site is used directly as the base (URL or bare host).
 *
 * @param db - D1 database handle, passed through to the publication lookup.
 * @param siteUri - The document's `site` value (at:// URI, URL, or bare host).
 * @param path - The document's path relative to the site.
 * @returns The absolute view URL, or `null` when the publication cannot be
 *          resolved or has no URL.
 */
export async function resolveViewUrl(
  db: D1Database,
  siteUri: string,
  path: string
): Promise<string | null> {
  if (siteUri.startsWith("at://")) {
    const pub = await fetchPublication(db, siteUri);
    if (!pub?.url) return null;
    return joinBaseAndPath(pub.url, path);
  }

  // Direct URL
  return joinBaseAndPath(siteUri, path);
}
101 +
102 +
/**
103 +
 * Processes a document record: fetches from PDS, resolves publication,
104 +
 * and stores all fields in resolved_documents table.
105 +
 */
37 106
export async function processDocument(
38 107
  db: D1Database,
39 108
  did: string,
48 117
      return;
49 118
    }
50 119
51 -
    // 2. Fetch Record
120 +
    // 2. Fetch Document Record
52 121
    const url = `${pds}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(
53 122
      did
54 123
    )}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`;
55 -
    
124 +
56 125
    const response = await fetch(url);
57 126
    if (!response.ok) {
58 127
      if (response.status === 404) {
59 -
         // Record deleted?
60 -
         console.warn(`Record not found: ${did}/${collection}/${rkey}`);
128 +
        console.warn(`Record not found: ${did}/${collection}/${rkey}`);
61 129
      }
62 130
      return;
63 131
    }
65 133
    const data = (await response.json()) as {
66 134
      uri: string;
67 135
      cid?: string;
68 -
      value: {
69 -
        title?: string;
70 -
        path?: string;
71 -
        site?: string;
72 -
        content?: unknown;
73 -
        textContent?: string;
74 -
        publishedAt?: string;
75 -
        [key: string]: unknown;
76 -
      };
136 +
      value: DocumentRecord;
77 137
    };
78 138
79 139
    const { value, cid } = data;
90 150
      .bind(did, rkey, collection, cid || null, cid || null)
91 151
      .run();
92 152
93 -
    // 4. Resolve View URL and Update resolved_documents
94 -
    const uri = `at://${did}/${collection}/${rkey}`;
153 +
    // 4. Extract document fields
154 +
    const title = value.title || null;
155 +
    const description = value.description || null;
156 +
    const path = value.path || null;
157 +
    const site = value.site || null;
158 +
    const content = value.content ? JSON.stringify(value.content) : null;
159 +
    const textContent = value.textContent || null;
160 +
    const coverImageCid = extractBlobCid(value.coverImage);
161 +
    const coverImageUrl = coverImageCid ? buildBlobUrl(pds, did, coverImageCid) : null;
162 +
    const bskyPostRef = value.bskyPostRef ? JSON.stringify(value.bskyPostRef) : null;
163 +
    const tags = value.tags ? JSON.stringify(value.tags) : null;
164 +
    const publishedAt = value.publishedAt || null;
165 +
    const updatedAt = value.updatedAt || null;
166 +
167 +
    // 5. Resolve publication if site is at:// URI
168 +
    let pubUrl: string | null = null;
169 +
    let pubName: string | null = null;
170 +
    let pubDescription: string | null = null;
171 +
    let pubIconCid: string | null = null;
172 +
    let pubIconUrl: string | null = null;
95 173
    let viewUrl: string | null = null;
96 -
    if (value.site && value.path) {
97 -
      viewUrl = await resolveViewUrl(db, value.site, value.path);
174 +
175 +
    if (site) {
176 +
      if (site.startsWith("at://")) {
177 +
        // Fetch publication record
178 +
        const pub = await fetchPublication(db, site);
179 +
        if (pub) {
180 +
          pubUrl = pub.url;
181 +
          pubName = pub.name;
182 +
          pubDescription = pub.description;
183 +
          pubIconCid = pub.iconCid;
184 +
          pubIconUrl = pub.iconUrl;
185 +
          // Construct view URL
186 +
          if (pubUrl && path) {
187 +
            const baseUrl = pubUrl.startsWith("http") ? pubUrl : `https://${pubUrl}`;
188 +
            viewUrl = `${baseUrl.replace(/\/$/, "")}${path}`;
189 +
          }
190 +
        }
191 +
      } else {
192 +
        // Site is a direct URL (loose document)
193 +
        pubUrl = site;
194 +
        if (path) {
195 +
          const baseUrl = site.startsWith("http") ? site : `https://${site}`;
196 +
          viewUrl = `${baseUrl.replace(/\/$/, "")}${path}`;
197 +
        }
198 +
      }
98 199
    }
99 200
100 -
    // Set stale_at to 12 hours from now
201 +
    // 6. Insert/update resolved_documents
202 +
    const uri = `at://${did}/${collection}/${rkey}`;
101 203
    const STALE_OFFSET_HOURS = 12;
102 204
103 205
    await db
104 206
      .prepare(
105 -
        `INSERT INTO resolved_documents (uri, did, rkey, title, path, site, content, text_content, published_at, view_url, resolved_at, stale_at)
106 -
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now', '+${STALE_OFFSET_HOURS} hours'))
107 -
         ON CONFLICT(uri) DO UPDATE SET
108 -
           title = ?,
109 -
           path = ?,
110 -
           site = ?,
111 -
           content = ?,
112 -
           text_content = ?,
113 -
           published_at = ?,
114 -
           view_url = ?,
115 -
           resolved_at = datetime('now'),
116 -
           stale_at = datetime('now', '+${STALE_OFFSET_HOURS} hours')`
207 +
        `INSERT INTO resolved_documents (
208 +
          uri, did, rkey, title, description, path, site, content, text_content,
209 +
          cover_image_cid, cover_image_url, bsky_post_ref, tags,
210 +
          published_at, updated_at, pub_url, pub_name, pub_description,
211 +
          pub_icon_cid, pub_icon_url, view_url, pds_endpoint,
212 +
          resolved_at, stale_at
213 +
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now', '+${STALE_OFFSET_HOURS} hours'))
214 +
        ON CONFLICT(uri) DO UPDATE SET
215 +
          title = ?, description = ?, path = ?, site = ?, content = ?, text_content = ?,
216 +
          cover_image_cid = ?, cover_image_url = ?, bsky_post_ref = ?, tags = ?,
217 +
          published_at = ?, updated_at = ?, pub_url = ?, pub_name = ?, pub_description = ?,
218 +
          pub_icon_cid = ?, pub_icon_url = ?, view_url = ?, pds_endpoint = ?,
219 +
          resolved_at = datetime('now'), stale_at = datetime('now', '+${STALE_OFFSET_HOURS} hours')`
117 220
      )
118 221
      .bind(
119 -
        uri,
120 -
        did,
121 -
        rkey,
122 -
        value.title || null,
123 -
        value.path || null,
124 -
        value.site || null,
125 -
        value.content ? JSON.stringify(value.content) : null,
126 -
        value.textContent || null,
127 -
        value.publishedAt || null,
128 -
        viewUrl,
129 -
        // Update bindings
130 -
        value.title || null,
131 -
        value.path || null,
132 -
        value.site || null,
133 -
        value.content ? JSON.stringify(value.content) : null,
134 -
        value.textContent || null,
135 -
        value.publishedAt || null,
136 -
        viewUrl
222 +
        // INSERT values
223 +
        uri, did, rkey, title, description, path, site, content, textContent,
224 +
        coverImageCid, coverImageUrl, bskyPostRef, tags,
225 +
        publishedAt, updatedAt, pubUrl, pubName, pubDescription,
226 +
        pubIconCid, pubIconUrl, viewUrl, pds,
227 +
        // UPDATE values
228 +
        title, description, path, site, content, textContent,
229 +
        coverImageCid, coverImageUrl, bskyPostRef, tags,
230 +
        publishedAt, updatedAt, pubUrl, pubName, pubDescription,
231 +
        pubIconCid, pubIconUrl, viewUrl, pds
137 232
      )
138 233
      .run();
234 +
235 +
    console.log(`Processed document: ${uri}`);
139 236
  } catch (error) {
140 237
    console.error(`Error processing document ${did}/${collection}/${rkey}:`, error);
141 238
  }
packages/server/src/utils/index.ts +1 −0
1 1
export { parseAtUri, buildAtUri, type AtUriComponents } from "./at-uri";
2 2
export { resolvePds } from "./resolver";
3 3
export { resolveViewUrl, processDocument } from "./document";
4 +
export { buildBlobUrl, extractBlobCid } from "./blob";