packages/cli/src/lib/markdown.ts 10.8 K raw
1
import * as fs from "node:fs/promises";
2
import * as path from "node:path";
3
import { glob } from "glob";
4
import yaml from "js-yaml";
5
import { minimatch } from "minimatch";
6
import * as toml from "smol-toml";
7
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
8
9
/**
 * Returns the first truthy value stored in `source` under any of `keys`.
 * Undefined or empty key names are skipped; `undefined` is returned when no
 * key holds a truthy value.
 */
function firstTruthyField(
	source: Record<string, unknown>,
	keys: ReadonlyArray<string | undefined>,
): unknown {
	for (const key of keys) {
		if (key && source[key]) {
			return source[key];
		}
	}
	return undefined;
}

/**
 * Converts a valid `Date` instance to its ISO string; every other value is
 * returned unchanged.
 */
function dateValueToString(value: unknown): unknown {
	if (value instanceof Date && !Number.isNaN(value.getTime())) {
		return value.toISOString();
	}
	return value;
}

/**
 * Returns `value` when it is a plain (non-array) object, otherwise an empty
 * record. Guards against frontmatter that parses to a scalar, a list or null.
 */
function toFrontmatterRecord(value: unknown): Record<string, unknown> {
	if (typeof value === "object" && value !== null && !Array.isArray(value)) {
		return value as Record<string, unknown>;
	}
	return {};
}

/**
 * Splits a markdown document into its frontmatter and body and normalizes the
 * frontmatter into the standard post fields.
 *
 * Supported delimiters:
 * - `---` (YAML) - Jekyll, Astro, most SSGs
 * - `+++` (TOML) - Hugo
 * - `***` (YAML) - alternative format
 *
 * LF and CRLF line endings, a leading byte-order mark, an empty frontmatter
 * block, and a closing delimiter that is the last line of the file (no
 * trailing newline) are all accepted.
 *
 * @param content - Full text of the markdown file.
 * @param mapping - Optional custom frontmatter field names. Each mapped field
 *   is tried first, then the standard field name.
 * @returns The normalized frontmatter, the body text after the closing
 *   delimiter, and the parsed frontmatter exactly as the parser produced it.
 * @throws Error "Could not parse frontmatter" when the content does not start
 *   with a delimited frontmatter block. Errors raised by the YAML/TOML parser
 *   propagate unchanged.
 */
export function parseFrontmatter(
	content: string,
	mapping?: FrontmatterMapping,
): {
	frontmatter: PostFrontmatter;
	body: string;
	rawFrontmatter: Record<string, unknown>;
} {
	// Group 1: delimiter, group 2: frontmatter text (absent when the block is
	// empty), group 3: body (absent when the file ends at the closing delimiter).
	// The frontmatter group is lazily optional so that "---\n---\n" is read as
	// an empty block instead of searching for a later delimiter line.
	const frontmatterRegex =
		/^\uFEFF?(---|\+\+\+|\*\*\*)[ \t]*\r?\n(?:([\s\S]*?)\r?\n)??\1[ \t]*(?:\r?\n([\s\S]*))?$/;
	const match = content.match(frontmatterRegex);

	if (!match) {
		throw new Error("Could not parse frontmatter");
	}

	const delimiter = match[1];
	const frontmatterStr = match[2] ?? "";
	const body = match[3] ?? "";

	// +++ uses TOML (key = value); --- and *** use YAML (key: value)
	const isToml = delimiter === "+++";

	// CORE_SCHEMA keeps YAML dates as strings rather than Date objects
	const raw = toFrontmatterRecord(
		isToml
			? toml.parse(frontmatterStr)
			: yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA }),
	);

	// Apply field mappings to normalize to standard PostFrontmatter fields
	const frontmatter: Record<string, unknown> = {};

	const titleField = mapping?.title || "title";
	frontmatter.title = raw[titleField] || raw.title;

	const descField = mapping?.description || "description";
	frontmatter.description = raw[descField] || raw.description;

	// Publish date: custom field first, then the standard name, then common
	// alternatives. A parser may hand back Date instances for native date
	// values (the TOML path has no string-only schema), so they are converted
	// to ISO strings to match what the YAML path yields.
	const publishDate = dateValueToString(
		firstTruthyField(raw, [
			mapping?.publishDate,
			"publishDate",
			"pubDate",
			"date",
			"createdAt",
			"created_at",
		]),
	);
	if (publishDate !== undefined) {
		frontmatter.publishDate = publishDate;
	}

	// Updated date: same lookup order and normalization as the publish date
	const updatedAt = dateValueToString(
		firstTruthyField(raw, [
			mapping?.updatedAt,
			"updatedAt",
			"updated_at",
			"modifiedAt",
			"modified_at",
		]),
	);
	if (updatedAt !== undefined) {
		frontmatter.updatedAt = updatedAt;
	}

	const coverField = mapping?.coverImage || "ogImage";
	frontmatter.ogImage = raw[coverField] || raw.ogImage;

	const tagsField = mapping?.tags || "tags";
	frontmatter.tags = raw[tagsField] || raw.tags;

	// Draft is only set when the source declares it; the string "true" counts
	// as true, every other declared value as false.
	const draftField = mapping?.draft || "draft";
	const draftValue = raw[draftField] ?? raw.draft;
	if (draftValue !== undefined) {
		frontmatter.draft = draftValue === true || draftValue === "true";
	}

	// Always preserve atUri (internal field)
	frontmatter.atUri = raw.atUri;

	return {
		frontmatter: frontmatter as unknown as PostFrontmatter,
		body,
		rawFrontmatter: raw,
	};
}
119
120
/**
 * Derives a slug from a markdown filename.
 *
 * The trailing `.md` / `.mdx` extension is removed (in any letter case, so
 * `README.MD` is handled as well), the remainder is lower-cased, and each run
 * of whitespace is replaced by a single hyphen.
 *
 * @param filename - File name, with or without directories.
 * @returns The slug derived from the name.
 */
export function getSlugFromFilename(filename: string): string {
	const withoutExtension = filename.replace(/\.mdx?$/i, "");
	return withoutExtension.toLowerCase().replace(/\s+/g, "-");
}
126
127
/** Settings that control how a post's slug is derived. */
export interface SlugOptions {
	/**
	 * When true, a `YYYY-MM-DD-` prefix is stripped from the start of the slug
	 * and from the start of every path segment in it.
	 */
	stripDatePrefix?: boolean;
	/** When true, a trailing `/index` or `/_index` is removed from the slug. */
	removeIndexFromSlug?: boolean;
	/**
	 * Name of the raw frontmatter field that holds the slug. When the field is
	 * missing, empty or not a string, the slug comes from the file path instead.
	 */
	slugField?: string;
}
132
133
/**
 * Turns a content-relative file path into a slug: path separators are
 * normalized to `/`, the `.md` / `.mdx` extension is dropped (any letter
 * case), the result is lower-cased and whitespace runs become hyphens.
 */
function relativePathToSlug(relativePath: string): string {
	return relativePath
		.replace(/\\/g, "/") // backslash-separated paths would otherwise defeat the "/"-based rules below
		.replace(/\.mdx?$/i, "")
		.toLowerCase()
		.replace(/\s+/g, "-");
}

/**
 * Computes the slug for a post.
 *
 * The slug comes from the frontmatter field named by `options.slugField` when
 * that field holds a non-empty string (one leading `/` is removed, the value
 * is lower-cased and whitespace runs become hyphens). Otherwise it is derived
 * from `relativePath`.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Parsed frontmatter as produced by the parser.
 * @param options - Slug settings; all default to off / unset.
 * @returns The computed slug.
 */
export function getSlugFromOptions(
	relativePath: string,
	rawFrontmatter: Record<string, unknown>,
	options: SlugOptions = {},
): string {
	const {
		slugField,
		removeIndexFromSlug = false,
		stripDatePrefix = false,
	} = options;

	const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

	let slug =
		typeof frontmatterValue === "string" && frontmatterValue !== ""
			? frontmatterValue.replace(/^\//, "").toLowerCase().replace(/\s+/g, "-")
			: relativePathToSlug(relativePath);

	// Remove /index or /_index suffix if configured
	if (removeIndexFromSlug) {
		slug = slug.replace(/\/_?index$/, "");
	}

	// Strip Jekyll-style date prefixes (YYYY-MM-DD-) at the start of the slug
	// and at the start of each path segment
	if (stripDatePrefix) {
		slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
	}

	return slug;
}
182
183
/**
 * Expands a path template for a post.
 *
 * Built-in tokens:
 * - `{slug}` - the post slug
 * - `{title}` - the title lower-cased, whitespace turned into hyphens, and
 *   every character other than word characters and hyphens removed
 * - `{year}`, `{month}`, `{day}` - publish date parts in local time
 * - `{yearUTC}`, `{monthUTC}`, `{dayUTC}` - publish date parts in UTC
 *
 * Any other `{field}` token is replaced by the raw frontmatter value of that
 * name when it is a string, and by an empty string otherwise. Month and day
 * are zero-padded to two digits. If the publish date cannot be parsed, the
 * date tokens render as `NaN`.
 *
 * All tokens are substituted in a single pass with a replacer function, so
 * substituted values are inserted verbatim: `$` sequences in a slug are not
 * treated as replacement patterns and `{...}` text inside a value is not
 * expanded again.
 *
 * @param template - Template such as `/blog/{year}/{slug}`.
 * @param post - Post supplying the slug, frontmatter and raw frontmatter.
 * @returns The expanded path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
	const publishDate = new Date(post.frontmatter.publishDate);
	const twoDigits = (part: number): string => String(part).padStart(2, "0");

	// Coerce so a non-string title (e.g. a YAML number) cannot throw here
	const title: unknown = post.frontmatter.title;
	const slugifiedTitle = (title ? String(title) : "")
		.toLowerCase()
		.replace(/\s+/g, "-")
		.replace(/[^\w-]/g, "");

	const builtInTokens = new Map<string, string>([
		["slug", post.slug],
		["year", String(publishDate.getFullYear())],
		["yearUTC", String(publishDate.getUTCFullYear())],
		["month", twoDigits(publishDate.getMonth() + 1)],
		["monthUTC", twoDigits(publishDate.getUTCMonth() + 1)],
		["day", twoDigits(publishDate.getDate())],
		["dayUTC", twoDigits(publishDate.getUTCDate())],
		["title", slugifiedTitle],
	]);

	const expanded = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
		const builtIn = builtInTokens.get(field);
		if (builtIn !== undefined) {
			return builtIn;
		}
		// Remaining tokens fall back to string-valued raw frontmatter fields
		const value = post.rawFrontmatter[field];
		return typeof value === "string" ? value : "";
	});

	// Ensure leading slash
	return expanded.startsWith("/") ? expanded : `/${expanded}`;
}
224
225
/**
 * Resolves the URL path of a post.
 *
 * When `pathTemplate` is given it takes precedence and is expanded with
 * `resolvePathTemplate`. Otherwise the path is `<prefix>/<slug>`, where the
 * prefix is `pathPrefix` or `/posts` when no prefix is supplied. Trailing
 * slashes on the prefix are dropped so that a prefix such as `/blog/` or `/`
 * does not produce a double slash.
 *
 * @param post - Post whose slug (and, for templates, frontmatter) is used.
 * @param pathPrefix - Optional path prefix; defaults to `/posts`.
 * @param pathTemplate - Optional template overriding the prefix-based path.
 * @returns The resolved path.
 */
export function resolvePostPath(
	post: BlogPost,
	pathPrefix?: string,
	pathTemplate?: string,
): string {
	if (pathTemplate) {
		return resolvePathTemplate(pathTemplate, post);
	}
	const prefix = (pathPrefix || "/posts").replace(/\/+$/, "");
	return `${prefix}/${post.slug}`;
}
236
237
/**
 * Computes the SHA-256 digest of `content` (encoded as UTF-8) using the
 * global Web Crypto `crypto.subtle` API.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
	const bytes = new TextEncoder().encode(content);
	const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

	let hex = "";
	for (const byte of digest) {
		hex += byte.toString(16).padStart(2, "0");
	}
	return hex;
}
244
245
/**
 * Tells whether `relativePath` matches at least one of the given minimatch
 * patterns. An empty pattern list never matches.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
	return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
253
254
/** Options accepted by `scanContentDirectory`. */
export interface ScanOptions {
	/** Patterns matched with minimatch against each file's path relative to the content directory; matching files are skipped. */
	ignorePatterns?: string[];
	/** Custom frontmatter field names, handed to `parseFrontmatter`. */
	frontmatterMapping?: FrontmatterMapping;
	/** Forwarded to `getSlugFromOptions`: strip `YYYY-MM-DD-` prefixes from the slug. */
	stripDatePrefix?: boolean;
	/** Forwarded to `getSlugFromOptions`: drop a trailing `/index` or `/_index` from the slug. */
	removeIndexFromSlug?: boolean;
	/** Forwarded to `getSlugFromOptions`: raw frontmatter field that holds the slug. */
	slugField?: string;
}
261
262
/**
 * Tells whether the second argument of `scanContentDirectory` is an options
 * object (new signature) rather than a bare frontmatter mapping (old
 * signature). Every ScanOptions key counts, so an object that only sets
 * `removeIndexFromSlug` or `stripDatePrefix` is recognized too.
 */
function looksLikeScanOptions(value: object): boolean {
	const optionKeys = [
		"frontmatterMapping",
		"ignorePatterns",
		"slugField",
		"removeIndexFromSlug",
		"stripDatePrefix",
	];
	return optionKeys.some((key) => key in value);
}

/**
 * Finds every `.md` / `.mdx` file under `contentDir`, parses its frontmatter
 * and returns the resulting posts sorted by publish date, newest first.
 *
 * Two call shapes are supported:
 * - `scanContentDirectory(dir, options)` - new signature
 * - `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)` - old
 *   signature
 *
 * Files matching an ignore pattern are skipped. A file whose frontmatter or
 * slug cannot be processed is reported with `console.error` and left out; a
 * file that cannot be read rejects the returned promise. Posts without a
 * parseable publish date are placed after all dated posts.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Options object, or a frontmatter
 *   mapping when the old signature is used.
 * @param ignorePatterns - Ignore patterns for the old signature; not used
 *   when an options object is passed.
 * @returns The parsed posts, newest first.
 */
export async function scanContentDirectory(
	contentDir: string,
	frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
	ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
	// Handle both old signature (frontmatterMapping, ignorePatterns) and new signature (options)
	let options: ScanOptions;
	if (
		frontmatterMappingOrOptions &&
		looksLikeScanOptions(frontmatterMappingOrOptions)
	) {
		options = frontmatterMappingOrOptions as ScanOptions;
	} else {
		// Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
		options = {
			frontmatterMapping: frontmatterMappingOrOptions as
				| FrontmatterMapping
				| undefined,
			ignorePatterns,
		};
	}

	const {
		frontmatterMapping,
		ignorePatterns: ignore = [],
		slugField,
		removeIndexFromSlug,
		stripDatePrefix,
	} = options;

	const patterns = ["**/*.md", "**/*.mdx"];
	const posts: BlogPost[] = [];

	for (const pattern of patterns) {
		const files = await glob(pattern, {
			cwd: contentDir,
			absolute: false,
			// A directory named like "notes.md" must not reach readFile
			nodir: true,
		});

		for (const relativePath of files) {
			// Skip files matching ignore patterns
			if (shouldIgnore(relativePath, ignore)) {
				continue;
			}

			const filePath = path.join(contentDir, relativePath);
			const rawContent = await fs.readFile(filePath, "utf-8");

			try {
				const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
					rawContent,
					frontmatterMapping,
				);
				const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
					slugField,
					removeIndexFromSlug,
					stripDatePrefix,
				});

				posts.push({
					filePath,
					slug,
					frontmatter,
					content: body,
					rawContent,
					rawFrontmatter,
				});
			} catch (error) {
				console.error(`Error parsing ${relativePath}:`, error);
			}
		}
	}

	// Sort by publish date (newest first). An unparseable date maps to
	// -Infinity so the comparator never returns NaN and such posts sort last.
	const publishTime = (post: BlogPost): number => {
		const time = new Date(post.frontmatter.publishDate).getTime();
		return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
	};
	posts.sort((a, b) => {
		const timeA = publishTime(a);
		const timeB = publishTime(b);
		if (timeA === timeB) {
			return 0;
		}
		return timeB > timeA ? 1 : -1;
	});

	return posts;
}
346
347
/**
 * Writes the `atUri` entry into a document's frontmatter and returns the
 * updated document text.
 *
 * The entry is written as `atUri = "..."` for `+++` (TOML) frontmatter and as
 * `atUri: "..."` for `---` / `***` frontmatter. A top-level `atUri` line that
 * already exists in the frontmatter is replaced; otherwise the entry is
 * inserted as the last line before the closing delimiter, using the line
 * ending of the opening delimiter line.
 *
 * Only the frontmatter block is inspected and modified: text in the body that
 * happens to contain `atUri:` is left alone, and the closing delimiter must
 * stand on a line of its own, so delimiter characters inside a value are not
 * mistaken for the end of the block.
 *
 * @param rawContent - Full text of the markdown file.
 * @param atUri - URI to store.
 * @returns The document with the `atUri` entry set.
 * @throws Error "Could not find frontmatter end" when the document does not
 *   start with a frontmatter delimiter line or has no closing delimiter line.
 */
export function updateFrontmatterWithAtUri(
	rawContent: string,
	atUri: string,
): string {
	// Opening delimiter line (---, +++ or ***), optionally preceded by a BOM
	const opening = rawContent.match(
		/^\uFEFF?(---|\+\+\+|\*\*\*)[ \t]*(\r?\n)/,
	);
	if (!opening) {
		throw new Error("Could not find frontmatter end");
	}

	const delimiter = opening[1] ?? "---";
	const lineEnding = opening[2] ?? "\n";
	const isToml = delimiter === "+++";
	const frontmatterStart = opening[0].length;

	// Closing delimiter: the first later line consisting only of the delimiter
	const closingLine = new RegExp(
		`^${delimiter.replace(/[+*]/g, "\\$&")}[ \\t]*$`,
		"gm",
	);
	closingLine.lastIndex = frontmatterStart;
	const closing = closingLine.exec(rawContent);
	if (!closing) {
		throw new Error("Could not find frontmatter end");
	}
	const frontmatterEnd = closing.index;

	// Backslashes and double quotes are escaped so the quoted value stays valid
	const quotedValue = atUri.replace(/[\\"]/g, "\\$&");
	const atUriEntry = isToml
		? `atUri = "${quotedValue}"`
		: `atUri: "${quotedValue}"`;

	const head = rawContent.slice(0, frontmatterStart);
	const frontmatter = rawContent.slice(frontmatterStart, frontmatterEnd);
	const tail = rawContent.slice(frontmatterEnd);

	// Replace an existing top-level atUri line (YAML or TOML form, any value,
	// including an empty one). A replacer function keeps "$" in the entry literal.
	const existingEntry = /^atUri[ \t]*[=:][^\r\n]*/m;
	if (existingEntry.test(frontmatter)) {
		return `${head}${frontmatter.replace(existingEntry, () => atUriEntry)}${tail}`;
	}

	// Insert atUri before the closing delimiter
	return `${head}${frontmatter}${atUriEntry}${lineEnding}${tail}`;
}
379
380
/**
 * Reduces markdown to plain text by removing common formatting.
 *
 * Steps, in order:
 * 1. fenced code blocks are removed entirely, before any inline rule can act
 *    on their contents;
 * 2. images are removed, before links, because an image contains link syntax
 *    and would otherwise be left behind as `!alt`;
 * 3. links are replaced by their link text;
 * 4. heading markers (`#` to `######` followed by spaces or tabs) are removed
 *    at the start of a line, optionally after indentation or `>` quote
 *    markers, so a `#` in running text such as `C# ` is kept;
 * 5. bold, italic and inline-code markers are removed, keeping the text;
 * 6. runs of three or more newlines collapse to two and the result is trimmed.
 *
 * @param markdown - Markdown source text.
 * @returns The text with the formatting above stripped.
 */
export function stripMarkdownForText(markdown: string): string {
	return markdown
		.replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
		.replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
		.replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
		.replace(/^((?:[ \t]*>)*[ \t]*)#{1,6}[ \t]+/gm, "$1") // Remove header markers
		.replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
		.replace(/\*([^*]+)\*/g, "$1") // Remove italic
		.replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
		.replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
		.trim();
}