// packages/cli/src/lib/markdown.ts
1
import * as fs from "node:fs/promises";
2
import * as path from "node:path";
3
import { glob } from "glob";
4
import yaml from "js-yaml";
5
import { minimatch } from "minimatch";
6
import * as toml from "smol-toml";
7
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
8
9
/**
 * Returns the first truthy value stored in `raw` under one of `fields`,
 * checking the fields in order, or `undefined` when none of them is set.
 */
function firstTruthyField(
	raw: Record<string, unknown>,
	fields: readonly string[],
): unknown {
	for (const field of fields) {
		const value = raw[field];
		if (value) {
			return value;
		}
	}
	return undefined;
}

/**
 * Splits a markdown document into its frontmatter and body and normalizes
 * the frontmatter into the standard post fields.
 *
 * Supported delimiters:
 * - `---` (YAML) - Jekyll, Astro, most SSGs
 * - `+++` (TOML) - Hugo
 * - `***` (YAML) - alternative format
 *
 * An empty frontmatter block and a document that ends directly after the
 * closing delimiter (no trailing newline, no body) are both accepted.
 *
 * @param content - Full text of the markdown file.
 * @param mapping - Optional custom frontmatter field names. Each mapped field
 *   is tried first, then the standard field name is used as a fallback.
 * @returns The normalized frontmatter, the body text after the closing
 *   delimiter, and the parsed frontmatter exactly as written (`rawFrontmatter`).
 * @throws Error "Could not parse frontmatter" when the content does not start
 *   with a delimited frontmatter block. Errors raised by the YAML/TOML parsers
 *   propagate unchanged.
 */
export function parseFrontmatter(
	content: string,
	mapping?: FrontmatterMapping,
): {
	frontmatter: PostFrontmatter;
	body: string;
	rawFrontmatter: Record<string, unknown>;
} {
	// Group 1: delimiter, group 2: frontmatter text, group 3: body.
	// The frontmatter group is lazily optional so `---\n---` (empty block)
	// matches, and the newline + body after the closing delimiter is optional
	// so a file that ends at the closing delimiter matches too.
	const frontmatterRegex =
		/^(---|\+\+\+|\*\*\*)\r?\n(?:([\s\S]*?)\r?\n)??\1(?:\r?\n([\s\S]*))?$/;
	const match = content.match(frontmatterRegex);

	if (!match) {
		throw new Error("Could not parse frontmatter");
	}

	const delimiter = match[1];
	const frontmatterStr = match[2] ?? "";
	const body = match[3] ?? "";

	// +++ uses TOML (key = value); --- and *** use YAML (key: value)
	const isToml = delimiter === "+++";

	// Use CORE_SCHEMA to keep dates as strings rather than Date objects
	const parsed: unknown = isToml
		? toml.parse(frontmatterStr)
		: yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA });

	// An empty YAML document parses to undefined, and a scalar or list is not
	// a usable key/value mapping: treat all of these as "no fields".
	const raw: Record<string, unknown> =
		parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)
			? (parsed as Record<string, unknown>)
			: {};

	// Apply field mappings to normalize to standard PostFrontmatter fields
	const frontmatter: Record<string, unknown> = {};

	// Title mapping
	const titleField = mapping?.title || "title";
	frontmatter.title = raw[titleField] || raw.title;

	// Description mapping
	const descField = mapping?.description || "description";
	frontmatter.description = raw[descField] || raw.description;

	// Publish date: custom field first, then the standard name, then common
	// alternatives. The key is left unset when nothing is found.
	const publishDate = firstTruthyField(raw, [
		...(mapping?.publishDate ? [mapping.publishDate] : []),
		"publishDate",
		"pubDate",
		"date",
		"createdAt",
		"created_at",
	]);
	if (publishDate !== undefined) {
		frontmatter.publishDate = publishDate;
	}

	// Updated date: same lookup order as the publish date.
	const updatedAt = firstTruthyField(raw, [
		...(mapping?.updatedAt ? [mapping.updatedAt] : []),
		"updatedAt",
		"updated_at",
		"modifiedAt",
		"modified_at",
	]);
	if (updatedAt !== undefined) {
		frontmatter.updatedAt = updatedAt;
	}

	// Cover image mapping
	const coverField = mapping?.coverImage || "ogImage";
	frontmatter.ogImage = raw[coverField] || raw.ogImage;

	// Tags mapping
	const tagsField = mapping?.tags || "tags";
	frontmatter.tags = raw[tagsField] || raw.tags;

	// Draft mapping: `??` (not `||`) so an explicit `false` is respected.
	// Only boolean true and the string "true" count as a draft.
	const draftField = mapping?.draft || "draft";
	const draftValue = raw[draftField] ?? raw.draft;
	if (draftValue !== undefined) {
		frontmatter.draft = draftValue === true || draftValue === "true";
	}

	// Always preserve atUri (internal field)
	frontmatter.atUri = raw.atUri;

	return {
		frontmatter: frontmatter as unknown as PostFrontmatter,
		body,
		rawFrontmatter: raw,
	};
}
120
121
/**
 * Derives a slug from a markdown filename.
 *
 * Removes a trailing `.md` / `.mdx` extension, lowercases the rest, and
 * replaces each run of whitespace with a single hyphen.
 *
 * The extension match is case-insensitive: the name is lowercased afterwards,
 * so a case-sensitive match would leave `Post.MD` as `post.md`.
 *
 * @param filename - File name (or path) ending in `.md` or `.mdx`.
 * @returns The normalized slug.
 */
export function getSlugFromFilename(filename: string): string {
	return filename
		.replace(/\.mdx?$/i, "")
		.toLowerCase()
		.replace(/\s+/g, "-");
}
127
128
/** Controls how a post's slug is derived by `getSlugFromOptions`. */
export interface SlugOptions {
	/**
	 * Name of a frontmatter field holding the slug. Used when that field is a
	 * non-empty string; otherwise the slug falls back to the file path.
	 */
	slugField?: string;

	/** Drop a trailing `/index` or `/_index` from the slug. Defaults to `false`. */
	removeIndexFromSlug?: boolean;

	/**
	 * Remove a Jekyll-style `YYYY-MM-DD-` prefix from the start of each path
	 * segment. Defaults to `false`.
	 */
	stripDatePrefix?: boolean;
}
133
134
/**
 * Computes a post's slug from its frontmatter or, failing that, its path.
 *
 * Resolution order:
 * 1. If `options.slugField` is set and that frontmatter field is a non-empty
 *    string, use it (with one leading `/` removed).
 * 2. Otherwise use `relativePath` without its `.md` / `.mdx` extension.
 *
 * Either way the value is lowercased and whitespace runs become hyphens.
 * Afterwards the optional `/index` removal and date-prefix stripping are
 * applied, in that order.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Parsed frontmatter exactly as written in the file.
 * @param options - Slug derivation settings; every flag defaults to off.
 * @returns The resolved slug.
 */
export function getSlugFromOptions(
	relativePath: string,
	rawFrontmatter: Record<string, unknown>,
	options: SlugOptions = {},
): string {
	const {
		slugField,
		removeIndexFromSlug = false,
		stripDatePrefix = false,
	} = options;

	// Shared normalization for both slug sources.
	const normalize = (value: string): string =>
		value.toLowerCase().replace(/\s+/g, "-");

	const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

	let slug: string;
	if (typeof frontmatterValue === "string" && frontmatterValue) {
		// Remove leading slash if present
		slug = normalize(frontmatterValue.replace(/^\//, ""));
	} else {
		// Paths produced on Windows use `\`; slugs always use `/`.
		// On POSIX `path.sep` is already `/`, so this is a no-op there.
		const posixPath = relativePath.split(path.sep).join("/");
		// Case-insensitive: the path is lowercased afterwards, so `Post.MD`
		// must lose its extension too.
		slug = normalize(posixPath.replace(/\.mdx?$/i, ""));
	}

	// Remove /index or /_index suffix if configured
	if (removeIndexFromSlug) {
		slug = slug.replace(/\/_?index$/, "");
	}

	// Strip Jekyll-style date prefix (YYYY-MM-DD-) from every path segment
	if (stripDatePrefix) {
		slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
	}

	return slug;
}
183
184
/**
 * Expands a path template such as `/blog/{year}/{slug}` for a post.
 *
 * Built-in tokens:
 * - `{slug}` - the post's slug
 * - `{year}`, `{month}`, `{day}` - publish date in the local time zone
 * - `{yearUTC}`, `{monthUTC}`, `{dayUTC}` - publish date in UTC
 * - `{title}` - the title lowercased, whitespace hyphenated, and stripped of
 *   everything except word characters and hyphens
 *
 * Any other `{field}` token is replaced by the raw frontmatter value of that
 * name when it is a string, and by the empty string otherwise.
 *
 * All tokens are substituted in a single pass, so substituted values are
 * inserted literally: `$` sequences in a slug are not interpreted as
 * replacement patterns, and braces inside a value are not expanded again.
 *
 * A missing or unparseable publish date makes the date tokens expand to
 * `NaN`.
 *
 * @param template - Template containing `{token}` placeholders.
 * @param post - Post supplying the slug, frontmatter, and raw frontmatter.
 * @returns The expanded path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
	const publishDate = new Date(post.frontmatter.publishDate);
	const twoDigits = (value: number): string => String(value).padStart(2, "0");

	// YAML can parse a title such as `2024` as a number; only strings and
	// numbers are meaningful here, anything else yields an empty title.
	const rawTitle: unknown = post.frontmatter.title;
	const titleText =
		typeof rawTitle === "string" || typeof rawTitle === "number"
			? String(rawTitle)
			: "";
	const slugifiedTitle = titleText
		.toLowerCase()
		.replace(/\s+/g, "-")
		.replace(/[^\w-]/g, "");

	// Built-in tokens take precedence over raw frontmatter fields.
	const builtinTokens = new Map<string, string>([
		["slug", post.slug],
		["year", String(publishDate.getFullYear())],
		["yearUTC", String(publishDate.getUTCFullYear())],
		["month", twoDigits(publishDate.getMonth() + 1)],
		["monthUTC", twoDigits(publishDate.getUTCMonth() + 1)],
		["day", twoDigits(publishDate.getDate())],
		["dayUTC", twoDigits(publishDate.getUTCDate())],
		["title", slugifiedTitle],
	]);

	// A replacer function (rather than a replacement string) keeps `$&`, `$1`
	// and similar sequences in the substituted values literal.
	const result = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
		const builtin = builtinTokens.get(field);
		if (builtin !== undefined) {
			return builtin;
		}
		const value = post.rawFrontmatter[field];
		return typeof value === "string" ? value : "";
	});

	// Ensure leading slash
	return result.startsWith("/") ? result : `/${result}`;
}
225
226
/**
 * Resolves the URL path for a post.
 *
 * When `pathTemplate` is given it takes precedence and is expanded with
 * `resolvePathTemplate`. Otherwise the path is `<prefix>/<slug>`, where the
 * prefix defaults to `/posts`. Trailing slashes on the prefix are dropped so
 * that `/blog/` yields `/blog/<slug>` rather than `/blog//<slug>`; an empty
 * prefix (or `/`) yields `/<slug>`.
 *
 * @param post - Post whose path is being resolved.
 * @param pathPrefix - Prefix placed before the slug; defaults to `/posts`.
 * @param pathTemplate - Optional template overriding the prefix-based path.
 * @returns The resolved path.
 */
export function resolvePostPath(
	post: BlogPost,
	pathPrefix?: string,
	pathTemplate?: string,
): string {
	if (pathTemplate) {
		return resolvePathTemplate(pathTemplate, post);
	}

	// Strip trailing slashes so joining with `/` never doubles them up.
	const prefix = (pathPrefix ?? "/posts").replace(/\/+$/, "");
	return `${prefix}/${post.slug}`;
}
237
238
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string (64 characters).
 */
export async function getContentHash(content: string): Promise<string> {
	const utf8Bytes = new TextEncoder().encode(content);
	const digest = new Uint8Array(
		await crypto.subtle.digest("SHA-256", utf8Bytes),
	);

	// Two hex digits per byte, zero-padded.
	let hex = "";
	for (let i = 0; i < digest.length; i++) {
		hex += (digest[i] as number).toString(16).padStart(2, "0");
	}
	return hex;
}
245
246
/**
 * Reports whether a path matches at least one ignore pattern.
 *
 * @param relativePath - Path to test.
 * @param ignorePatterns - Patterns handed to `minimatch` one at a time.
 * @returns `true` as soon as any pattern matches, `false` if none do
 *   (including when the list is empty).
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
	return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
254
255
/** Settings accepted by `scanContentDirectory` in its options-object form. */
export interface ScanOptions {
	/** Custom frontmatter field names, passed through to `parseFrontmatter`. */
	frontmatterMapping?: FrontmatterMapping;

	/** Patterns for relative paths to skip; a file matching any one is ignored. */
	ignorePatterns?: string[];

	/** Frontmatter field to read the slug from, passed to `getSlugFromOptions`. */
	slugField?: string;

	/** Passed to `getSlugFromOptions`: drop a trailing `/index` or `/_index`. */
	removeIndexFromSlug?: boolean;

	/** Passed to `getSlugFromOptions`: strip `YYYY-MM-DD-` date prefixes. */
	stripDatePrefix?: boolean;
}
262
263
/**
 * Finds every `.md` / `.mdx` file under `contentDir` and parses it into a post.
 *
 * Two call shapes are supported:
 * - new: `scanContentDirectory(dir, options)` with a `ScanOptions` object
 * - old: `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`
 *
 * The second argument is treated as `ScanOptions` when it carries any
 * `ScanOptions` key; in that case the third positional argument is not used.
 *
 * Files matching an ignore pattern are skipped. A file whose frontmatter or
 * slug cannot be parsed is logged with `console.error` and skipped; a file
 * that cannot be read rejects the whole scan.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Options object, or (old signature) a
 *   frontmatter mapping.
 * @param ignorePatterns - Old signature only: patterns of files to skip.
 * @returns Parsed posts sorted by publish date, newest first. Posts with a
 *   missing or unparseable publish date are placed last.
 */
export async function scanContentDirectory(
	contentDir: string,
	frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
	ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
	// Every ScanOptions key identifies the new signature. Checking only a
	// subset would misread e.g. `{ stripDatePrefix: true }` as a frontmatter
	// mapping and silently drop the setting.
	const scanOptionKeys = [
		"frontmatterMapping",
		"ignorePatterns",
		"slugField",
		"removeIndexFromSlug",
		"stripDatePrefix",
	] as const;

	const secondArg = frontmatterMappingOrOptions;
	const isOptionsObject =
		Boolean(secondArg) &&
		scanOptionKeys.some((key) => key in (secondArg as object));

	const options: ScanOptions = isOptionsObject
		? (secondArg as ScanOptions)
		: {
				// Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
				frontmatterMapping: secondArg as FrontmatterMapping | undefined,
				ignorePatterns,
			};

	const {
		frontmatterMapping,
		ignorePatterns: ignore = [],
		slugField,
		removeIndexFromSlug,
		stripDatePrefix,
	} = options;

	const patterns = ["**/*.md", "**/*.mdx"];
	const posts: BlogPost[] = [];

	for (const pattern of patterns) {
		const files = await glob(pattern, {
			cwd: contentDir,
			absolute: false,
		});

		for (const relativePath of files) {
			// Skip files matching ignore patterns
			if (shouldIgnore(relativePath, ignore)) {
				continue;
			}

			const filePath = path.join(contentDir, relativePath);
			const rawContent = await fs.readFile(filePath, "utf-8");

			try {
				const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
					rawContent,
					frontmatterMapping,
				);
				const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
					slugField,
					removeIndexFromSlug,
					stripDatePrefix,
				});

				posts.push({
					filePath,
					slug,
					frontmatter,
					content: body,
					rawContent,
					rawFrontmatter,
				});
			} catch (error) {
				// Best effort: one unparseable file must not abort the scan.
				console.error(`Error parsing ${relativePath}:`, error);
			}
		}
	}

	// Sort by publish date (newest first). An invalid date has a NaN
	// timestamp, which would make a subtraction-based comparator return NaN
	// and leave the order unspecified; map it to -Infinity so those posts
	// consistently sort last.
	const publishTime = (post: BlogPost): number => {
		const time = new Date(post.frontmatter.publishDate).getTime();
		return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
	};
	posts.sort((a, b) => {
		const timeA = publishTime(a);
		const timeB = publishTime(b);
		if (timeA === timeB) {
			return 0;
		}
		return timeB > timeA ? 1 : -1;
	});

	return posts;
}
347
348
/**
 * Writes an `atUri` entry into a document's frontmatter and returns the
 * updated document text.
 *
 * The entry is formatted as `atUri = "<value>"` for `+++` (TOML) frontmatter
 * and as `atUri: "<value>"` for `---` / `***` (YAML) frontmatter.
 *
 * - If the frontmatter already has an `atUri` line, that line is replaced.
 * - Otherwise a new line is added: at the end of YAML frontmatter, and at the
 *   start of TOML frontmatter so the key stays top-level even when the
 *   frontmatter contains `[table]` sections.
 *
 * Only the frontmatter block is inspected and modified; body text that
 * happens to mention `atUri:` is left untouched. The closing delimiter must
 * sit on a line of its own, so a delimiter sequence inside a value (for
 * example `title: a --- b`) is not mistaken for the end of the block. The
 * document's own line-ending style is used for an inserted line.
 *
 * @param rawContent - Full text of the markdown file.
 * @param atUri - Value to store.
 * @returns The document with the `atUri` entry set.
 * @throws Error "Could not find frontmatter end" when the document does not
 *   start with a complete frontmatter block.
 */
export function updateFrontmatterWithAtUri(
	rawContent: string,
	atUri: string,
): string {
	// Group 1: delimiter, group 2: the line ending used after the opener.
	// The match ends at the closing delimiter, which must start a line and be
	// followed by a line break or the end of the document.
	const frontmatterMatch = rawContent.match(
		/^(---|\+\+\+|\*\*\*)(\r?\n)(?:[\s\S]*?\r?\n)??\1(?=\r?\n|$)/,
	);
	const delimiter = frontmatterMatch?.[1];
	const eol = frontmatterMatch?.[2];
	if (!frontmatterMatch || delimiter === undefined || eol === undefined) {
		throw new Error("Could not find frontmatter end");
	}

	const isToml = delimiter === "+++";

	// Format the atUri entry based on frontmatter type
	const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

	// Split into: opening delimiter line | frontmatter lines | closing
	// delimiter and everything after it.
	const openEnd = delimiter.length + eol.length;
	const closeStart = frontmatterMatch[0].length - delimiter.length;
	const head = rawContent.slice(0, openEnd);
	const frontmatter = rawContent.slice(openEnd, closeStart);
	const tail = rawContent.slice(closeStart);

	// An existing entry is a whole line whose key is `atUri`. YAML keys must
	// start at column 0 to be top-level; TOML permits leading whitespace.
	const existingEntry = isToml
		? /^[ \t]*atUri[ \t]*=[^\r\n]*/m
		: /^atUri[ \t]*:[^\r\n]*/m;

	if (existingEntry.test(frontmatter)) {
		// Replacer function: keeps any `$` in the URI literal.
		const replaced = frontmatter.replace(existingEntry, () => atUriEntry);
		return `${head}${replaced}${tail}`;
	}

	const withEntry = isToml
		? `${atUriEntry}${eol}${frontmatter}`
		: `${frontmatter}${atUriEntry}${eol}`;

	return `${head}${withEntry}${tail}`;
}
380
381
/**
 * Reduces markdown to approximate plain text.
 *
 * This is a regex-based best effort, not a markdown parser. Steps, in order:
 * 1. fenced code blocks are removed entirely (done first so markup-like
 *    characters inside code cannot pair up with text outside it)
 * 2. images are removed (before links, otherwise the link rule would turn
 *    `![alt](url)` into `!alt`)
 * 3. links are replaced by their text
 * 4. heading markers at the start of a line are removed (anchored, so a `#`
 *    inside a sentence such as `C# rocks` is kept)
 * 5. bold, italic, and inline-code markers are removed, keeping the text
 * 6. runs of three or more newlines collapse to two, and the result is trimmed
 *
 * @param markdown - Markdown source.
 * @returns The text with the markup above stripped.
 */
export function stripMarkdownForText(markdown: string): string {
	return markdown
		.replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
		.replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
		.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
		.replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers
		.replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
		.replace(/\*([^*]+)\*/g, "$1") // Remove italic
		.replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
		.replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
		.trim();
}