packages/cli/src/lib/markdown.ts 10.4 K raw
1
import * as fs from "node:fs/promises";
2
import * as path from "node:path";
3
import { glob } from "glob";
4
import { minimatch } from "minimatch";
5
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
6
7
/**
 * Strip one pair of matching surrounding quotes (single or double) from a scalar.
 *
 * @returns the unquoted text and whether quotes were actually removed.
 */
function unquoteScalar(value: string): { text: string; quoted: boolean } {
	const first = value.charAt(0);
	const isQuoted =
		value.length >= 2 &&
		(first === '"' || first === "'") &&
		value.endsWith(first);
	return isQuoted
		? { text: value.slice(1, -1), quoted: true }
		: { text: value, quoted: false };
}

/**
 * Parse the frontmatter block at the top of a markdown document.
 *
 * Supported delimiters:
 * - `---` and `***`: entries are read as `key: value`
 * - `+++`: entries are read as `key = value`
 *
 * This is a deliberately small line-based parser, not a full YAML/TOML
 * implementation. It understands plain scalars, quoted strings, `true`/`false`,
 * inline arrays (`[a, b]`) and, for the `:` formats, dash-prefixed list items
 * on the lines following an empty-valued key.
 *
 * Both LF and CRLF line endings are accepted, the frontmatter may be empty,
 * and the document may end directly after the closing delimiter.
 *
 * @param content - Full text of the markdown file.
 * @param mapping - Optional names of the raw keys that hold the standard fields.
 * @returns The normalized frontmatter, the document body (everything after the
 *   closing delimiter line, unmodified), and the raw key/value pairs.
 * @throws Error("Could not parse frontmatter") when no frontmatter block is found.
 */
export function parseFrontmatter(
	content: string,
	mapping?: FrontmatterMapping,
): {
	frontmatter: PostFrontmatter;
	body: string;
	rawFrontmatter: Record<string, unknown>;
} {
	// The frontmatter text and the trailing body are both optional so that
	// "---\n---" and a file ending right at the closing delimiter still parse.
	const frontmatterRegex =
		/^(---|\+\+\+|\*\*\*)\r?\n(?:([\s\S]*?)\r?\n)?\1(?:\r?\n([\s\S]*))?$/;
	const match = content.match(frontmatterRegex);

	if (!match) {
		throw new Error("Could not parse frontmatter");
	}

	const delimiter = match[1];
	const frontmatterStr = match[2] ?? "";
	const body = match[3] ?? "";

	// +++ uses `key = value`; --- and *** use `key: value`.
	const isToml = delimiter === "+++";
	const separator = isToml ? "=" : ":";

	const raw: Record<string, unknown> = {};
	const lines = frontmatterStr.split(/\r?\n/);

	// A list item is either indented ("  - x", "  -x") or starts at column 0
	// with a dash followed by whitespace ("- x").
	const listItemRegex = /^(?:\s+-\s*|-\s+)(.*)$/;

	let i = 0;
	while (i < lines.length) {
		const line = lines[i] ?? "";
		const sepIndex = line.indexOf(separator);

		// Skip lines without a separator and comment lines (which may contain one).
		if (sepIndex === -1 || line.trimStart().startsWith("#")) {
			i++;
			continue;
		}

		const key = line.slice(0, sepIndex).trim();
		const { text: value, quoted } = unquoteScalar(
			line.slice(sepIndex + 1).trim(),
		);

		if (quoted) {
			// An explicitly quoted scalar is always a string: "true" and "[a]"
			// must not be re-interpreted as a boolean or an array.
			raw[key] = value;
		} else if (value.startsWith("[") && value.endsWith("]")) {
			// Inline array (simple case for tags); `[]` is an empty array.
			const arrayContent = value.slice(1, -1).trim();
			raw[key] =
				arrayContent === ""
					? []
					: arrayContent
							.split(",")
							.map((item) => item.trim().replace(/^["']|["']$/g, ""));
		} else if (value === "" && !isToml) {
			// Key with no value: collect dash-prefixed list items that follow.
			const arrayItems: string[] = [];
			let j = i + 1;
			while (j < lines.length) {
				const nextLine = lines[j] ?? "";
				const listMatch = nextLine.match(listItemRegex);
				if (listMatch && listMatch[1] !== undefined) {
					arrayItems.push(unquoteScalar(listMatch[1].trim()).text);
					j++;
				} else if (nextLine.trim() === "") {
					// Blank lines inside the list are skipped.
					j++;
				} else {
					// Next key or other non-list content ends the list.
					break;
				}
			}
			if (arrayItems.length > 0) {
				raw[key] = arrayItems;
				// Resume at the first line that was not part of the list.
				i = j;
				continue;
			}
			raw[key] = value;
		} else if (value === "true") {
			raw[key] = true;
		} else if (value === "false") {
			raw[key] = false;
		} else {
			raw[key] = value;
		}
		i++;
	}

	// Apply field mappings to normalize to the standard frontmatter fields.
	const frontmatter: Record<string, unknown> = {};

	const titleField = mapping?.title || "title";
	frontmatter.title = raw[titleField] || raw.title;

	const descField = mapping?.description || "description";
	frontmatter.description = raw[descField] || raw.description;

	// Publish date: mapped field first, then `publishDate`, then common aliases.
	const dateField = mapping?.publishDate;
	if (dateField && raw[dateField]) {
		frontmatter.publishDate = raw[dateField];
	} else if (raw.publishDate) {
		frontmatter.publishDate = raw.publishDate;
	} else {
		const dateFields = ["pubDate", "date", "createdAt", "created_at"];
		for (const field of dateFields) {
			if (raw[field]) {
				frontmatter.publishDate = raw[field];
				break;
			}
		}
	}

	const coverField = mapping?.coverImage || "ogImage";
	frontmatter.ogImage = raw[coverField] || raw.ogImage;

	const tagsField = mapping?.tags || "tags";
	frontmatter.tags = raw[tagsField] || raw.tags;

	// `draft` is only set when some value was present; both the boolean and
	// the string form of "true" count as a draft.
	const draftField = mapping?.draft || "draft";
	const draftValue = raw[draftField] ?? raw.draft;
	if (draftValue !== undefined) {
		frontmatter.draft = draftValue === true || draftValue === "true";
	}

	// Always preserve atUri (internal field).
	frontmatter.atUri = raw.atUri;

	return {
		frontmatter: frontmatter as unknown as PostFrontmatter,
		body,
		rawFrontmatter: raw,
	};
}
170
171
/**
 * Derive a slug from a markdown filename.
 *
 * Removes a trailing `.md` / `.mdx` extension (in any letter case, so
 * `Post.MD` does not end up as `post.md`), lowercases the result and
 * replaces each run of whitespace with a single hyphen.
 *
 * @param filename - File name, with or without a markdown extension.
 * @returns The slugified name.
 */
export function getSlugFromFilename(filename: string): string {
	return filename
		.replace(/\.mdx?$/i, "")
		.toLowerCase()
		.replace(/\s+/g, "-");
}
177
178
/** Options controlling how a post slug is derived. */
export interface SlugOptions {
	/** Raw frontmatter key whose string value, when present, is used as the slug. */
	slugField?: string;
	/** When true, a trailing `/index` or `/_index` segment is removed. */
	removeIndexFromSlug?: boolean;
	/** When true, a leading `YYYY-MM-DD-` is stripped from path segments. */
	stripDatePrefix?: boolean;
}

/**
 * Compute a post slug from its frontmatter or, failing that, its relative path.
 *
 * If `slugField` is set and the raw frontmatter holds a non-empty string under
 * that key, that value (minus one leading `/`) is the base. Otherwise the base
 * is `relativePath` with platform path separators normalized to `/` and the
 * `.md` / `.mdx` extension (any letter case) removed. The base is then
 * lowercased and whitespace runs become hyphens, after which the optional
 * index-suffix and date-prefix rules are applied.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Raw key/value pairs parsed from the file.
 * @param options - See {@link SlugOptions}; every flag defaults to off.
 * @returns The slug.
 */
export function getSlugFromOptions(
	relativePath: string,
	rawFrontmatter: Record<string, unknown>,
	options: SlugOptions = {},
): string {
	const {
		slugField,
		removeIndexFromSlug = false,
		stripDatePrefix = false,
	} = options;

	const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

	const base =
		typeof frontmatterValue === "string" && frontmatterValue !== ""
			? // Frontmatter slug: drop a single leading slash.
				frontmatterValue.replace(/^\//, "")
			: // File path: use "/" between segments on every platform so the
				// index/date rules below (which look for "/") keep working.
				relativePath
					.split(path.sep)
					.join("/")
					.replace(/\.mdx?$/i, "");

	let slug = base.toLowerCase().replace(/\s+/g, "-");

	// Remove /index or /_index suffix if configured.
	if (removeIndexFromSlug) {
		slug = slug.replace(/\/_?index$/, "");
	}

	// Strip Jekyll-style date prefix (YYYY-MM-DD-) at the start of any segment.
	if (stripDatePrefix) {
		slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
	}

	return slug;
}
233
234
/**
 * Compute the SHA-256 digest of a string.
 *
 * The text is encoded with `TextEncoder` and hashed through
 * `crypto.subtle.digest`.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string (two digits per byte).
 */
export async function getContentHash(content: string): Promise<string> {
	const bytes = new TextEncoder().encode(content);
	const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

	let hex = "";
	for (const byte of digest) {
		hex += byte.toString(16).padStart(2, "0");
	}
	return hex;
}
241
242
/**
 * Report whether a path matches at least one ignore pattern.
 *
 * @param relativePath - Path to test, as passed to `minimatch`.
 * @param ignorePatterns - Patterns to test against; an empty list never matches.
 * @returns `true` as soon as one pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
	return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
250
251
/** Options accepted by {@link scanContentDirectory}. */
export interface ScanOptions {
	/** Names of the raw frontmatter keys that hold the standard fields. */
	frontmatterMapping?: FrontmatterMapping;
	/** Patterns (relative to the content directory) of files to skip. */
	ignorePatterns?: string[];
	/** Raw frontmatter key to read the slug from. */
	slugField?: string;
	/** Remove a trailing `/index` or `/_index` from slugs. */
	removeIndexFromSlug?: boolean;
	/** Strip a leading `YYYY-MM-DD-` from slug segments. */
	stripDatePrefix?: boolean;
}

/**
 * Find and parse every `.md` / `.mdx` file under a content directory.
 *
 * Two call shapes are supported:
 * - `scanContentDirectory(dir, options)` with a {@link ScanOptions} object;
 * - the older `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`.
 *
 * The second argument is treated as `ScanOptions` when it carries any
 * `ScanOptions` key; otherwise it is treated as a frontmatter mapping.
 *
 * Files whose frontmatter cannot be parsed are reported with `console.error`
 * and left out of the result. Errors from reading a file are not caught here
 * and reject the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Options object, or legacy mapping.
 * @param ignorePatterns - Legacy ignore patterns; only used with the legacy
 *   call shape.
 * @returns Parsed posts, newest publish date first. Posts with a missing or
 *   unparseable publish date are ordered as if dated at the Unix epoch.
 */
export async function scanContentDirectory(
	contentDir: string,
	frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
	ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
	// Every ScanOptions key counts as evidence of the new signature, so that
	// e.g. `{ stripDatePrefix: true }` alone is not mistaken for a mapping.
	const scanOptionKeys = [
		"frontmatterMapping",
		"ignorePatterns",
		"slugField",
		"removeIndexFromSlug",
		"stripDatePrefix",
	] as const;

	const secondArg = frontmatterMappingOrOptions;
	let options: ScanOptions;
	if (secondArg && scanOptionKeys.some((key) => key in secondArg)) {
		options = secondArg as ScanOptions;
	} else {
		// Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
		options = {
			frontmatterMapping: secondArg as FrontmatterMapping | undefined,
			ignorePatterns,
		};
	}

	const {
		frontmatterMapping,
		ignorePatterns: ignore = [],
		slugField,
		removeIndexFromSlug,
		stripDatePrefix,
	} = options;

	const patterns = ["**/*.md", "**/*.mdx"];
	const posts: BlogPost[] = [];

	for (const pattern of patterns) {
		const files = await glob(pattern, {
			cwd: contentDir,
			absolute: false,
			// A directory named like "notes.md" must not be handed to readFile.
			nodir: true,
		});

		for (const relativePath of files) {
			// Skip files matching ignore patterns
			if (shouldIgnore(relativePath, ignore)) {
				continue;
			}

			const filePath = path.join(contentDir, relativePath);
			const rawContent = await fs.readFile(filePath, "utf-8");

			try {
				const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
					rawContent,
					frontmatterMapping,
				);
				const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
					slugField,
					removeIndexFromSlug,
					stripDatePrefix,
				});

				posts.push({
					filePath,
					slug,
					frontmatter,
					content: body,
					rawContent,
					rawFrontmatter,
				});
			} catch (error) {
				// Best effort: one unparseable file must not abort the scan.
				console.error(`Error parsing ${relativePath}:`, error);
			}
		}
	}

	// An invalid date yields NaN, which would make the comparator inconsistent;
	// fall back to 0 so the ordering is always well defined.
	const publishTime = (post: BlogPost): number => {
		const time = new Date(post.frontmatter.publishDate).getTime();
		return Number.isNaN(time) ? 0 : time;
	};

	// Sort by publish date (newest first)
	posts.sort((a, b) => publishTime(b) - publishTime(a));

	return posts;
}
343
344
/**
 * Set the `atUri` entry in a document's frontmatter and return the new text.
 *
 * The entry is written as `atUri = "<uri>"` for `+++` frontmatter and as
 * `atUri: "<uri>"` for `---` / `***` frontmatter. An existing `atUri` line
 * inside the frontmatter is replaced in place (keeping its indentation);
 * otherwise the entry is inserted on its own line directly before the closing
 * delimiter. Only the frontmatter block is ever modified, so text in the
 * document body that happens to mention `atUri:` is left alone.
 *
 * @param rawContent - Full text of the markdown file.
 * @param atUri - Value to store.
 * @returns The updated file text.
 * @throws Error("Could not find frontmatter end") when the document does not
 *   start with a frontmatter delimiter line or has no closing delimiter line.
 */
export function updateFrontmatterWithAtUri(
	rawContent: string,
	atUri: string,
): string {
	// Detect which delimiter is used (---, +++, or ***) and the line ending.
	const opening = rawContent.match(/^(---|\+\+\+|\*\*\*)(\r?\n)/);
	if (!opening) {
		throw new Error("Could not find frontmatter end");
	}
	const delimiter = opening[1] ?? "---";
	const eol = opening[2] ?? "\n";
	const isToml = delimiter === "+++";

	// Format the atUri entry based on frontmatter type.
	const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

	// The closing delimiter must occupy a whole line; a "---" in the middle
	// of a value is not the end of the frontmatter.
	const frontmatterStart = opening[0].length;
	const escapedDelimiter = delimiter.replace(/[*+]/g, "\\$&");
	const closing = new RegExp(`^${escapedDelimiter}\\r?$`, "m").exec(
		rawContent.slice(frontmatterStart),
	);
	if (!closing) {
		throw new Error("Could not find frontmatter end");
	}
	const frontmatterEndIndex = frontmatterStart + closing.index;

	const head = rawContent.slice(0, frontmatterStart);
	const frontmatter = rawContent.slice(frontmatterStart, frontmatterEndIndex);
	const afterEnd = rawContent.slice(frontmatterEndIndex);

	// Match a whole existing atUri line (either format, any value incl. empty).
	const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
	if (existingEntry.test(frontmatter)) {
		// A replacer function keeps "$" sequences in the URI literal.
		const updated = frontmatter.replace(
			existingEntry,
			(_line: string, indent: string) => `${indent}${atUriEntry}`,
		);
		return `${head}${updated}${afterEnd}`;
	}

	// Insert atUri before the closing delimiter.
	return `${head}${frontmatter}${atUriEntry}${eol}${afterEnd}`;
}
376
377
/**
 * Reduce markdown to approximate plain text using a fixed series of regex
 * replacements.
 *
 * Order matters:
 * - fenced code blocks go first so their contents are never touched by the
 *   other rules;
 * - images go before links, because an image is a link preceded by `!` and
 *   would otherwise be turned into `!alt`;
 * - header markers are only removed at the start of a line, so text such as
 *   `C# is` is left intact.
 *
 * @param markdown - Markdown source.
 * @returns The text with the handled markup removed, runs of three or more
 *   newlines collapsed to two, and surrounding whitespace trimmed.
 */
export function stripMarkdownForText(markdown: string): string {
	return markdown
		.replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
		.replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
		.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
		.replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers
		.replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
		.replace(/\*([^*]+)\*/g, "$1") // Remove italic
		.replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
		.replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
		.trim();
}