// packages/cli/src/lib/markdown.ts
1
import * as fs from "node:fs/promises";
2
import * as path from "node:path";
3
import { glob } from "glob";
4
import { minimatch } from "minimatch";
5
import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
6
7
/** Returns true when `value` is wrapped in one pair of matching single or double quotes. */
function isQuotedScalar(value: string): boolean {
	return (
		value.length >= 2 &&
		((value.startsWith('"') && value.endsWith('"')) ||
			(value.startsWith("'") && value.endsWith("'")))
	);
}

/**
 * Splits a markdown document into its frontmatter and body and normalizes the
 * frontmatter into the standard post fields.
 *
 * Supported delimiters:
 * - `---` (YAML-style `key: value`)
 * - `+++` (TOML-style `key = value`)
 * - `***` (alternative, parsed like YAML)
 *
 * The parser is intentionally simple and line based: it understands flat
 * `key<sep>value` pairs, quoted strings, `true`/`false`, inline arrays
 * (`[a, b]`) and, for the YAML-style delimiters, multi-line `- item` lists
 * following a key with no value. Lines starting with `#` are treated as
 * comments. Both LF and CRLF line endings are accepted, the closing delimiter
 * may be the last line of the file, and the frontmatter may be empty.
 *
 * @param content - Full file content, starting with a frontmatter delimiter.
 * @param mapping - Optional custom field names to read the standard fields from.
 * @returns The normalized frontmatter, the body after the closing delimiter,
 *   and the raw key/value pairs exactly as parsed.
 * @throws Error("Could not parse frontmatter") when no delimited block is found.
 */
export function parseFrontmatter(
	content: string,
	mapping?: FrontmatterMapping,
): {
	frontmatter: PostFrontmatter;
	body: string;
	rawFrontmatter: Record<string, unknown>;
} {
	// Group 1: delimiter, group 2: frontmatter text (absent when empty),
	// group 3: body (absent when the file ends at the closing delimiter).
	// The lazy optional group lets `---\n---\n` (empty frontmatter) match.
	const frontmatterRegex =
		/^(---|\+\+\+|\*\*\*)[ \t]*\r?\n(?:([\s\S]*?)\r?\n)??\1[ \t]*(?:\r?\n([\s\S]*))?$/;
	const match = content.match(frontmatterRegex);

	if (!match) {
		throw new Error("Could not parse frontmatter");
	}

	const delimiter = match[1];
	const frontmatterStr = match[2] ?? "";
	const body = match[3] ?? "";

	// +++ uses TOML (key = value); --- and *** use YAML (key: value).
	const isToml = delimiter === "+++";
	const separator = isToml ? "=" : ":";

	// An indented "- item", or an unindented "- item" (dash followed by whitespace).
	const listItemRegex = /^(?:\s+-\s*|-\s+)(.*)$/;

	const raw: Record<string, unknown> = {};
	const lines = frontmatterStr.split(/\r?\n/);

	let i = 0;
	while (i < lines.length) {
		const line = lines[i];
		i++;
		// Skip holes and comment lines.
		if (line === undefined || line.trimStart().startsWith("#")) {
			continue;
		}
		const sepIndex = line.indexOf(separator);
		if (sepIndex === -1) {
			continue;
		}

		const key = line.slice(0, sepIndex).trim();
		if (key === "") {
			continue;
		}
		const rawValue = line.slice(sepIndex + 1).trim();
		const quoted = isQuotedScalar(rawValue);
		const value = quoted ? rawValue.slice(1, -1) : rawValue;

		if (!quoted && value.startsWith("[") && value.endsWith("]")) {
			// Inline array (simple case for tags). Quoted values such as
			// "[WIP]" are plain strings, and `[]` yields an empty array.
			raw[key] = value
				.slice(1, -1)
				.split(",")
				.map((item) => item.trim().replace(/^["']|["']$/g, ""))
				.filter((item) => item !== "");
		} else if (value === "" && !quoted && !isToml) {
			// YAML-style multi-line array: key with no value followed by "- item" lines.
			const arrayItems: string[] = [];
			let j = i;
			while (j < lines.length) {
				const nextLine = lines[j];
				if (nextLine === undefined) {
					j++;
					continue;
				}
				const listMatch = nextLine.match(listItemRegex);
				if (listMatch && listMatch[1] !== undefined) {
					const itemValue = listMatch[1].trim();
					arrayItems.push(
						isQuotedScalar(itemValue) ? itemValue.slice(1, -1) : itemValue,
					);
					j++;
				} else if (nextLine.trim() === "") {
					// Blank lines inside the list are tolerated.
					j++;
				} else {
					// Next key or other non-list content ends the list.
					break;
				}
			}
			if (arrayItems.length > 0) {
				raw[key] = arrayItems;
				// Resume after the consumed list lines.
				i = j;
			} else {
				raw[key] = value;
			}
		} else if (value === "true") {
			raw[key] = true;
		} else if (value === "false") {
			raw[key] = false;
		} else {
			raw[key] = value;
		}
	}

	// Apply field mappings to normalize to the standard PostFrontmatter fields.
	const frontmatter: Record<string, unknown> = {};

	const titleField = mapping?.title || "title";
	frontmatter.title = raw[titleField] || raw.title;

	const descField = mapping?.description || "description";
	frontmatter.description = raw[descField] || raw.description;

	// Publish date: mapped field first, then `publishDate`, then common aliases.
	const dateField = mapping?.publishDate;
	if (dateField && raw[dateField]) {
		frontmatter.publishDate = raw[dateField];
	} else if (raw.publishDate) {
		frontmatter.publishDate = raw.publishDate;
	} else {
		const dateFields = ["pubDate", "date", "createdAt", "created_at"];
		for (const field of dateFields) {
			if (raw[field]) {
				frontmatter.publishDate = raw[field];
				break;
			}
		}
	}

	const coverField = mapping?.coverImage || "ogImage";
	frontmatter.ogImage = raw[coverField] || raw.ogImage;

	const tagsField = mapping?.tags || "tags";
	frontmatter.tags = raw[tagsField] || raw.tags;

	// Draft is only set when some draft value is present; both the boolean
	// and the string form of "true" count as a draft.
	const draftField = mapping?.draft || "draft";
	const draftValue = raw[draftField] ?? raw.draft;
	if (draftValue !== undefined) {
		frontmatter.draft = draftValue === true || draftValue === "true";
	}

	// Always preserve atUri (internal field).
	frontmatter.atUri = raw.atUri;

	return {
		frontmatter: frontmatter as unknown as PostFrontmatter,
		body,
		rawFrontmatter: raw,
	};
}
170
171
/**
 * Derives a slug from a markdown filename.
 *
 * Strips a trailing `.md` / `.mdx` extension (case-insensitively), lowercases
 * the result and replaces each run of whitespace with a single hyphen.
 *
 * @param filename - File name (or relative path) of the markdown file.
 * @returns The lowercased, hyphenated slug.
 */
export function getSlugFromFilename(filename: string): string {
	const withoutExtension = filename.replace(/\.mdx?$/i, "");
	return withoutExtension.toLowerCase().replace(/\s+/g, "-");
}
177
178
/** Options controlling how a post's slug is derived. */
export interface SlugOptions {
	/** Name of a frontmatter field to read the slug from, when set. */
	slugField?: string;
	/** When true, a trailing `/index` or `/_index` segment is removed from the slug. */
	removeIndexFromSlug?: boolean;
}
182
183
/** Lowercases a slug candidate and replaces each whitespace run with one hyphen. */
function hyphenateSlug(value: string): string {
	return value.toLowerCase().replace(/\s+/g, "-");
}

/**
 * Computes a post's slug from its frontmatter or, failing that, its path.
 *
 * When `options.slugField` is set and that frontmatter field holds a
 * non-empty string, the slug is that value with one leading `/` removed.
 * Otherwise the slug is `relativePath` without its `.md` / `.mdx` extension
 * (matched case-insensitively), with backslash separators converted to `/`
 * so paths produced on Windows give the same slug as on POSIX. In both cases
 * the result is lowercased and whitespace runs become hyphens.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Raw parsed frontmatter key/value pairs.
 * @param options - Slug options; defaults to path-based slugs that keep `index`.
 * @returns The computed slug.
 */
export function getSlugFromOptions(
	relativePath: string,
	rawFrontmatter: Record<string, unknown>,
	options: SlugOptions = {},
): string {
	const { slugField, removeIndexFromSlug = false } = options;

	const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

	let slug: string;
	if (typeof frontmatterValue === "string" && frontmatterValue !== "") {
		// Frontmatter-provided slug: drop a single leading slash.
		slug = hyphenateSlug(frontmatterValue.replace(/^\//, ""));
	} else {
		// Default / fallback: derive the slug from the file path.
		slug = hyphenateSlug(
			relativePath.replace(/\\/g, "/").replace(/\.mdx?$/i, ""),
		);
	}

	// Remove /index or /_index suffix if configured.
	if (removeIndexFromSlug) {
		slug = slug.replace(/\/_?index$/, "");
	}

	return slug;
}
223
224
/**
 * Computes the SHA-256 digest of a string.
 *
 * The string is encoded with `TextEncoder` and hashed through the Web Crypto
 * `crypto.subtle.digest` API.
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string (two characters per byte).
 */
export async function getContentHash(content: string): Promise<string> {
	const bytes = new TextEncoder().encode(content);
	const digest = await crypto.subtle.digest("SHA-256", bytes);
	return Array.from(new Uint8Array(digest), (byte) =>
		byte.toString(16).padStart(2, "0"),
	).join("");
}
231
232
/**
 * Tells whether a path matches any of the given ignore patterns.
 *
 * @param relativePath - Path to test, relative to the content directory.
 * @param ignorePatterns - Glob patterns passed to `minimatch`.
 * @returns True as soon as one pattern matches; false for an empty list.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
	return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
240
241
/** Options accepted by `scanContentDirectory`. */
export interface ScanOptions {
	/** Custom frontmatter field names used when normalizing post metadata. */
	frontmatterMapping?: FrontmatterMapping;
	/** Glob patterns (relative to the content directory) of files to skip. */
	ignorePatterns?: string[];
	/** Name of a frontmatter field to read each post's slug from. */
	slugField?: string;
	/** When true, a trailing `/index` or `/_index` segment is removed from slugs. */
	removeIndexFromSlug?: boolean;
}
247
248
/**
 * Scans a content directory for markdown posts and parses each one.
 *
 * Two call signatures are supported:
 * - new: `scanContentDirectory(contentDir, options)` where `options` is a
 *   `ScanOptions` object (recognized by the presence of any `ScanOptions` key);
 * - old: `scanContentDirectory(contentDir, frontmatterMapping?, ignorePatterns?)`.
 *
 * Every `*.md` / `*.mdx` file under `contentDir` that does not match an
 * ignore pattern is read and parsed. Files whose frontmatter cannot be parsed
 * are logged with `console.error` and skipped; read errors still reject.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Either a `ScanOptions` object or, for
 *   the old signature, a frontmatter mapping.
 * @param ignorePatterns - Ignore patterns for the old signature only.
 * @returns Parsed posts sorted by publish date, newest first. Posts with a
 *   missing or unparseable publish date are placed last.
 */
export async function scanContentDirectory(
	contentDir: string,
	frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
	ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
	// Any ScanOptions key marks the second argument as an options object.
	const scanOptionKeys = [
		"frontmatterMapping",
		"ignorePatterns",
		"slugField",
		"removeIndexFromSlug",
	];
	const isOptionsObject =
		frontmatterMappingOrOptions !== undefined &&
		scanOptionKeys.some((key) => key in frontmatterMappingOrOptions);

	const options: ScanOptions = isOptionsObject
		? (frontmatterMappingOrOptions as ScanOptions)
		: {
				// Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
				frontmatterMapping: frontmatterMappingOrOptions as
					| FrontmatterMapping
					| undefined,
				ignorePatterns,
			};

	const {
		frontmatterMapping,
		ignorePatterns: ignore = [],
		slugField,
		removeIndexFromSlug,
	} = options;

	const patterns = ["**/*.md", "**/*.mdx"];
	const posts: BlogPost[] = [];

	for (const pattern of patterns) {
		const files = await glob(pattern, {
			cwd: contentDir,
			absolute: false,
			// A directory named like "notes.md" must not be read as a file.
			nodir: true,
		});

		for (const relativePath of files) {
			// Skip files matching ignore patterns.
			if (shouldIgnore(relativePath, ignore)) {
				continue;
			}

			const filePath = path.join(contentDir, relativePath);
			const rawContent = await fs.readFile(filePath, "utf-8");

			try {
				const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
					rawContent,
					frontmatterMapping,
				);
				const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
					slugField,
					removeIndexFromSlug,
				});

				posts.push({
					filePath,
					slug,
					frontmatter,
					content: body,
					rawContent,
					rawFrontmatter,
				});
			} catch (error) {
				// Best effort: one unparseable file must not abort the scan.
				console.error(`Error parsing ${relativePath}:`, error);
			}
		}
	}

	// Invalid or missing dates map to -Infinity so the comparator never
	// returns NaN and undated posts sort after all dated ones.
	const publishTime = (post: BlogPost): number => {
		const time = new Date(post.frontmatter.publishDate).getTime();
		return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
	};

	// Sort by publish date (newest first).
	posts.sort((a, b) => {
		const timeA = publishTime(a);
		const timeB = publishTime(b);
		if (timeA === timeB) {
			return 0;
		}
		return timeB > timeA ? 1 : -1;
	});

	return posts;
}
330
331
/**
 * Writes an `atUri` entry into a document's frontmatter.
 *
 * The delimiter (`---`, `+++` or `***`) is detected from the start of the
 * document; `+++` produces a TOML entry (`atUri = "..."`), the others a YAML
 * entry (`atUri: "..."`). If the frontmatter already has an `atUri` line it
 * is replaced in place; otherwise the entry is inserted directly before the
 * closing delimiter. Only the frontmatter is inspected and modified, so
 * mentions of `atUri:` in the body are left alone. The closing delimiter must
 * stand on its own line, and the document's LF/CRLF style is preserved.
 *
 * @param rawContent - Full file content including frontmatter.
 * @param atUri - URI to store.
 * @returns The updated file content.
 * @throws Error("Could not find frontmatter end") when the document does not
 *   start with a delimiter or has no closing delimiter line.
 */
export function updateFrontmatterWithAtUri(
	rawContent: string,
	atUri: string,
): string {
	// Detect which delimiter is used (---, +++, or ***).
	const delimiter = rawContent.match(/^(---|\+\+\+|\*\*\*)/)?.[1];
	if (delimiter === undefined) {
		throw new Error("Could not find frontmatter end");
	}
	const isToml = delimiter === "+++";

	// Format the atUri entry based on frontmatter type.
	const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

	// Locate the closing delimiter: the same marker alone on a later line.
	const escapedDelimiter = delimiter.replace(/[+*]/g, "\\$&");
	const closingRegex = new RegExp(`\\n${escapedDelimiter}[ \\t]*\\r?(?:\\n|$)`);
	const closing = closingRegex.exec(rawContent.slice(delimiter.length));
	if (!closing) {
		throw new Error("Could not find frontmatter end");
	}

	// Index of the first character of the closing delimiter.
	const frontmatterEndIndex = delimiter.length + closing.index + 1;
	const beforeEnd = rawContent.slice(0, frontmatterEndIndex);
	const afterEnd = rawContent.slice(frontmatterEndIndex);

	// Replace an existing atUri line (YAML or TOML form, any or empty value).
	const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
	if (existingEntry.test(beforeEnd)) {
		// Function replacer: "$" sequences in the URI are inserted literally.
		const updated = beforeEnd.replace(
			existingEntry,
			(_line, indent: string) => `${indent}${atUriEntry}`,
		);
		return `${updated}${afterEnd}`;
	}

	// Insert atUri before the closing delimiter.
	const eol = beforeEnd.endsWith("\r\n") ? "\r\n" : "\n";
	return `${beforeEnd}${atUriEntry}${eol}${afterEnd}`;
}
363
364
/**
 * Reduces markdown to approximate plain text.
 *
 * Rules run in an order that keeps them from interfering with each other:
 * fenced code blocks and images are removed first (so the link rule cannot
 * turn `![alt](src)` into `!alt`), then links are replaced by their text,
 * heading markers at the start of a line are dropped, and bold, italic and
 * inline-code markers are unwrapped. Runs of three or more newlines collapse
 * to two and the result is trimmed.
 *
 * @param markdown - Markdown source.
 * @returns The text with the markup above removed.
 */
export function stripMarkdownForText(markdown: string): string {
	return markdown
		.replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
		.replace(/!\[.*?\]\(.*?\)/g, "") // Remove images (before links)
		.replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
		.replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers at line start
		.replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
		.replace(/\*([^*]+)\*/g, "$1") // Remove italic
		.replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
		.replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
		.trim();
}