| 1 | import * as fs from "node:fs/promises"; |
| 2 | import * as path from "node:path"; |
| 3 | import { glob } from "glob"; |
| 4 | import yaml from "js-yaml"; |
| 5 | import { minimatch } from "minimatch"; |
| 6 | import * as toml from "smol-toml"; |
| 7 | import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types"; |
| 8 | |
/**
 * Coerces a parsed frontmatter document to a plain key/value record.
 * Scalars, arrays and null (e.g. a YAML block containing only a string or a
 * list) become an empty record so callers can always index by field name.
 */
function toFrontmatterRecord(value: unknown): Record<string, unknown> {
  if (typeof value === "object" && value !== null && !Array.isArray(value)) {
    return value as Record<string, unknown>;
  }
  return {};
}

/**
 * Returns the first truthy value stored in `raw` under any of `keys`
 * (checked in order). Unset key names are skipped. Returns `undefined` when
 * no key holds a truthy value.
 */
function firstTruthyField(
  raw: Record<string, unknown>,
  keys: readonly (string | undefined)[],
): unknown {
  for (const key of keys) {
    if (key && raw[key]) {
      return raw[key];
    }
  }
  return undefined;
}

/**
 * Splits a document into its frontmatter block and body, parses the
 * frontmatter, and normalizes it to the standard `PostFrontmatter` fields.
 *
 * Supported delimiters:
 *   --- (YAML) - Jekyll, Astro, most SSGs
 *   +++ (TOML) - Hugo
 *   *** (parsed as YAML) - alternative format
 *
 * @param content - Full file content; must start with a frontmatter block.
 * @param mapping - Optional custom field names. Each mapped field is looked
 *   up first, then the standard field name (and, for dates, a list of common
 *   alternatives) is used as a fallback.
 * @returns The normalized frontmatter, the body after the closing delimiter
 *   (empty when the file ends at the delimiter), and the untouched parsed
 *   frontmatter record.
 * @throws Error("Could not parse frontmatter") when no frontmatter block is
 *   found. Errors thrown by the YAML/TOML parsers are not caught here.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  // The closing delimiter may be followed by a newline and the body, or may
  // be the very last thing in the file (no trailing newline, no body).
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";

  let raw: Record<string, unknown>;
  if (isToml) {
    raw = toml.parse(frontmatterStr) as Record<string, unknown>;
  } else {
    // Use CORE_SCHEMA to keep dates as strings rather than Date objects
    raw = toFrontmatterRecord(
      yaml.load(frontmatterStr, { schema: yaml.CORE_SCHEMA }),
    );
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title / description: mapped field first, standard field as fallback
  frontmatter.title = raw[mapping?.title || "title"] || raw.title;
  frontmatter.description =
    raw[mapping?.description || "description"] || raw.description;

  // Publish date: custom field, then `publishDate`, then common alternatives.
  // The key is only set when some field actually holds a value.
  const publishDate = firstTruthyField(raw, [
    mapping?.publishDate,
    "publishDate",
    "pubDate",
    "date",
    "createdAt",
    "created_at",
  ]);
  if (publishDate !== undefined) {
    frontmatter.publishDate = publishDate;
  }

  // Updated date: custom field, then `updatedAt`, then common alternatives
  const updatedAt = firstTruthyField(raw, [
    mapping?.updatedAt,
    "updatedAt",
    "updated_at",
    "modifiedAt",
    "modified_at",
  ]);
  if (updatedAt !== undefined) {
    frontmatter.updatedAt = updatedAt;
  }

  // Cover image and tags
  frontmatter.ogImage = raw[mapping?.coverImage || "ogImage"] || raw.ogImage;
  frontmatter.tags = raw[mapping?.tags || "tags"] || raw.tags;

  // Draft: only set when present; accepts boolean true or the string "true"
  const draftValue = raw[mapping?.draft || "draft"] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}
| 120 | |
/**
 * Derives a slug from a markdown filename: drops a trailing `.md`/`.mdx`
 * extension, lowercases the rest, and turns each run of whitespace into a
 * single hyphen.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  // Splitting on whitespace runs and re-joining is the same as replacing
  // every run with "-".
  return lowered.split(/\s+/).join("-");
}
| 127 | |
/** Options controlling how `getSlugFromOptions` derives a post's slug. */
export interface SlugOptions {
  /**
   * Name of a raw frontmatter field to take the slug from. When unset, or
   * when that field is missing / not a non-empty string, the file's relative
   * path is used instead.
   */
  slugField?: string;
  /** When true, a trailing `/index` or `/_index` is removed from the slug. Defaults to false. */
  removeIndexFromSlug?: boolean;
  /**
   * When true, a `YYYY-MM-DD-` prefix is removed at the start of the slug
   * and after every `/`. Defaults to false.
   */
  stripDatePrefix?: boolean;
}
| 133 | |
/**
 * Computes a post slug from either a frontmatter field or the file's
 * relative path, then applies the optional clean-up steps.
 *
 * @param relativePath - Path of the file relative to the content directory.
 * @param rawFrontmatter - Parsed, un-normalized frontmatter of the file.
 * @param options - See `SlugOptions`; every flag defaults to off.
 * @returns The lowercased slug with whitespace runs replaced by hyphens.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  // Only consult the frontmatter when a slug field name was configured.
  const configured = options.slugField
    ? rawFrontmatter[options.slugField]
    : undefined;

  // A non-empty string from the frontmatter wins (minus one leading slash);
  // anything else falls back to the path without its .md/.mdx extension.
  const base =
    typeof configured === "string" && configured !== ""
      ? configured.replace(/^\//, "")
      : relativePath.replace(/\.mdx?$/, "");

  let slug = base.toLowerCase().replace(/\s+/g, "-");

  // Remove /index or /_index suffix if configured
  if (options.removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  // Strip Jekyll-style date prefixes (YYYY-MM-DD-) from each path segment
  if (options.stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}
| 183 | |
/**
 * Expands `{token}` placeholders in a path template for a post.
 *
 * Built-in tokens: `{slug}`, `{title}` (slugified title), and the publish
 * date parts `{year}`, `{month}`, `{day}` (local time) plus `{yearUTC}`,
 * `{monthUTC}`, `{dayUTC}`. Any other `{field}` is looked up in the post's
 * raw frontmatter and used only when the value is a string; otherwise it
 * expands to an empty string.
 *
 * All placeholders are resolved in a single pass with a function replacer,
 * so substituted values are inserted verbatim: `$` sequences in a slug are
 * not treated as replacement patterns and braces inside a value are not
 * expanded a second time.
 *
 * Note: a missing or unparsable publish date makes the date tokens expand
 * to "NaN".
 *
 * @param template - Path template, e.g. `/blog/{year}/{slug}`.
 * @param post - Post providing the slug, frontmatter and raw frontmatter.
 * @returns The resolved path, always starting with `/`.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
  const publishDate = new Date(post.frontmatter.publishDate);
  const twoDigits = (value: number): string => String(value).padStart(2, "0");

  // Tolerate non-string titles (e.g. a YAML `title: 2024` parsed as a number)
  const rawTitle: unknown = post.frontmatter.title || "";
  const titleText = typeof rawTitle === "string" ? rawTitle : String(rawTitle);
  const slugifiedTitle = titleText
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");

  // A Map (rather than an object literal) keeps names such as "constructor"
  // from resolving to inherited properties.
  const knownTokens = new Map<string, string>([
    ["slug", post.slug],
    ["year", String(publishDate.getFullYear())],
    ["yearUTC", String(publishDate.getUTCFullYear())],
    ["month", twoDigits(publishDate.getMonth() + 1)],
    ["monthUTC", twoDigits(publishDate.getUTCMonth() + 1)],
    ["day", twoDigits(publishDate.getDate())],
    ["dayUTC", twoDigits(publishDate.getUTCDate())],
    ["title", slugifiedTitle],
  ]);

  const resolved = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
    const known = knownTokens.get(field);
    if (known !== undefined) {
      return known;
    }
    // Unknown token: fall back to a raw frontmatter string value
    const value: unknown = post.rawFrontmatter[field];
    return typeof value === "string" ? value : "";
  });

  // Ensure leading slash
  return resolved.startsWith("/") ? resolved : `/${resolved}`;
}
| 225 | |
/**
 * Resolves the site path of a post.
 *
 * @param post - The post whose path is needed.
 * @param pathPrefix - Prefix placed before the slug when no template is
 *   given. Defaults to `/posts`; an empty string yields `/{slug}`. Trailing
 *   slashes are ignored so `/blog/` and `/` do not produce `//` in the path.
 * @param pathTemplate - When provided (non-empty), the path is produced by
 *   `resolvePathTemplate` and `pathPrefix` is not used.
 * @returns The resolved path.
 */
export function resolvePostPath(
  post: BlogPost,
  pathPrefix?: string,
  pathTemplate?: string,
): string {
  if (pathTemplate) {
    return resolvePathTemplate(pathTemplate, post);
  }
  // Strip trailing slashes so the single separator added below is the only one
  const prefix = (pathPrefix ?? "/posts").replace(/\/+$/, "");
  return `${prefix}/${post.slug}`;
}
| 237 | |
/**
 * Computes the SHA-256 digest of the UTF-8 encoding of `content`.
 *
 * @returns The digest as a lowercase hexadecimal string (two characters per
 *   byte).
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Pad so bytes below 0x10 still contribute two characters
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
| 245 | |
/**
 * Reports whether `relativePath` matches at least one of the given ignore
 * patterns, as decided by `minimatch`. An empty pattern list never matches.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((pattern) => minimatch(relativePath, pattern));
}
| 254 | |
/** Options accepted by `scanContentDirectory` (new-style options object). */
export interface ScanOptions {
  /** Custom frontmatter field names, passed through to `parseFrontmatter`. */
  frontmatterMapping?: FrontmatterMapping;
  /** Patterns matched (via minimatch) against each file's relative path; matching files are skipped. */
  ignorePatterns?: string[];
  /** Forwarded to `getSlugFromOptions`: frontmatter field to read the slug from. */
  slugField?: string;
  /** Forwarded to `getSlugFromOptions`: drop a trailing `/index` or `/_index` from slugs. */
  removeIndexFromSlug?: boolean;
  /** Forwarded to `getSlugFromOptions`: drop `YYYY-MM-DD-` prefixes from slugs. */
  stripDatePrefix?: boolean;
}
| 262 | |
/**
 * Distinguishes the new options object from a legacy `FrontmatterMapping`
 * by looking for any key that only `ScanOptions` declares.
 */
function isScanOptions(
  value: FrontmatterMapping | ScanOptions,
): value is ScanOptions {
  return (
    "frontmatterMapping" in value ||
    "ignorePatterns" in value ||
    "slugField" in value ||
    "removeIndexFromSlug" in value ||
    "stripDatePrefix" in value
  );
}

/**
 * Finds every `.md` / `.mdx` file under `contentDir`, parses its
 * frontmatter, and returns the resulting posts sorted by publish date
 * (newest first; posts with a missing or invalid date sort last).
 *
 * Supports two call shapes:
 *   - new: `scanContentDirectory(dir, options)`
 *   - old: `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`
 *
 * Files whose frontmatter or slug cannot be processed are logged with
 * `console.error` and skipped. Errors from globbing or reading a file are
 * not caught and reject the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Either a `ScanOptions` object or a
 *   legacy `FrontmatterMapping`.
 * @param ignorePatterns - Legacy ignore patterns; only used with the old
 *   call shape.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Handle both old signature (frontmatterMapping, ignorePatterns) and new signature (options)
  let options: ScanOptions;
  if (
    frontmatterMappingOrOptions &&
    isScanOptions(frontmatterMappingOrOptions)
  ) {
    options = frontmatterMappingOrOptions;
  } else {
    // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
    options = {
      frontmatterMapping: frontmatterMappingOrOptions as
        | FrontmatterMapping
        | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        // Best effort: one unparsable file must not abort the whole scan
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Invalid/missing dates are mapped to
  // -Infinity so the comparator never returns NaN and such posts sort last.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) {
      return 0;
    }
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
| 347 | |
/**
 * Returns the index at which the closing frontmatter delimiter line starts,
 * or -1 when there is none. Only a delimiter that begins a line and is
 * immediately followed by a line break or the end of the content counts, so
 * delimiter-like text inside a value (e.g. `title: "a --- b"`) is skipped.
 */
function findClosingDelimiterIndex(
  rawContent: string,
  delimiter: string,
): number {
  let searchFrom = delimiter.length;
  for (;;) {
    const newlineIndex = rawContent.indexOf(`\n${delimiter}`, searchFrom);
    if (newlineIndex === -1) {
      return -1;
    }
    const closeIndex = newlineIndex + 1;
    const afterDelimiter = rawContent.slice(
      closeIndex + delimiter.length,
      closeIndex + delimiter.length + 2,
    );
    if (
      afterDelimiter === "" ||
      afterDelimiter.startsWith("\n") ||
      afterDelimiter === "\r\n"
    ) {
      return closeIndex;
    }
    searchFrom = closeIndex;
  }
}

/**
 * Writes `atUri` into a document's frontmatter and returns the new content.
 *
 * The entry is formatted as TOML (`atUri = "..."`) for `+++` frontmatter and
 * as YAML (`atUri: "..."`) otherwise. An existing top-level `atUri` line in
 * the frontmatter is replaced in place; otherwise the entry is inserted just
 * before the closing delimiter. Text after the closing delimiter (the body)
 * is never modified.
 *
 * @param rawContent - Full file content including frontmatter.
 * @param atUri - The AT URI value to store.
 * @throws Error("Could not find frontmatter end") when no closing delimiter
 *   line exists.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiter = rawContent.match(/^(---|\+\+\+|\*\*\*)/)?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  const closeIndex = findClosingDelimiterIndex(rawContent, delimiter);
  if (closeIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  // Everything up to the closing delimiter vs. the delimiter plus the body
  const frontmatterPart = rawContent.slice(0, closeIndex);
  const remainder = rawContent.slice(closeIndex);

  // Existing entry: a line starting with `atUri` followed by `:` or `=`.
  // Only the line's content is replaced, so its line ending is kept.
  const existingEntry = /^atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(frontmatterPart)) {
    // Function replacer: `$` in the URI must not act as a replacement pattern
    return frontmatterPart.replace(existingEntry, () => atUriEntry) + remainder;
  }

  // Insert before the closing delimiter, matching the file's line endings
  const lineBreak = frontmatterPart.endsWith("\r\n") ? "\r\n" : "\n";
  return `${frontmatterPart}${atUriEntry}${lineBreak}${remainder}`;
}
| 380 | |
/**
 * Reduces markdown to approximate plain text using regex substitutions.
 *
 * Order matters:
 *   1. fenced code blocks are dropped first so their contents cannot pair up
 *      with emphasis markers in the surrounding prose;
 *   2. heading markers are removed only at the start of a line, so text such
 *      as "C# is" is left intact;
 *   3. images are removed before links, otherwise the link rule would turn
 *      `![alt](src)` into `!alt`;
 *   4. links keep their text, then bold/italic/inline-code markers are
 *      stripped;
 *   5. runs of three or more newlines collapse to two and the result is
 *      trimmed.
 *
 * This is a heuristic, not a markdown parser: nested or unusual constructs
 * may not be stripped completely.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/^#{1,6}[ \t]+/gm, "") // Remove header markers at line start
    .replace(/!\[[^\]]*\]\([^)]*\)/g, "") // Remove images
    .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}