| 1 | import * as fs from "fs/promises"; |
| 2 | import * as path from "path"; |
| 3 | import { glob } from "glob"; |
| 4 | import { minimatch } from "minimatch"; |
| 5 | import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types"; |
| 6 | |
/**
 * Converts the text after a frontmatter separator into a JS value.
 *
 * - A value wrapped in matching quotes is always returned as a string, so
 *   `"[Draft] post"` or `"true"` are not mistaken for an array or boolean.
 * - `[a, "b"]` becomes an array of unquoted strings; `[]` becomes an empty array.
 * - Bare `true` / `false` become booleans; everything else stays a string.
 */
function parseFrontmatterValue(value: string): unknown {
  const quote = value[0];
  if ((quote === '"' || quote === "'") && value.endsWith(quote)) {
    return value.slice(1, -1);
  }

  // Handle inline arrays (simple case for tags)
  if (value.startsWith("[") && value.endsWith("]")) {
    return value
      .slice(1, -1)
      .split(",")
      .map((item) => item.trim())
      // Drop empty slots so `[]` and trailing commas do not produce "" entries.
      .filter((item) => item.length > 0)
      .map((item) => item.replace(/^["']|["']$/g, ""));
  }

  if (value === "true") return true;
  if (value === "false") return false;
  return value;
}

/**
 * Splits a post into its frontmatter and body and normalizes the frontmatter
 * to the standard `PostFrontmatter` fields.
 *
 * Supported delimiters:
 *   --- (YAML) - Jekyll, Astro, most SSGs
 *   +++ (TOML) - Hugo
 *   *** - Alternative format (parsed like YAML)
 *
 * Only flat `key: value` / `key = value` lines are understood; this is a
 * lightweight line parser, not a full YAML/TOML implementation. Both LF and
 * CRLF line endings are accepted, and the closing delimiter may be the last
 * thing in the file.
 *
 * @param content Full file contents, starting with the opening delimiter.
 * @param mapping Optional names of the source fields to read title,
 *   description, publish date, cover image and tags from.
 * @returns The normalized frontmatter and the text after the closing delimiter.
 * @throws Error("Could not parse frontmatter") when no delimited block is found.
 */
export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  frontmatter: PostFrontmatter;
  body: string;
} {
  // Group 1: delimiter, group 2: frontmatter text (absent when empty),
  // group 3: body (absent when the file ends at the closing delimiter).
  // The frontmatter group is lazily optional so `---\n---\n` parses as empty.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)[ \t]*\r?\n(?:([\s\S]*?)\r?\n)??\1[ \t]*(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // Determine format based on delimiter:
  // +++ uses TOML (key = value)
  // --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Parse frontmatter manually, one `key<separator>value` pair per line.
  const raw: Record<string, unknown> = {};

  for (const line of frontmatterStr.split(/\r?\n/)) {
    // Split on the first separator only so values may contain it (e.g. URLs).
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) continue;

    const key = line.slice(0, sepIndex).trim();
    raw[key] = parseFrontmatterValue(line.slice(sepIndex + 1).trim());
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
}
| 107 | |
/**
 * Derives a URL slug from a markdown filename.
 *
 * Strips a trailing `.md` / `.mdx` extension (in any letter case, so
 * `README.MD` does not keep its extension), lower-cases the rest, and
 * collapses each run of whitespace into a single hyphen.
 *
 * @param filename Base name of the file, e.g. `My Post.md`.
 * @returns The slug, e.g. `my-post`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/i, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}
| 114 | |
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param content Text to hash.
 * @returns The digest as a lowercase hexadecimal string (two digits per byte).
 */
export async function getContentHash(content: string): Promise<string> {
  const utf8Bytes = new TextEncoder().encode(content);
  const digestBytes = new Uint8Array(await crypto.subtle.digest("SHA-256", utf8Bytes));

  let hex = "";
  for (const byte of digestBytes) {
    // Zero-pad so every byte contributes exactly two hex characters.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
| 122 | |
/**
 * Reports whether a path matches at least one of the given ignore globs.
 *
 * @param relativePath Path to test, as passed to `minimatch`.
 * @param ignorePatterns Glob patterns; an empty list ignores nothing.
 * @returns `true` as soon as any pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
| 131 | |
/**
 * Recursively collects every `.md` / `.mdx` post under a directory.
 *
 * Files whose path (relative to `contentDir`) matches an ignore pattern are
 * skipped. A file whose frontmatter cannot be parsed is logged with
 * `console.error` and left out; a file that cannot be read still rejects the
 * returned promise.
 *
 * @param contentDir Directory to scan.
 * @param frontmatterMapping Optional field-name mapping forwarded to `parseFrontmatter`.
 * @param ignorePatterns Glob patterns, relative to `contentDir`, to exclude.
 * @returns Parsed posts sorted by publish date, newest first. Posts with a
 *   missing or unparseable publish date are placed after all dated posts.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
      // A directory named like "notes.md" would otherwise be returned and make
      // readFile fail with EISDIR, aborting the whole scan.
      nodir: true,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Invalid or missing dates produce NaN, which would make the comparator
  // inconsistent; map them to -Infinity so they sort after every real date.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };

  // Sort by publish date (newest first)
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    // Compare instead of subtracting: -Infinity - -Infinity would be NaN.
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
| 182 | |
/**
 * Finds the start index of the closing frontmatter delimiter: the first line
 * after the first line of `content` that consists of exactly `delimiter`
 * (trailing whitespace / `\r` ignored). Returns -1 when there is none.
 */
function findClosingDelimiterIndex(content: string, delimiter: string): number {
  let lineStart = content.indexOf("\n") + 1;
  if (lineStart === 0) return -1; // single-line content cannot hold a block

  for (;;) {
    const newlineIndex = content.indexOf("\n", lineStart);
    const lineEnd = newlineIndex === -1 ? content.length : newlineIndex;
    if (content.slice(lineStart, lineEnd).trimEnd() === delimiter) {
      return lineStart;
    }
    if (newlineIndex === -1) return -1;
    lineStart = newlineIndex + 1;
  }
}

/**
 * Returns `rawContent` with its frontmatter `atUri` entry set to `atUri`.
 *
 * The entry is written as `atUri = "..."` for `+++` (TOML) frontmatter and as
 * `atUri: "..."` otherwise. An existing `atUri` line inside the frontmatter is
 * replaced; if there is none, the entry is inserted immediately before the
 * closing delimiter. Text after the closing delimiter is never modified, even
 * if it mentions `atUri:`.
 *
 * @param rawContent Full file contents including frontmatter.
 * @param atUri The URI to record.
 * @returns The updated file contents.
 * @throws Error("Could not find frontmatter end") when no closing delimiter line exists.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // The closing delimiter must be a whole line; a "---" inside a value
  // (e.g. `title: a --- b`) is not the end of the frontmatter.
  const closingIndex = findClosingDelimiterIndex(rawContent, delimiter);
  if (closingIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  const frontmatterBlock = rawContent.slice(0, closingIndex);
  const remainder = rawContent.slice(closingIndex);

  // Replace an existing atUri line (YAML or TOML form), frontmatter only.
  const existingEntry = /^[ \t]*atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(frontmatterBlock)) {
    // Replacer function so "$" sequences in the URI are inserted literally.
    return frontmatterBlock.replace(existingEntry, () => atUriEntry) + remainder;
  }

  // Insert atUri before the closing delimiter, matching the file's line ending.
  const eol = frontmatterBlock.endsWith("\r\n") ? "\r\n" : "\n";
  return `${frontmatterBlock}${atUriEntry}${eol}${remainder}`;
}
| 209 | |
/**
 * Reduces markdown to approximate plain text.
 *
 * Removes fenced code blocks and images entirely, keeps the visible text of
 * links, bold, italic and inline code, strips heading markers at the start of
 * a line, collapses runs of three or more newlines to two, and trims the result.
 *
 * Rule order matters: code blocks go first so their contents are not touched
 * by later rules, and images go before links because `![alt](url)` contains a
 * link-shaped `[alt](url)` that would otherwise leave `!alt` behind.
 *
 * @param markdown Markdown source.
 * @returns The text with the formatting above removed.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers (line start only, so "C# x" survives)
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}