// packages/cli/src/lib/markdown.ts
import * as path from "path";
2
import { Glob } from "bun";
3
import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";
4
5
/**
 * Splits a document into its frontmatter block and body, and normalizes the
 * frontmatter into the standard post fields.
 *
 * Supported delimiters:
 * - `---` (YAML-style `key: value`) - Jekyll, Astro, most SSGs
 * - `+++` (TOML-style `key = value`) - Hugo
 * - `***` (alternative, parsed like `---`)
 *
 * Only flat, single-line `key <sep> value` entries are understood. Values may
 * be quoted strings, inline arrays (`[a, "b"]`), or the bare booleans
 * `true` / `false`; everything else is kept as a string.
 *
 * @param content - Full text of the file, starting with a frontmatter block.
 * @param mapping - Optional custom field names to read title, description,
 *   publish date, cover image and tags from.
 * @returns The normalized frontmatter and the text following the closing delimiter.
 * @throws Error if `content` does not start with a delimited frontmatter block.
 */
export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  frontmatter: PostFrontmatter;
  body: string;
} {
  // Accepts an optional BOM, LF or CRLF line endings, an empty frontmatter
  // block, and a closing delimiter that is the last thing in the file.
  const frontmatterRegex =
    /^\uFEFF?(---|\+\+\+|\*\*\*)[ \t]*\r?\n(?:([\s\S]*?)\r?\n)??\1[ \t]*(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Prototype-less object so keys such as "__proto__" or "constructor" are
  // stored and looked up as ordinary data.
  const raw: Record<string, unknown> = Object.create(null);

  for (const line of frontmatterStr.split(/\r?\n/)) {
    // Skip comment lines; both YAML and TOML use '#'.
    if (line.trimStart().startsWith("#")) continue;

    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) continue;

    const key = line.slice(0, sepIndex).trim();
    if (key === "") continue;

    const value = line.slice(sepIndex + 1).trim();

    const isQuoted =
      value.length >= 2 &&
      ((value.startsWith('"') && value.endsWith('"')) ||
        (value.startsWith("'") && value.endsWith("'")));

    if (isQuoted) {
      // A quoted value is always a string, even if it looks like `true` or `[a]`.
      raw[key] = value.slice(1, -1);
    } else if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array (simple case for tags); `[]` yields an empty array.
      raw[key] = value
        .slice(1, -1)
        .split(",")
        .map((item) => item.trim().replace(/^["']|["']$/g, ""))
        .filter((item) => item !== "");
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - custom field first, then `publishDate`, then
  // common alternative names.
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
}
105
106
/**
 * Derives a URL slug from a Markdown/MDX filename.
 *
 * Strips a trailing `.md` / `.mdx` extension (in any letter case), lowercases
 * the remainder, and replaces each run of whitespace with a single hyphen.
 *
 * @param filename - Base name of the file, e.g. `"My Post.md"`.
 * @returns The slug, e.g. `"my-post"`.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    .replace(/\.mdx?$/i, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}
112
113
/**
 * Computes the SHA-256 digest of `content` (encoded with `TextEncoder`) and
 * returns it as a lowercase hexadecimal string.
 *
 * @param content - Text to hash.
 * @returns 64-character hex representation of the digest.
 */
export async function getContentHash(content: string): Promise<string> {
  const digest = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(content));

  // Render each byte as two hex digits, most significant nibble first.
  let hex = "";
  for (const byte of Array.from(new Uint8Array(digest))) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
120
121
/**
 * Reports whether `relativePath` matches at least one of the given glob
 * patterns. Patterns are tried in order and checking stops at the first match.
 *
 * @param relativePath - Path to test against the patterns.
 * @param ignorePatterns - Glob patterns; an empty list never matches.
 * @returns `true` if any pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((ignorePattern) => new Glob(ignorePattern).match(relativePath));
}
130
131
/**
 * Scans `contentDir` recursively for `.md` and `.mdx` files and parses each
 * one into a `BlogPost`.
 *
 * Files whose relative path matches any of `ignorePatterns` are skipped.
 * Files whose frontmatter cannot be parsed are logged via `console.error`
 * and left out of the result rather than aborting the scan.
 *
 * @param contentDir - Directory to scan; glob results are relative to it.
 * @param frontmatterMapping - Optional custom frontmatter field names,
 *   forwarded to `parseFrontmatter`.
 * @param ignorePatterns - Glob patterns (relative to `contentDir`) to exclude.
 * @returns Parsed posts sorted by publish date, newest first. Posts with a
 *   missing or unparseable publish date are placed last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const glob = new Glob(pattern);

    for await (const relativePath of glob.scan({
      cwd: contentDir,
      absolute: false,
    })) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const file = Bun.file(filePath);
      const rawContent = await file.text();

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // An invalid date yields NaN from getTime(); subtracting NaN makes the
  // comparator inconsistent, so map it to -Infinity and compare explicitly.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };

  // Sort by publish date (newest first)
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
182
183
/**
 * Returns `rawContent` with an `atUri` entry set in its frontmatter.
 *
 * The entry is written as `atUri = "..."` for `+++` (TOML) frontmatter and as
 * `atUri: "..."` otherwise. If the frontmatter already has an `atUri` line it
 * is replaced in place; otherwise the entry is inserted on its own line just
 * before the closing delimiter. Text after the closing delimiter is never
 * modified.
 *
 * @param rawContent - Full file text including frontmatter.
 * @param atUri - Value to store.
 * @returns The updated file text.
 * @throws Error if there is no existing `atUri` line and no closing
 *   delimiter line can be found.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Find the closing delimiter: it must start a line (so the opening one at
  // index 0 is never matched) and be the only non-blank text on that line,
  // so a delimiter sequence inside a value is ignored.
  let closingIndex = -1;
  let searchFrom = 0;
  while (closingIndex === -1) {
    const newlineIndex = rawContent.indexOf(`\n${delimiter}`, searchFrom);
    if (newlineIndex === -1) break;

    const afterDelimiter = newlineIndex + 1 + delimiter.length;
    const lineEnd = rawContent.indexOf("\n", afterDelimiter);
    const restOfLine = rawContent.slice(afterDelimiter, lineEnd === -1 ? undefined : lineEnd);

    if (restOfLine.trim() === "") {
      closingIndex = newlineIndex + 1;
    } else {
      searchFrom = newlineIndex + 1;
    }
  }

  // Only the part before the closing delimiter is searched for an existing
  // entry, so mentions of "atUri:" in the body are left alone.
  const headEnd = closingIndex === -1 ? rawContent.length : closingIndex;
  const head = rawContent.slice(0, headEnd);
  const tail = rawContent.slice(headEnd);

  // Matches one whole atUri line (YAML or TOML form) without its line ending.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(head)) {
    // Replacer function avoids `$` sequences in atUri being interpreted.
    const updatedHead = head.replace(
      existingEntry,
      (_line: string, indent: string) => `${indent}${atUriEntry}`
    );
    return `${updatedHead}${tail}`;
  }

  if (closingIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  // Insert atUri before the closing delimiter, matching the file's line endings.
  const eol = head.endsWith("\r\n") ? "\r\n" : "\n";
  return `${head}${atUriEntry}${eol}${tail}`;
}
209
210
/**
 * Produces a plain-text approximation of a Markdown document by removing
 * common formatting syntax with regular expressions.
 *
 * Fenced code blocks and images are dropped entirely; links, bold, italic and
 * inline code keep their inner text; ATX header markers at the start of a
 * line are removed. Runs of three or more newlines collapse to two and the
 * result is trimmed.
 *
 * @param markdown - Markdown source text.
 * @returns The text with formatting markers stripped.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    // Code blocks go first so the inline rules never rewrite their contents.
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    // Images go before links; otherwise `![alt](src)` is treated as a link
    // and leaves "!alt" behind.
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    // Anchored to line start so text such as "C# is" is left intact.
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove headers
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}