packages/cli/src/lib/markdown.ts 6.9 K raw
1
import * as fs from "fs/promises";
2
import * as path from "path";
3
import { glob } from "glob";
4
import { minimatch } from "minimatch";
5
import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";
6
7
/**
 * Splits a markdown document into its frontmatter fields and its body.
 *
 * The frontmatter block must start on the first line and be fenced by one of
 * `---` (YAML style), `+++` (TOML style) or `***`. Only flat `key: value`
 * (or `key = value` for `+++`) lines are understood; this is a deliberately
 * small hand-rolled parser, not a full YAML/TOML implementation.
 *
 * Value handling:
 * - Quoted values (`"..."` / `'...'`) are always kept as strings.
 * - Unquoted `[a, b]` becomes a string array (`[]` becomes an empty array).
 * - Unquoted `true` / `false` become booleans.
 * - Anything else is kept as the trimmed string.
 *
 * Both LF and CRLF line endings are accepted, the frontmatter block may be
 * empty, and the document may end directly after the closing delimiter.
 *
 * @param content - Full text of the markdown file.
 * @param mapping - Optional custom field names to read title, description,
 *   publish date, cover image and tags from.
 * @returns The normalized frontmatter and everything after the closing delimiter.
 * @throws Error with message "Could not parse frontmatter" when no frontmatter
 *   block is found at the start of the content.
 */
export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  frontmatter: PostFrontmatter;
  body: string;
} {
  // Support multiple frontmatter delimiters:
  // --- (YAML) - Jekyll, Astro, most SSGs
  // +++ (TOML) - Hugo
  // *** - Alternative format
  // The frontmatter text and the body are both optional groups so that an
  // empty block and a file ending at the closing delimiter still match.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n(?:([\s\S]*?)\r?\n)?\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // Determine format based on delimiter:
  // +++ uses TOML (key = value)
  // --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Parse frontmatter manually
  const raw: Record<string, unknown> = {};
  const lines = frontmatterStr.split(/\r?\n/);

  for (const line of lines) {
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) continue;

    const key = line.slice(0, sepIndex).trim();
    const value = line.slice(sepIndex + 1).trim();

    // Quoted values are literal strings: never reinterpret their content as
    // an array or boolean. The length check keeps a lone quote character
    // from being treated as an empty quoted string.
    const quote = value.charAt(0);
    const isQuoted =
      value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote);
    if (isQuoted) {
      raw[key] = value.slice(1, -1);
      continue;
    }

    // Handle arrays (simple case for tags)
    if (value.startsWith("[") && value.endsWith("]")) {
      const arrayContent = value.slice(1, -1).trim();
      raw[key] =
        arrayContent === ""
          ? [] // `[]` must not turn into `[""]`
          : arrayContent.split(",").map((item) => item.trim().replace(/^["']|["']$/g, ""));
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
}
107
108
/**
 * Derives a URL slug from a markdown filename.
 *
 * A trailing `.md` / `.mdx` extension (lowercase only) is dropped, the rest is
 * lowercased, and every run of whitespace collapses into a single hyphen.
 *
 * @param filename - File name, e.g. `My Post.mdx`.
 * @returns The slug, e.g. `my-post`.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  return lowered.replace(/\s+/g, "-");
}
114
115
/**
 * Computes the SHA-256 digest of a string using the Web Crypto API.
 *
 * @param content - Text to hash; it is encoded with `TextEncoder` before hashing.
 * @returns The digest as a lowercase hexadecimal string (two characters per byte).
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Each byte becomes exactly two hex characters.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
122
123
/**
 * Reports whether a path matches at least one of the given ignore patterns.
 *
 * @param relativePath - Path to test, passed to `minimatch` as-is.
 * @param ignorePatterns - Patterns to test against; an empty list never matches.
 * @returns `true` as soon as any pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
131
132
/**
 * Converts a post's publish date into a sortable timestamp.
 * Missing or unparseable dates map to -Infinity so such posts sort last.
 */
function getPublishTimestamp(post: BlogPost): number {
  const time = new Date(post.frontmatter.publishDate).getTime();
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Recursively collects every `.md` / `.mdx` file under a directory and parses
 * it into a blog post.
 *
 * Files whose path (relative to `contentDir`) matches an ignore pattern are
 * skipped. Files whose frontmatter cannot be parsed are logged with
 * `console.error` and skipped; file read errors are not caught and reject the
 * returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMapping - Optional custom frontmatter field names, passed
 *   through to `parseFrontmatter`.
 * @param ignorePatterns - Glob patterns (matched with `minimatch`) for files to skip.
 * @returns Parsed posts ordered newest first by publish date; posts with a
 *   missing or invalid publish date come last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). Compare instead of subtracting:
  // subtraction yields NaN for invalid dates (and for -Infinity minus
  // -Infinity), which would make the comparator inconsistent.
  posts.sort((a, b) => {
    const timeA = getPublishTimestamp(a);
    const timeB = getPublishTimestamp(b);
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}
182
183
/**
 * Finds the offset of the closing frontmatter delimiter.
 * Only a line after the first one that consists solely of the delimiter
 * (trailing whitespace allowed) counts; returns -1 when there is none.
 */
function findClosingDelimiterIndex(content: string, delimiter: string): number {
  const firstBreak = content.indexOf("\n");
  if (firstBreak === -1) return -1;

  let lineStart = firstBreak + 1;
  while (lineStart <= content.length) {
    const nextBreak = content.indexOf("\n", lineStart);
    const lineEnd = nextBreak === -1 ? content.length : nextBreak;
    // trimEnd also drops the "\r" of a CRLF line ending.
    if (content.slice(lineStart, lineEnd).trimEnd() === delimiter) {
      return lineStart;
    }
    if (nextBreak === -1) break;
    lineStart = nextBreak + 1;
  }
  return -1;
}

/**
 * Writes an `atUri` entry into a document's frontmatter.
 *
 * The entry is formatted as `atUri = "..."` for `+++` frontmatter and as
 * `atUri: "..."` otherwise. An existing `atUri` line inside the frontmatter is
 * replaced in place; if there is none, the entry is inserted directly before
 * the closing delimiter. Text after the closing delimiter is never modified,
 * even if it mentions `atUri:`.
 *
 * @param rawContent - Full text of the markdown file.
 * @param atUri - URI to store.
 * @returns The updated file text.
 * @throws Error with message "Could not find frontmatter end" when no closing
 *   delimiter line exists.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Locate the closing delimiter on its own line, so a delimiter sequence
  // inside a value (e.g. `title: a --- b`) is not mistaken for the end.
  const frontmatterEndIndex = findClosingDelimiterIndex(rawContent, delimiter);
  if (frontmatterEndIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  const beforeEnd = rawContent.slice(0, frontmatterEndIndex);
  const afterEnd = rawContent.slice(frontmatterEndIndex);

  // Replace an existing atUri line (YAML or TOML form), looking only inside
  // the frontmatter. The match stops at the end of the line, so an empty
  // value cannot swallow the following line, and the line ending is kept.
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(beforeEnd)) {
    // Function replacer: `$` sequences in the URI are inserted literally.
    const updated = beforeEnd.replace(
      existingEntry,
      (_match: string, indent: string) => `${indent}${atUriEntry}`
    );
    return `${updated}${afterEnd}`;
  }

  // Insert atUri before the closing delimiter, matching the file's line endings
  const eol = beforeEnd.endsWith("\r\n") ? "\r\n" : "\n";
  return `${beforeEnd}${atUriEntry}${eol}${afterEnd}`;
}
209
210
/**
 * Reduces markdown to approximate plain text using a series of regex passes.
 *
 * Fenced code blocks and images are removed entirely; links keep their text;
 * bold, italic and inline-code markers are dropped; ATX header markers at the
 * start of a line are removed; runs of three or more newlines collapse to two;
 * the result is trimmed.
 *
 * This is a best-effort text extraction, not a markdown parser: nested or
 * unusual constructs may leave stray markup behind.
 *
 * @param markdown - Markdown source text.
 * @returns The text with the formatting described above stripped.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks first so their content is never rewritten
    .replace(/!\[.*?\]\(.*?\)/g, "") // Remove images before links, otherwise `![alt](u)` degrades to `!alt`
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/^([ \t]{0,3})#{1,6}[ \t]+/gm, "$1") // Remove header markers only at line start (keeps "C# is")
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}