git.stevedylan.dev

packages/cli/src/lib/markdown.ts 6.9 K raw

import * as fs from "fs/promises";
import * as path from "path";
import { glob } from "glob";
import { minimatch } from "minimatch";
import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";

/**
 * Converts the text to the right of a frontmatter separator into a value.
 *
 * - Quoted values (matching single or double quotes) are always returned as
 *   strings with the quotes removed, so `"true"` or `"[WIP]"` stay strings.
 * - `[a, "b"]` becomes a string array; empty items are dropped so `[]`
 *   yields an empty array rather than `[""]`.
 * - Bare `true` / `false` become booleans.
 * - Anything else is returned as the trimmed string.
 */
function parseFrontmatterValue(rawValue: string): unknown {
  const value = rawValue.trim();

  const firstChar = value.charAt(0);
  const isQuoted =
    value.length >= 2 &&
    (firstChar === '"' || firstChar === "'") &&
    value.endsWith(firstChar);
  if (isQuoted) {
    return value.slice(1, -1);
  }

  // Inline arrays (simple case for tags)
  if (value.startsWith("[") && value.endsWith("]")) {
    return value
      .slice(1, -1)
      .split(",")
      .map((item) => item.trim().replace(/^["']|["']$/g, ""))
      .filter((item) => item.length > 0);
  }

  if (value === "true") return true;
  if (value === "false") return false;
  return value;
}

/**
 * Splits a markdown document into its frontmatter fields and body.
 *
 * Supported delimiters:
 * - `---` (YAML) - Jekyll, Astro, most SSGs
 * - `+++` (TOML) - Hugo
 * - `***` - alternative format, parsed like YAML
 *
 * Only flat `key: value` / `key = value` lines are understood; lines without
 * a separator (for example multi-line YAML lists) are skipped. Both LF and
 * CRLF line endings are accepted, and the closing delimiter may be the last
 * line of the file.
 *
 * @param content - Full file contents, starting with the opening delimiter.
 * @param mapping - Optional names of the source fields to read the title,
 *   description, publish date, cover image and tags from.
 * @returns The normalized frontmatter and the text after the closing delimiter.
 * @throws Error when no frontmatter block is found at the start of `content`.
 */
export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
  frontmatter: PostFrontmatter;
  body: string;
} {
  // The body group is optional so a closing delimiter at EOF still matches.
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    throw new Error("Could not parse frontmatter");
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // +++ uses TOML (key = value); --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Parse frontmatter manually, one `key<separator>value` pair per line
  const raw: Record<string, unknown> = {};

  for (const line of frontmatterStr.split(/\r?\n/)) {
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) continue;

    const key = line.slice(0, sepIndex).trim();
    if (key === "") continue; // e.g. a stray ": value" line

    raw[key] = parseFrontmatterValue(line.slice(sepIndex + 1));
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names; first non-empty one wins
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    const fallback = dateFields.find((field) => raw[field]);
    if (fallback !== undefined) {
      frontmatter.publishDate = raw[fallback];
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
}

/**
 * Derives a URL slug from a markdown filename.
 *
 * A trailing lowercase `.md` / `.mdx` extension is dropped first, then the
 * remainder is lowercased and every run of whitespace becomes a single `-`.
 */
export function getSlugFromFilename(filename: string): string {
  const withoutExtension = filename.replace(/\.mdx?$/, "");
  const lowered = withoutExtension.toLowerCase();
  // Splitting on whitespace runs and re-joining is equivalent to replacing each run with "-".
  return lowered.split(/\s+/).join("-");
}

/**
 * Computes the SHA-256 digest of `content` (encoded as UTF-8) using the
 * global Web Crypto API.
 *
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Two hex characters per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}

/**
 * Reports whether `relativePath` matches at least one of the given
 * minimatch patterns. An empty pattern list never matches.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((globPattern) => minimatch(relativePath, globPattern));
}

/**
 * Collects every `.md` / `.mdx` file under `contentDir` and parses it into a
 * `BlogPost`.
 *
 * Files whose path (relative to `contentDir`) matches one of
 * `ignorePatterns` are skipped. Files whose frontmatter cannot be parsed are
 * logged with `console.error` and left out of the result; a file that cannot
 * be read rejects the returned promise.
 *
 * @param contentDir - Directory to search recursively.
 * @param frontmatterMapping - Optional field mapping passed to `parseFrontmatter`.
 * @param ignorePatterns - minimatch patterns tested against relative paths.
 * @returns Posts sorted by publish date, newest first. Posts whose publish
 *   date is missing or unparseable are placed after all dated posts.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMapping?: FrontmatterMapping,
  ignorePatterns: string[] = []
): Promise<BlogPost[]> {
  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignorePatterns)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
        const filename = path.basename(relativePath);
        const slug = getSlugFromFilename(filename);

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // An invalid or missing date yields NaN from getTime(); subtracting NaN
  // would make the comparator inconsistent, so map it to -Infinity instead,
  // which sorts those posts after every dated one.
  const toTimestamp = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };

  // Sort by publish date (newest first)
  posts.sort((a, b) => {
    const timeA = toTimestamp(a);
    const timeB = toTimestamp(b);
    // Compare rather than subtract: -Infinity - -Infinity is NaN.
    if (timeA === timeB) return 0;
    return timeB > timeA ? 1 : -1;
  });

  return posts;
}

/**
 * Returns `rawContent` with an `atUri` entry set in its frontmatter.
 *
 * The entry is written as `atUri = "..."` for `+++` (TOML) frontmatter and
 * `atUri: "..."` for `---` / `***` frontmatter. An existing `atUri` line
 * inside the frontmatter is replaced in place; otherwise the entry is
 * inserted on its own line directly before the closing delimiter. Only the
 * frontmatter block is examined, so text in the body that happens to contain
 * `atUri:` or the delimiter characters is never touched. The line ending of
 * the opening delimiter line (LF or CRLF) is reused for an inserted entry.
 *
 * @param rawContent - Full file contents, starting with the opening delimiter.
 * @param atUri - Value to store; written verbatim between double quotes.
 * @throws Error when the content does not start with a frontmatter delimiter
 *   line or no matching closing delimiter line is found.
 */
export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
  // Detect which delimiter is used (---, +++, or ***) and the file's line ending
  const opening = rawContent.match(/^(---|\+\+\+|\*\*\*)[ \t]*(\r?\n)/);
  if (!opening) {
    throw new Error("Could not find frontmatter end");
  }
  const [openingLine = "", delimiter = "---", newline = "\n"] = opening;
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // Locate the closing delimiter: the first later line consisting solely of
  // the delimiter, so delimiter characters inside a value do not count.
  const frontmatterStart = openingLine.length;
  let closingIndex = -1;
  let lineStart = frontmatterStart;
  while (lineStart < rawContent.length) {
    const newlineIndex = rawContent.indexOf("\n", lineStart);
    const lineEnd = newlineIndex === -1 ? rawContent.length : newlineIndex;
    // trimEnd() also drops the "\r" of a CRLF line ending.
    if (rawContent.slice(lineStart, lineEnd).trimEnd() === delimiter) {
      closingIndex = lineStart;
      break;
    }
    lineStart = lineEnd + 1;
  }
  if (closingIndex === -1) {
    throw new Error("Could not find frontmatter end");
  }

  const head = rawContent.slice(0, frontmatterStart);
  const frontmatter = rawContent.slice(frontmatterStart, closingIndex);
  const tail = rawContent.slice(closingIndex);

  // Replace an existing atUri line (YAML or TOML form, any value including
  // an empty one). `.` stops before the line terminator, so it is preserved.
  const existingEntry = /^[ \t]*atUri[ \t]*[=:].*$/m;
  if (existingEntry.test(frontmatter)) {
    // A replacer function keeps "$" sequences in atUri from being interpreted.
    return `${head}${frontmatter.replace(existingEntry, () => atUriEntry)}${tail}`;
  }

  // Insert atUri before the closing delimiter
  return `${head}${frontmatter}${atUriEntry}${newline}${tail}`;
}

/**
 * Reduces markdown to approximate plain text.
 *
 * Order matters: fenced code blocks and images are removed before the other
 * rules run, so that code contents are not reinterpreted as emphasis and
 * `![alt](src)` is not mistaken for a link (which would leave `!alt` behind).
 * Heading markers are only stripped at the start of a line, so text such as
 * `C# is` is left intact.
 *
 * @param markdown - Markdown source text.
 * @returns The text with formatting removed, runs of 3+ newlines collapsed
 *   to a blank line, and surrounding whitespace trimmed.
 */
export function stripMarkdownForText(markdown: string): string {
  return markdown
    .replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
    .replace(/!\[[^\]]*\]\([^)]*\)/g, "") // Remove images
    .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1") // Remove links, keep text
    .replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "") // Remove header markers
    .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
    .replace(/\*([^*]+)\*/g, "$1") // Remove italic
    .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
    .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
    .trim();
}

1	import * as fs from "fs/promises";
2	import * as path from "path";
3	import { glob } from "glob";
4	import { minimatch } from "minimatch";
5	import type { PostFrontmatter, BlogPost, FrontmatterMapping } from "./types";
6
7	export function parseFrontmatter(content: string, mapping?: FrontmatterMapping): {
8	frontmatter: PostFrontmatter;
9	body: string;
10	} {
11	// Support multiple frontmatter delimiters:
12	// --- (YAML) - Jekyll, Astro, most SSGs
13	// +++ (TOML) - Hugo
14	// *** - Alternative format
15	const frontmatterRegex = /^(---|\+\+\+|\*\*\*)\n([\s\S]*?)\n\1\n([\s\S]*)$/;
16	const match = content.match(frontmatterRegex);
17
18	if (!match) {
19	throw new Error("Could not parse frontmatter");
20	}
21
22	const delimiter = match[1];
23	const frontmatterStr = match[2] ?? "";
24	const body = match[3] ?? "";
25
26	// Determine format based on delimiter:
27	// +++ uses TOML (key = value)
28	// --- and *** use YAML (key: value)
29	const isToml = delimiter === "+++";
30	const separator = isToml ? "=" : ":";
31
32	// Parse frontmatter manually
33	const raw: Record<string, unknown> = {};
34	const lines = frontmatterStr.split("\n");
35
36	for (const line of lines) {
37	const sepIndex = line.indexOf(separator);
38	if (sepIndex === -1) continue;
39
40	const key = line.slice(0, sepIndex).trim();
41	let value = line.slice(sepIndex + 1).trim();
42
43	// Handle quoted strings
44	if (
45	(value.startsWith('"') && value.endsWith('"')) \|\|
46	(value.startsWith("'") && value.endsWith("'"))
47	) {
48	value = value.slice(1, -1);
49	}
50
51	// Handle arrays (simple case for tags)
52	if (value.startsWith("[") && value.endsWith("]")) {
53	const arrayContent = value.slice(1, -1);
54	raw[key] = arrayContent
55	.split(",")
56	.map((item) => item.trim().replace(/^["']|["']$/g, ""));
57	} else if (value === "true") {
58	raw[key] = true;
59	} else if (value === "false") {
60	raw[key] = false;
61	} else {
62	raw[key] = value;
63	}
64	}
65
66	// Apply field mappings to normalize to standard PostFrontmatter fields
67	const frontmatter: Record<string, unknown> = {};
68
69	// Title mapping
70	const titleField = mapping?.title \|\| "title";
71	frontmatter.title = raw[titleField] \|\| raw.title;
72
73	// Description mapping
74	const descField = mapping?.description \|\| "description";
75	frontmatter.description = raw[descField] \|\| raw.description;
76
77	// Publish date mapping - check custom field first, then fallbacks
78	const dateField = mapping?.publishDate;
79	if (dateField && raw[dateField]) {
80	frontmatter.publishDate = raw[dateField];
81	} else if (raw.publishDate) {
82	frontmatter.publishDate = raw.publishDate;
83	} else {
84	// Fallback to common date field names
85	const dateFields = ["pubDate", "date", "createdAt", "created_at"];
86	for (const field of dateFields) {
87	if (raw[field]) {
88	frontmatter.publishDate = raw[field];
89	break;
90	}
91	}
92	}
93
94	// Cover image mapping
95	const coverField = mapping?.coverImage \|\| "ogImage";
96	frontmatter.ogImage = raw[coverField] \|\| raw.ogImage;
97
98	// Tags mapping
99	const tagsField = mapping?.tags \|\| "tags";
100	frontmatter.tags = raw[tagsField] \|\| raw.tags;
101
102	// Always preserve atUri (internal field)
103	frontmatter.atUri = raw.atUri;
104
105	return { frontmatter: frontmatter as unknown as PostFrontmatter, body };
106	}
107
108	export function getSlugFromFilename(filename: string): string {
109	return filename
110	.replace(/\.mdx?$/, "")
111	.toLowerCase()
112	.replace(/\s+/g, "-");
113	}
114
115	export async function getContentHash(content: string): Promise<string> {
116	const encoder = new TextEncoder();
117	const data = encoder.encode(content);
118	const hashBuffer = await crypto.subtle.digest("SHA-256", data);
119	const hashArray = Array.from(new Uint8Array(hashBuffer));
120	return hashArray.map((b) => b.toString(16).padStart(2, "0")).join("");
121	}
122
123	function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
124	for (const pattern of ignorePatterns) {
125	if (minimatch(relativePath, pattern)) {
126	return true;
127	}
128	}
129	return false;
130	}
131
132	export async function scanContentDirectory(
133	contentDir: string,
134	frontmatterMapping?: FrontmatterMapping,
135	ignorePatterns: string[] = []
136	): Promise<BlogPost[]> {
137	const patterns = ["**/*.md", "**/*.mdx"];
138	const posts: BlogPost[] = [];
139
140	for (const pattern of patterns) {
141	const files = await glob(pattern, {
142	cwd: contentDir,
143	absolute: false,
144	});
145
146	for (const relativePath of files) {
147	// Skip files matching ignore patterns
148	if (shouldIgnore(relativePath, ignorePatterns)) {
149	continue;
150	}
151
152	const filePath = path.join(contentDir, relativePath);
153	const rawContent = await fs.readFile(filePath, "utf-8");
154
155	try {
156	const { frontmatter, body } = parseFrontmatter(rawContent, frontmatterMapping);
157	const filename = path.basename(relativePath);
158	const slug = getSlugFromFilename(filename);
159
160	posts.push({
161	filePath,
162	slug,
163	frontmatter,
164	content: body,
165	rawContent,
166	});
167	} catch (error) {
168	console.error(`Error parsing ${relativePath}:`, error);
169	}
170	}
171	}
172
173	// Sort by publish date (newest first)
174	posts.sort((a, b) => {
175	const dateA = new Date(a.frontmatter.publishDate);
176	const dateB = new Date(b.frontmatter.publishDate);
177	return dateB.getTime() - dateA.getTime();
178	});
179
180	return posts;
181	}
182
183	export function updateFrontmatterWithAtUri(rawContent: string, atUri: string): string {
184	// Detect which delimiter is used (---, +++, or ***)
185	const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
186	const delimiter = delimiterMatch?.[1] ?? "---";
187	const isToml = delimiter === "+++";
188
189	// Format the atUri entry based on frontmatter type
190	const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;
191
192	// Check if atUri already exists in frontmatter (handle both formats)
193	if (rawContent.includes("atUri:") \|\| rawContent.includes("atUri =")) {
194	// Replace existing atUri (match both YAML and TOML formats)
195	return rawContent.replace(/atUri\s*[=:]\s*["']?[^"'\n]+["']?\n?/, `${atUriEntry}\n`);
196	}
197
198	// Insert atUri before the closing delimiter
199	const frontmatterEndIndex = rawContent.indexOf(delimiter, 4);
200	if (frontmatterEndIndex === -1) {
201	throw new Error("Could not find frontmatter end");
202	}
203
204	const beforeEnd = rawContent.slice(0, frontmatterEndIndex);
205	const afterEnd = rawContent.slice(frontmatterEndIndex);
206
207	return `${beforeEnd}${atUriEntry}\n${afterEnd}`;
208	}
209
210	export function stripMarkdownForText(markdown: string): string {
211	return markdown
212	.replace(/#{1,6}\s/g, "") // Remove headers
213	.replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
214	.replace(/\*([^*]+)\*/g, "$1") // Remove italic
215	.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // Remove links, keep text
216	.replace(/`{3}[\s\S]*?`{3}/g, "") // Remove code blocks
217	.replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
218	.replace(/!\[.*?\]\(.*?\)/g, "") // Remove images
219	.replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
220	.trim();
221	}