git.stevedylan.dev

src/lib/feed-operations.ts 13.9 K raw

import { XMLParser } from "fast-xml-parser";
import { COMMON_FEED_PATHS } from "./feed-discovery";

// Shared XML parser instance; parseFeedXml() runs every feed document through it.
// The key names configured here are the ones the extract* helpers in this file read.
const parser = new XMLParser({
	// Keep XML attributes in the parsed output (needed for Atom <link href="...">)
	ignoreAttributes: false,
	// Attributes appear under "@_"-prefixed keys, e.g. link["@_href"], link["@_rel"]
	attributeNamePrefix: "@_",
	// Element text that sits next to attributes/children is exposed as node["#text"]
	textNodeName: "#text",
	// CDATA sections are exposed as node.__cdata
	cdataPropName: "__cdata",
	// Ask the parser to interpret attribute values instead of leaving raw strings
	// NOTE(review): exact coercion rules are defined by fast-xml-parser — confirm there
	parseAttributeValue: true,
	// Strip surrounding whitespace from parsed values
	trimValues: true,
});

/**
 * Decodes HTML entities in a string.
 *
 * In a browser the decoding is delegated to the HTML parser through a detached
 * <textarea>. Elsewhere a manual fallback handles the common named entities
 * (&amp;, &lt;, ...) plus decimal (&#038;) and hexadecimal (&#x26;) references.
 *
 * The fallback decodes every entity in a single pass, so text that is escaped
 * twice (e.g. "&amp;lt;") is unescaped exactly once ("&lt;") instead of being
 * collapsed all the way to "<".
 *
 * @param text - Text that may contain HTML entities
 * @returns The decoded text; falsy or non-string input is returned unchanged
 */
function decodeHtmlEntities(text: string): string {
	if (!text || typeof text !== "string") return text;

	// Create a temporary element to use browser's built-in HTML decoding
	if (typeof document !== "undefined") {
		const textarea = document.createElement("textarea");
		textarea.innerHTML = text;
		return textarea.value;
	}

	// Fallback for non-browser environments.
	// Explicit replacements take precedence over generic numeric decoding; the
	// typographic quotes are deliberately normalised to their ASCII equivalents.
	const entities: Record<string, string> = {
		"&amp;": "&",
		"&lt;": "<",
		"&gt;": ">",
		"&quot;": '"',
		"&#039;": "'",
		"&apos;": "'",
		"&#8217;": "'",
		"&#8216;": "'",
		"&#8220;": '"',
		"&#8221;": '"',
		"&#8211;": "–",
		"&#8212;": "—",
		"&#038;": "&",
	};

	// One pass over the input: replacement output is never re-scanned
	return text.replace(
		/&(?:#x([0-9a-f]+)|#(\d+)|[a-z][a-z0-9]*);/gi,
		(match: string, hex?: string, dec?: string) => {
			const mapped = entities[match];
			if (mapped !== undefined) return mapped;

			// Named entity that is not in the table: leave it untouched
			if (hex === undefined && dec === undefined) return match;

			const codePoint =
				hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? "", 10);

			// String.fromCodePoint throws outside the Unicode range; keep the raw text
			if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) return match;

			// fromCodePoint (unlike fromCharCode) handles characters above U+FFFF
			return String.fromCodePoint(codePoint);
		},
	);
}

/**
 * Result of parseFeedXml(): the feed-level node plus its normalised entries.
 */
export interface ParsedFeedData {
	// Feed-level node: the <channel> element for RSS and RDF, the <feed> element for Atom
	feedData: any;
	// Items/entries, always an array (a single item is wrapped, empty objects are dropped)
	posts: any[];
	// True only for Atom feeds; RSS and RDF feeds report false
	isAtom: boolean;
}

/**
 * Fetches XML data from a URL with CORS fallback.
 *
 * Tries the URL directly first, then the primary CORS proxy, then the
 * secondary one. An attempt counts as failed when the request rejects
 * (network/CORS error) or when the server answers with a non-2xx status:
 * fetch() resolves normally on HTTP errors, so without the status check an
 * error page would be handed back as if it were the feed.
 *
 * @param url - Feed URL to download
 * @returns The response body of the first successful attempt
 * @throws The error of the last attempt when all three fail
 */
export async function fetchFeedWithFallback(url: string): Promise<string> {
	const encodedUrl = encodeURIComponent(url);
	const candidates = [
		url,
		`https://proxy.alcove.tools?url=${encodedUrl}`,
		`https://proxy2.alcove.tools?url=${encodedUrl}`,
	];

	let lastError: unknown;

	for (const candidate of candidates) {
		try {
			const response = await fetch(candidate);
			if (!response.ok) {
				throw new Error(`Request failed with status ${response.status}`);
			}
			return await response.text();
		} catch (error) {
			// Remember why this attempt failed and move on to the next candidate
			lastError = error;
		}
	}

	throw lastError instanceof Error
		? lastError
		: new Error(`Failed to fetch feed: ${url}`);
}

/**
 * Parses a feed document and classifies it as RSS, Atom or RDF (RSS 1.0).
 *
 * @param xmlData - Raw XML text of the feed
 * @returns The feed-level node, its entries as an array, and the Atom flag
 * @throws Error when the XML cannot be parsed, when an RSS document has no
 *         channel element, or when no supported root element is present
 */
export function parseFeedXml(xmlData: string): ParsedFeedData {
	// A lone item is parsed as a plain value rather than a one-element array
	const asList = (node: any): any[] => {
		if (!node) return [];
		return Array.isArray(node) ? node : [node];
	};

	let root: any;
	try {
		root = parser.parse(xmlData);
	} catch (error) {
		const reason = error instanceof Error ? error.message : "Unknown error";
		throw new Error(`XML parsing failed: ${reason}`);
	}

	let feedData: any;
	let entries: any[];
	let isAtom: boolean;

	const rssRoot = root.rss;
	const atomRoot = root.feed;
	const rdfRoot = root["rdf:RDF"];

	if (rssRoot) {
		// RSS 2.0: items live inside <channel>
		feedData = rssRoot.channel;
		if (!feedData) {
			throw new Error("RSS feed missing channel element");
		}
		entries = asList(feedData.item);
		isAtom = false;
	} else if (atomRoot) {
		// Atom: entries are direct children of <feed>
		feedData = atomRoot;
		entries = asList(atomRoot.entry);
		isAtom = true;
	} else if (rdfRoot) {
		// RDF / RSS 1.0: items are siblings of <channel>, not children
		feedData = rdfRoot.channel;
		entries = asList(rdfRoot.item);
		isAtom = false;
	} else {
		// Surface what was actually found to make debugging easier
		const found = Object.keys(root).join(", ");
		throw new Error(`Unsupported feed format. Found root elements: ${found}`);
	}

	// Drop null/empty entries
	const posts = entries.filter(
		(entry) => entry && Object.keys(entry).length > 0,
	);

	return { feedData, posts, isAtom };
}

/**
 * Discovers an RSS/Atom/RDF feed URL from a website URL.
 *
 * Each entry of COMMON_FEED_PATHS is appended to the site's origin and
 * requested through the CORS proxies (secondary proxy only when the primary
 * request rejects). The first successful response whose body looks like a feed
 * wins. The sniff accepts an XML prolog or an `<rss`, `<feed` or `<rdf:RDF`
 * tag, matching the three formats parseFeedXml understands.
 *
 * @param websiteUrl - Any absolute URL on the site; only its origin is used
 * @returns The feed URL and its body, or null when no candidate path matched
 * @throws TypeError when websiteUrl is not a valid absolute URL
 */
export async function discoverFeed(websiteUrl: string): Promise<{
	feedUrl: string;
	xmlData: string;
} | null> {
	const { origin } = new URL(websiteUrl);

	for (const path of COMMON_FEED_PATHS) {
		const testUrl = `${origin}${path}`;
		const proxyQuery = `?url=${encodeURIComponent(testUrl)}`;

		try {
			// Try primary CORS proxy
			let response: Response;
			try {
				response = await fetch(`https://proxy.alcove.tools${proxyQuery}`);
			} catch {
				// Fall back to secondary CORS proxy
				response = await fetch(`https://proxy2.alcove.tools${proxyQuery}`);
			}

			if (!response.ok) continue;

			const text = await response.text();
			// Quick check if it looks like a feed document
			const looksLikeFeed =
				text.trim().startsWith("<?xml") ||
				text.includes("<rss") ||
				text.includes("<feed") ||
				text.includes("<rdf:RDF");

			if (looksLikeFeed) {
				return { feedUrl: testUrl, xmlData: text };
			}
		} catch {
			// Both proxies failed for this path; move on to the next candidate
		}
	}

	return null;
}

/**
 * Heuristic: does the URL contain one of the substrings typical of a direct
 * feed address ("/feed", "/rss", ".xml", "/atom")?
 */
export function looksLikeFeedUrl(url: string): boolean {
	const feedMarkers = ["/feed", "/rss", ".xml", "/atom"];
	return feedMarkers.some((marker) => url.includes(marker));
}

/**
 * Downloads a YouTube page through the CORS proxies and returns its HTML.
 * The secondary proxy is used only when the primary request rejects.
 */
async function fetchYouTubePageHtml(url: string): Promise<string> {
	const proxyQuery = `?url=${encodeURIComponent(url)}`;

	let response: Response;
	try {
		response = await fetch(`https://proxy.alcove.tools${proxyQuery}`);
	} catch {
		// Fall back to secondary CORS proxy
		response = await fetch(`https://proxy2.alcove.tools${proxyQuery}`);
	}

	return response.text();
}

/**
 * Searches YouTube page HTML for the channel ID: first in the embedded JSON,
 * then in the channelId meta tag, then in the canonical link.
 */
function findChannelIdInHtml(html: string): string | null {
	const patterns = [
		/channelId":"([^"]+)"/,
		/<meta itemprop="channelId" content="([^"]+)">/,
		/<link rel="canonical" href="https:\/\/www\.youtube\.com\/channel\/([^"]+)">/,
	];

	for (const pattern of patterns) {
		const channelId = html.match(pattern)?.[1];
		if (channelId) return channelId;
	}

	return null;
}

/**
 * Extracts YouTube channel ID from various YouTube URL formats
 * Supports:
 * - https://www.youtube.com/@ChannelHandle
 * - https://www.youtube.com/channel/UC...
 * - https://www.youtube.com/c/ChannelName
 * - https://www.youtube.com/user/Username
 *
 * The /channel/ form is resolved from the URL alone. The other forms require
 * downloading the page (once) and scanning it for the ID.
 *
 * @param url - YouTube channel URL
 * @returns The channel ID, or null when the URL format is not recognised, the
 *          page cannot be fetched, or no ID is found in it. Never rejects.
 */
export async function extractYouTubeChannelId(
	url: string,
): Promise<string | null> {
	try {
		// Direct channel ID format: stop at path, query and fragment delimiters
		if (url.includes("/channel/")) {
			return url.match(/\/channel\/([^/?#]+)/)?.[1] ?? null;
		}

		const isHandleUrl = url.includes("/@");
		// "/@" with nothing usable after it is not a valid handle URL
		if (isHandleUrl && !/\/@[^/?]+/.test(url)) return null;

		const isLegacyUrl = url.includes("/c/") || url.includes("/user/");
		if (!isHandleUrl && !isLegacyUrl) return null;

		let html: string;
		try {
			html = await fetchYouTubePageHtml(url);
		} catch (error) {
			console.error("Failed to fetch YouTube page for channel ID:", error);
			return null;
		}

		return findChannelIdInHtml(html);
	} catch (error) {
		console.error("Error extracting YouTube channel ID:", error);
		return null;
	}
}

/**
 * Resolves a YouTube channel URL to the address of that channel's video feed.
 *
 * @returns The feed URL, or null when no channel ID could be determined
 */
export async function convertYouTubeUrlToFeed(
	url: string,
): Promise<string | null> {
	const resolvedId = await extractYouTubeChannelId(url);

	return resolvedId
		? `https://www.youtube.com/feeds/videos.xml?channel_id=${resolvedId}`
		: null;
}

/**
 * Reports whether the URL mentions one of YouTube's domains
 * ("youtube.com" or "youtu.be") anywhere in its text.
 */
export function isYouTubeUrl(url: string): boolean {
	const youTubeDomains = ["youtube.com", "youtu.be"];
	return youTubeDomains.some((domain) => url.includes(domain));
}

/**
 * Extracts YouTube video ID from a video URL
 * Supports:
 * - https://www.youtube.com/watch?v=VIDEO_ID
 * - https://youtu.be/VIDEO_ID
 * - https://www.youtube.com/embed/VIDEO_ID
 *
 * The ID ends at the next URL delimiter, so trailing query parameters,
 * fragments (#t=30) and slashes are never included in the result.
 *
 * @param url - Video URL
 * @returns The video ID, or null when the URL matches none of the formats
 */
export function extractYouTubeVideoId(url: string): string | null {
	try {
		const patterns = [
			// Standard watch URL
			/[?&]v=([^&#]+)/,
			// Short URL format
			/youtu\.be\/([^?&#/]+)/,
			// Embed URL format
			/youtube\.com\/embed\/([^?&#/]+)/,
		];

		for (const pattern of patterns) {
			const videoId = url.match(pattern)?.[1];
			if (videoId) return videoId;
		}

		return null;
	} catch {
		// Non-string input at runtime: treat as "no ID"
		return null;
	}
}

/**
 * Tells whether a post belongs to a YouTube feed, judged by the feed URL
 * containing the YouTube videos.xml endpoint. A missing URL yields false.
 */
export function isYouTubePost(feedUrl: string | null): boolean {
	return feedUrl ? feedUrl.includes("youtube.com/feeds/videos.xml") : false;
}

/**
 * Resolves the URL of a post from a parsed RSS item or Atom entry.
 *
 * Atom links may be parsed as a string, a single element object, or an array
 * of element objects; RSS links are usually plain text but may be wrapped in
 * an object. "#" is returned when nothing usable is found.
 */
export function extractPostLink(post: any, isAtom: boolean): string {
	if (!isAtom) {
		// RSS: link first, then guid, then id
		const rssLink = post.link || post.guid || post.id;
		if (!rssLink) return "#";

		// The parser wraps text in an object when the element carries attributes/CDATA
		return typeof rssLink === "object"
			? rssLink["#text"] || rssLink.__cdata || "#"
			: String(rssLink);
	}

	const atomLink = post.link;
	const idOrPlaceholder = post.id || "#";

	if (typeof atomLink === "string") {
		return atomLink || idOrPlaceholder;
	}

	if (Array.isArray(atomLink)) {
		// Prefer rel="alternate" (or a link without rel), else the first link
		const preferred = atomLink.find(
			(candidate: any) =>
				candidate["@_rel"] === "alternate" || !candidate["@_rel"],
		);
		return preferred?.["@_href"] || atomLink[0]?.["@_href"] || idOrPlaceholder;
	}

	if (atomLink && typeof atomLink === "object") {
		return atomLink["@_href"] || idOrPlaceholder;
	}

	return idOrPlaceholder;
}

/**
 * Extracts author from RSS or Atom post entry.
 *
 * Atom reads `author`; RSS reads `author`, then `dc:creator`, then `creator`.
 * An author node may be plain text, an element object (Atom `name`, text node,
 * CDATA) or an array when the entry lists several authors. For an array the
 * first author with a usable name is returned. The result is always a string.
 *
 * @param post - Parsed RSS item or Atom entry
 * @param isAtom - Whether the entry comes from an Atom feed
 * @param feedTitle - Fallback used when no author can be determined
 * @returns The author's display text, or feedTitle
 */
export function extractPostAuthor(
	post: any,
	isAtom: boolean,
	feedTitle: string,
): string {
	// Atom nests the name in <name>; RSS elements carry it as text or CDATA
	const objectKeys = isAtom
		? ["name", "#text", "__cdata"]
		: ["#text", "__cdata", "name"];

	// Reduces one author node to its text, or "" when it has none
	const textOf = (node: any): string => {
		if (!node) return "";

		if (Array.isArray(node)) {
			// Several authors: use the first one that resolves to a name
			for (const entry of node) {
				const entryText = textOf(entry);
				if (entryText) return entryText;
			}
			return "";
		}

		if (typeof node === "object") {
			for (const key of objectKeys) {
				const nestedText = textOf(node[key]);
				if (nestedText) return nestedText;
			}
			return "";
		}

		return String(node);
	};

	const rawAuthor = isAtom
		? post.author
		: post.author || post["dc:creator"] || post.creator;

	return textOf(rawAuthor) || feedTitle;
}

/**
 * Extracts content from RSS or Atom post entry.
 *
 * The fields `content:encoded`, `content`, `description` and `summary` are
 * tried in that order and the first one that yields non-empty text wins, so an
 * empty or attribute-only element no longer hides a usable later field.
 *
 * When no field has content, a fallback is returned: a "View post" link when
 * postLink is given, otherwise a plain message. postLink comes from feed data,
 * so it is HTML-escaped before being placed in the href attribute.
 *
 * @param post - Parsed RSS item or Atom entry
 * @param postLink - URL of the post, used only for the fallback link
 * @returns Trimmed post content (typically HTML) or the fallback
 */
export function extractPostContent(post: any, postLink?: string): string {
	// Escapes a value for safe use inside a double-quoted HTML attribute
	const escapeAttribute = (value: string): string =>
		value
			.replace(/&/g, "&amp;")
			.replace(/"/g, "&quot;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;");

	// Try various content fields in order of preference
	const candidates = [
		post["content:encoded"],
		post.content,
		post.description,
		post.summary,
	];

	for (const candidate of candidates) {
		let text = "";
		if (typeof candidate === "string") {
			text = candidate;
		} else if (candidate && typeof candidate === "object") {
			// Handle CDATA or nested text
			text = String(candidate.__cdata || candidate["#text"] || "");
		}

		const trimmed = text.trim();
		if (trimmed.length > 0) return trimmed;
	}

	// No content found - this is fine for link-only feeds
	return postLink
		? `<p><a href="${escapeAttribute(String(postLink))}" target="_blank" rel="noopener noreferrer">View post</a></p>`
		: "Please open on the web";
}

/**
 * Extracts the date of an RSS, Atom or RDF post entry as an ISO 8601 string.
 *
 * Fields are tried in the order `pubDate` (RSS), `updated`, `published`
 * (Atom), `dc:date` (Dublin Core, used by RDF/RSS 1.0 feeds). A value that the
 * parser wrapped in an object is unwrapped from its CDATA or text node first.
 *
 * @param post - Parsed feed entry
 * @returns The entry date, or the current time when the entry has no date or
 *          the date cannot be parsed
 */
export function extractPostDate(post: any): string {
	try {
		let dateValue =
			post.pubDate || post.updated || post.published || post["dc:date"];

		// Elements with attributes or CDATA are parsed into objects
		if (dateValue && typeof dateValue === "object") {
			dateValue = dateValue.__cdata || dateValue["#text"];
		}

		if (dateValue) {
			const parsedDate = new Date(dateValue);
			// Check if date is valid
			if (!isNaN(parsedDate.getTime())) {
				return parsedDate.toISOString();
			}
		}
	} catch {
		// Malformed entry (e.g. null): fall through to the current date
	}

	return new Date().toISOString();
}

/**
 * Reduces a parsed XML value to plain text and decodes its HTML entities.
 *
 * Strings are used as-is; objects contribute their CDATA, text node or `text`
 * property (first truthy one); other primitives are stringified. Falsy input
 * and objects without any of those properties give "".
 */
function extractStringValue(value: any): string {
	if (!value) return "";

	let raw: string;

	switch (typeof value) {
		case "string":
			raw = value;
			break;
		case "object": {
			// Try the common text-carrying properties in priority order
			const inner = value.__cdata || value["#text"] || value.text;
			if (!inner) return "";
			raw = String(inner);
			break;
		}
		default:
			// Numbers, booleans, etc.
			raw = String(value);
	}

	// Decode HTML entities before returning
	return decodeHtmlEntities(raw);
}

/**
 * Safely truncate a string to a maximum length.
 *
 * The value is first converted to text (with HTML entities decoded) and
 * trimmed. Text longer than maxLength is cut and ends in "...", with the
 * ellipsis counted towards the limit. When maxLength is below 3 there is no
 * room for the ellipsis, so the text is hard-cut instead; the result never
 * exceeds maxLength. A cut never leaves half of a surrogate pair behind.
 *
 * @param str - Value to truncate (string, parsed XML node, number, ...)
 * @param maxLength - Maximum length of the result in UTF-16 code units
 * @returns The trimmed and possibly truncated text; "" for empty input
 */
export function truncateString(str: any, maxLength: number): string {
	const strValue = extractStringValue(str);
	if (!strValue) return "";
	const trimmed = strValue.trim();
	if (trimmed.length <= maxLength) return trimmed;

	const ellipsis = "...";
	// Negative, fractional or NaN limits are normalised to a usable integer
	const limit = Math.max(0, Math.floor(maxLength) || 0);
	const hasRoomForEllipsis = limit >= ellipsis.length;
	const keep = hasRoomForEllipsis ? limit - ellipsis.length : limit;

	let head = trimmed.substring(0, keep);

	// Do not end on a lone high surrogate (first half of an astral character)
	const lastUnit = head.charCodeAt(head.length - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
		head = head.slice(0, -1);
	}

	return hasRoomForEllipsis ? head + ellipsis : head;
}

/**
 * Builds the title/description pair stored for a feed.
 *
 * The title comes from the parsed feed, then the existing feed record, then
 * "Untitled Feed"; the description from the parsed description, the parsed
 * subtitle, then the existing record. Both are normalised through
 * truncateString (200 and 1000 characters respectively).
 */
export function sanitizeFeedData(feedData: any, feed?: any) {
	const titleSources = [feedData?.title, feed?.title];
	const descriptionSources = [
		feedData?.description,
		feedData?.subtitle,
		feed?.description,
	];

	// First truthy source wins; fall back to the defaults otherwise
	const rawTitle = titleSources.find(Boolean) ?? "Untitled Feed";
	const rawDescription = descriptionSources.find(Boolean) ?? "";

	const title = truncateString(rawTitle, 200);
	const description = truncateString(rawDescription, 1000);

	return { title, description };
}

/**
 * Builds the title/author/link triple stored for a post.
 *
 * The title falls back to "Untitled"; author and link are resolved with
 * extractPostAuthor and extractPostLink. Every field is normalised through
 * truncateString (1000, 200 and 1000 characters respectively).
 */
export function sanitizePostData(
	post: any,
	isAtom: boolean,
	feedTitle: string,
) {
	const title = truncateString(post.title || "Untitled", 1000);

	const rawAuthor = extractPostAuthor(post, isAtom, feedTitle);
	const author = truncateString(rawAuthor, 200);

	const rawLink = extractPostLink(post, isAtom);
	const link = truncateString(rawLink, 1000);

	return { title, author, link };
}

1	import { XMLParser } from "fast-xml-parser";
2	import { COMMON_FEED_PATHS } from "./feed-discovery";
3
4	const parser = new XMLParser({
5	ignoreAttributes: false,
6	attributeNamePrefix: "@_",
7	textNodeName: "#text",
8	cdataPropName: "__cdata",
9	parseAttributeValue: true,
10	trimValues: true,
11	});
12
13	/**
14	* Decodes HTML entities in a string
15	* Handles both named entities (&amp;) and numeric entities (&#038;, &#x26;)
16	*/
17	function decodeHtmlEntities(text: string): string {
18	if (!text \|\| typeof text !== "string") return text;
19
20	// Create a temporary element to use browser's built-in HTML decoding
21	if (typeof document !== "undefined") {
22	const textarea = document.createElement("textarea");
23	textarea.innerHTML = text;
24	return textarea.value;
25	}
26
27	// Fallback for non-browser environments (though we're in a browser app)
28	// Handle common HTML entities manually
29	const entities: Record<string, string> = {
30	"&amp;": "&",
31	"&lt;": "<",
32	"&gt;": ">",
33	"&quot;": '"',
34	"&#039;": "'",
35	"&apos;": "'",
36	"&#8217;": "'",
37	"&#8216;": "'",
38	"&#8220;": '"',
39	"&#8221;": '"',
40	"&#8211;": "–",
41	"&#8212;": "—",
42	"&#038;": "&",
43	};
44
45	let decoded = text;
46	for (const [entity, char] of Object.entries(entities)) {
47	decoded = decoded.replace(new RegExp(entity, "g"), char);
48	}
49
50	// Handle numeric entities like &#8217;
51	decoded = decoded.replace(/&#(\d+);/g, (_match, dec) => {
52	return String.fromCharCode(dec);
53	});
54
55	// Handle hex entities like &#x27;
56	decoded = decoded.replace(/&#x([0-9a-f]+);/gi, (_match, hex) => {
57	return String.fromCharCode(parseInt(hex, 16));
58	});
59
60	return decoded;
61	}
62
63	export interface ParsedFeedData {
64	feedData: any;
65	posts: any[];
66	isAtom: boolean;
67	}
68
69	/**
70	* Fetches XML data from a URL with CORS fallback
71	*/
72	export async function fetchFeedWithFallback(url: string): Promise<string> {
73	try {
74	// Try to fetch directly first
75	const response = await fetch(url);
76	return await response.text();
77	} catch {
78	// Fall back to primary CORS proxy if direct fetch fails
79	try {
80	const response = await fetch(
81	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
82	);
83	return await response.text();
84	} catch {
85	// Fall back to secondary CORS proxy if primary fails
86	const response = await fetch(
87	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
88	);
89	return await response.text();
90	}
91	}
92	}
93
94	/**
95	* Parses XML data and determines if it's RSS or Atom feed
96	*/
97	export function parseFeedXml(xmlData: string): ParsedFeedData {
98	let parsedXmlData: any;
99
100	try {
101	parsedXmlData = parser.parse(xmlData);
102	} catch (error) {
103	throw new Error(
104	`XML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`,
105	);
106	}
107
108	// Determine if it's RSS or Atom feed
109	let feedData: any;
110	let posts: any[];
111	let isAtom = false;
112
113	if (parsedXmlData.rss) {
114	// RSS feed
115	feedData = parsedXmlData.rss.channel;
116	if (!feedData) {
117	throw new Error("RSS feed missing channel element");
118	}
119	const items = feedData.item \|\| [];
120	// Ensure posts is always an array (single item might not be in array)
121	posts = Array.isArray(items) ? items : items ? [items] : [];
122	} else if (parsedXmlData.feed) {
123	// Atom feed
124	feedData = parsedXmlData.feed;
125	const entries = feedData.entry \|\| [];
126	// Ensure posts is always an array (single entry might not be in array)
127	posts = Array.isArray(entries) ? entries : entries ? [entries] : [];
128	isAtom = true;
129	} else if (parsedXmlData["rdf:RDF"]) {
130	// RDF/RSS 1.0 feed
131	feedData = parsedXmlData["rdf:RDF"].channel;
132	const items = parsedXmlData["rdf:RDF"].item \|\| [];
133	posts = Array.isArray(items) ? items : items ? [items] : [];
134	isAtom = false;
135	} else {
136	// Log available root elements for debugging
137	const rootKeys = Object.keys(parsedXmlData);
138	throw new Error(
139	`Unsupported feed format. Found root elements: ${rootKeys.join(", ")}`,
140	);
141	}
142
143	// Filter out empty objects from posts array
144	posts = posts.filter((post) => post && Object.keys(post).length > 0);
145
146	return { feedData, posts, isAtom };
147	}
148
149	/**
150	* Discovers RSS/Atom feed URL from a website URL
151	*/
152	export async function discoverFeed(websiteUrl: string): Promise<{
153	feedUrl: string;
154	xmlData: string;
155	} \| null> {
156	const urlObj = new URL(websiteUrl);
157	const origin = urlObj.origin;
158
159	for (const path of COMMON_FEED_PATHS) {
160	const testUrl = `${origin}${path}`;
161
162	try {
163	// Try primary CORS proxy
164	let response: Response;
165	try {
166	response = await fetch(
167	`https://proxy.alcove.tools?url=${encodeURIComponent(testUrl)}`,
168	);
169	} catch {
170	// Fall back to secondary CORS proxy
171	response = await fetch(
172	`https://proxy2.alcove.tools?url=${encodeURIComponent(testUrl)}`,
173	);
174	}
175
176	if (response.ok) {
177	const text = await response.text();
178	// Quick check if it looks like XML
179	if (
180	text.trim().startsWith("<?xml") \|\|
181	text.includes("<rss") \|\|
182	text.includes("<feed")
183	) {
184	return { feedUrl: testUrl, xmlData: text };
185	}
186	}
187	} catch (error) {
188	continue;
189	}
190	}
191
192	return null;
193	}
194
195	/**
196	* Checks if a URL looks like a direct feed URL
197	*/
198	export function looksLikeFeedUrl(url: string): boolean {
199	return (
200	url.includes("/feed") \|\|
201	url.includes("/rss") \|\|
202	url.includes(".xml") \|\|
203	url.includes("/atom")
204	);
205	}
206
207	/**
208	* Extracts YouTube channel ID from various YouTube URL formats
209	* Supports:
210	* - https://www.youtube.com/@ChannelHandle
211	* - https://www.youtube.com/channel/UC...
212	* - https://www.youtube.com/c/ChannelName
213	* - https://www.youtube.com/user/Username
214	*/
215	export async function extractYouTubeChannelId(
216	url: string,
217	): Promise<string \| null> {
218	try {
219	// Direct channel ID format
220	if (url.includes("/channel/")) {
221	const match = url.match(/\/channel\/([^/?]+)/);
222	return match ? match[1] : null;
223	}
224
225	// Handle @ format - need to fetch the page to get channel ID
226	if (url.includes("/@")) {
227	const handle = url.match(/\/@([^/?]+)/)?.[1];
228	if (!handle) return null;
229
230	// Fetch the YouTube page to extract the channel ID from meta tags
231	try {
232	let response: Response;
233	try {
234	response = await fetch(
235	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
236	);
237	} catch {
238	// Fall back to secondary CORS proxy
239	response = await fetch(
240	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
241	);
242	}
243	const html = await response.text();
244
245	// Look for channel ID in various places
246	const channelIdMatch = html.match(/channelId":"([^"]+)"/);
247	if (channelIdMatch) {
248	return channelIdMatch[1];
249	}
250
251	// Alternative: look in meta tags
252	const metaMatch = html.match(
253	/<meta itemprop="channelId" content="([^"]+)">/,
254	);
255	if (metaMatch) {
256	return metaMatch[1];
257	}
258
259	// Alternative: look in link tags
260	const linkMatch = html.match(
261	/<link rel="canonical" href="https:\/\/www\.youtube\.com\/channel\/([^"]+)">/,
262	);
263	if (linkMatch) {
264	return linkMatch[1];
265	}
266	} catch (error) {
267	console.error("Failed to fetch YouTube page for channel ID:", error);
268	return null;
269	}
270	}
271
272	// For /c/ and /user/ formats, we also need to fetch the page
273	if (url.includes("/c/") \|\| url.includes("/user/")) {
274	try {
275	let response: Response;
276	try {
277	response = await fetch(
278	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
279	);
280	} catch {
281	// Fall back to secondary CORS proxy
282	response = await fetch(
283	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
284	);
285	}
286	const html = await response.text();
287
288	const channelIdMatch = html.match(/channelId":"([^"]+)"/);
289	if (channelIdMatch) {
290	return channelIdMatch[1];
291	}
292	} catch (error) {
293	console.error("Failed to fetch YouTube page for channel ID:", error);
294	return null;
295	}
296	}
297
298	return null;
299	} catch (error) {
300	console.error("Error extracting YouTube channel ID:", error);
301	return null;
302	}
303	}
304
305	/**
306	* Converts YouTube channel URL to RSS feed URL
307	*/
308	export async function convertYouTubeUrlToFeed(
309	url: string,
310	): Promise<string \| null> {
311	const channelId = await extractYouTubeChannelId(url);
312	if (!channelId) return null;
313
314	return `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`;
315	}
316
317	/**
318	* Checks if a URL is a YouTube URL
319	*/
320	export function isYouTubeUrl(url: string): boolean {
321	return url.includes("youtube.com") \|\| url.includes("youtu.be");
322	}
323
324	/**
325	* Extracts YouTube video ID from a video URL
326	* Supports:
327	* - https://www.youtube.com/watch?v=VIDEO_ID
328	* - https://youtu.be/VIDEO_ID
329	* - https://www.youtube.com/embed/VIDEO_ID
330	*/
331	export function extractYouTubeVideoId(url: string): string \| null {
332	try {
333	// Standard watch URL
334	const watchMatch = url.match(/[?&]v=([^&]+)/);
335	if (watchMatch) return watchMatch[1];
336
337	// Short URL format
338	const shortMatch = url.match(/youtu\.be\/([^?]+)/);
339	if (shortMatch) return shortMatch[1];
340
341	// Embed URL format
342	const embedMatch = url.match(/youtube\.com\/embed\/([^?]+)/);
343	if (embedMatch) return embedMatch[1];
344
345	return null;
346	} catch {
347	return null;
348	}
349	}
350
351	/**
352	* Checks if a post is from a YouTube feed
353	*/
354	export function isYouTubePost(feedUrl: string \| null): boolean {
355	if (!feedUrl) return false;
356	return feedUrl.includes("youtube.com/feeds/videos.xml");
357	}
358
359	/**
360	* Extracts post link from RSS or Atom post entry
361	*/
362	export function extractPostLink(post: any, isAtom: boolean): string {
363	if (isAtom) {
364	// Handle Atom link which can be string, object, or array
365	if (typeof post.link === "string") {
366	return post.link \|\| post.id \|\| "#";
367	} else if (Array.isArray(post.link)) {
368	// Find 'alternate' link or use first link
369	const alternateLink = post.link.find(
370	(l: any) => l["@_rel"] === "alternate" \|\| !l["@_rel"],
371	);
372	return (
373	alternateLink?.["@_href"] \|\| post.link[0]?.["@_href"] \|\| post.id \|\| "#"
374	);
375	} else if (post.link && typeof post.link === "object") {
376	return post.link["@_href"] \|\| post.id \|\| "#";
377	}
378	return post.id \|\| "#";
379	}
380
381	// RSS feed
382	const link = post.link \|\| post.guid \|\| post.id;
383	if (!link) return "#";
384
385	// Handle link as object (sometimes RSS parsers do this)
386	if (typeof link === "object") {
387	return link["#text"] \|\| link.__cdata \|\| "#";
388	}
389
390	return String(link);
391	}
392
393	/**
394	* Extracts author from RSS or Atom post entry
395	*/
396	export function extractPostAuthor(
397	post: any,
398	isAtom: boolean,
399	feedTitle: string,
400	): string {
401	if (isAtom) {
402	// Atom can have author as object with name property
403	const author = post.author;
404	if (typeof author === "object" && author !== null) {
405	return author.name \|\| author["#text"] \|\| feedTitle;
406	}
407	return author \|\| feedTitle;
408	}
409
410	// RSS feed
411	const author = post.author \|\| post["dc:creator"] \|\| post.creator;
412	if (!author) return feedTitle;
413
414	// Handle author as object
415	if (typeof author === "object") {
416	return author["#text"] \|\| author.__cdata \|\| feedTitle;
417	}
418
419	return String(author);
420	}
421
422	/**
423	* Extracts content from RSS or Atom post entry
424	*/
425	export function extractPostContent(post: any, postLink?: string): string {
426	// Try various content fields in order of preference
427	const content =
428	post["content:encoded"] \|\| post.content \|\| post.description \|\| post.summary;
429
430	// Default fallback message
431	const fallbackMessage = postLink
432	? `<p><a href="${postLink}" target="_blank" rel="noopener noreferrer">View post</a></p>`
433	: "Please open on the web";
434
435	// Handle different content structures
436	if (typeof content === "string") {
437	const trimmed = content.trim();
438	return trimmed.length > 0 ? trimmed : fallbackMessage;
439	} else if (content && typeof content === "object") {
440	// Handle CDATA or nested text
441	const extracted = content.__cdata \|\| content["#text"] \|\| "";
442	const trimmed = String(extracted).trim();
443	return trimmed.length > 0 ? trimmed : fallbackMessage;
444	}
445
446	// No content found - this is fine for link-only feeds
447	return fallbackMessage;
448	}
449
450	/**
451	* Extracts published date from RSS or Atom post entry
452	*/
453	export function extractPostDate(post: any): string {
454	try {
455	const dateValue = post.pubDate \|\| post.updated \|\| post.published;
456	if (!dateValue) {
457	return new Date().toISOString(); // Use current date if no date found
458	}
459	const parsedDate = new Date(dateValue);
460	// Check if date is valid
461	if (isNaN(parsedDate.getTime())) {
462	return new Date().toISOString();
463	}
464	return parsedDate.toISOString();
465	} catch {
466	return new Date().toISOString();
467	}
468	}
469
470	/**
471	* Extract string value from various data types and decode HTML entities
472	*/
473	function extractStringValue(value: any): string {
474	if (!value) return "";
475
476	let strValue = "";
477
478	if (typeof value === "string") {
479	strValue = value;
480	} else if (typeof value === "object") {
481	// Handle objects that might contain text
482	// Try common text properties
483	if (value.__cdata) strValue = String(value.__cdata);
484	else if (value["#text"]) strValue = String(value["#text"]);
485	else if (value.text) strValue = String(value.text);
486	// Last resort: return empty string
487	else return "";
488	} else {
489	// For numbers, booleans, etc.
490	strValue = String(value);
491	}
492
493	// Decode HTML entities before returning
494	return decodeHtmlEntities(strValue);
495	}
496
497	/**
498	* Safely truncate a string to a maximum length
499	*/
500	export function truncateString(str: any, maxLength: number): string {
501	const strValue = extractStringValue(str);
502	if (!strValue) return "";
503	const trimmed = strValue.trim();
504	if (trimmed.length <= maxLength) return trimmed;
505	return trimmed.substring(0, maxLength - 3) + "...";
506	}
507
508	/**
509	* Validate and sanitize feed data for insertion
510	*/
511	export function sanitizeFeedData(feedData: any, feed?: any) {
512	// Extract title from feedData or feed, handling various formats
513	const titleValue = feedData?.title \|\| feed?.title \|\| "Untitled Feed";
514	const descValue =
515	feedData?.description \|\| feedData?.subtitle \|\| feed?.description \|\| "";
516
517	return {
518	title: truncateString(titleValue, 200),
519	description: truncateString(descValue, 1000),
520	};
521	}
522
523	/**
524	* Validate and sanitize post data for insertion
525	*/
526	export function sanitizePostData(
527	post: any,
528	isAtom: boolean,
529	feedTitle: string,
530	) {
531	return {
532	title: truncateString(post.title \|\| "Untitled", 1000),
533	author: truncateString(extractPostAuthor(post, isAtom, feedTitle), 200),
534	link: truncateString(extractPostLink(post, isAtom), 1000),
535	};
536	}