git.stevedylan.dev

src/lib/feed-operations.ts 14.9 K raw

import { XMLParser } from "fast-xml-parser";
import { COMMON_FEED_PATHS } from "./feed-discovery";

// Shared XML parser instance used by parseFeedXml.
// The option values below determine the key names the rest of this file reads
// from parsed nodes: attributes appear under "@_"-prefixed keys (e.g. "@_href"),
// mixed element text under "#text", and CDATA sections under "__cdata".
const parser = new XMLParser({
	// Keep attributes; Atom links are read from the href/rel attributes.
	ignoreAttributes: false,
	attributeNamePrefix: "@_",
	textNodeName: "#text",
	cdataPropName: "__cdata",
	// NOTE(review): presumably coerces attribute values to numbers/booleans — confirm
	// against the fast-xml-parser documentation.
	parseAttributeValue: true,
	trimValues: true,
});

/**
 * Decodes HTML entities in a string.
 *
 * Handles the common named entities (e.g. `&amp;`) as well as decimal
 * (`&#038;`) and hexadecimal (`&#x26;`) numeric character references.
 *
 * In a browser the decoding is delegated to the HTML parser via a detached
 * `<textarea>`. Elsewhere a manual fallback is used which:
 * - decodes in a single pass, so already-decoded output is never decoded a
 *   second time (`&amp;lt;` becomes `&lt;`, not `<`);
 * - uses code points rather than UTF-16 code units, so references above
 *   U+FFFF (emoji etc.) decode correctly;
 * - leaves unknown named entities and out-of-range numeric references as-is.
 *
 * @param text - Text that may contain HTML entities.
 * @returns The decoded text; falsy or non-string input is returned unchanged.
 */
function decodeHtmlEntities(text: string): string {
	if (!text || typeof text !== "string") return text;

	// A <textarea>'s innerHTML is parsed as text with entities, so its value
	// is the decoded string.
	if (typeof document !== "undefined") {
		const textarea = document.createElement("textarea");
		textarea.innerHTML = text;
		return textarea.value;
	}

	// Fallback for non-browser environments.
	// Entities with a fixed replacement. The typographic quote references are
	// deliberately mapped to their ASCII equivalents, as before.
	const knownEntities: Record<string, string> = {
		"&amp;": "&",
		"&lt;": "<",
		"&gt;": ">",
		"&quot;": '"',
		"&#039;": "'",
		"&apos;": "'",
		"&#8217;": "'",
		"&#8216;": "'",
		"&#8220;": '"',
		"&#8221;": '"',
		"&#8211;": "–",
		"&#8212;": "—",
		"&#038;": "&",
	};

	const maxCodePoint = 0x10ffff;

	// One pass over every entity-shaped token: the replacement text is never
	// rescanned, which is what prevents double decoding.
	return text.replace(
		/&(?:#x([0-9a-f]+)|#(\d+)|[a-z]+);/gi,
		(match: string, hex?: string, dec?: string): string => {
			const known = knownEntities[match];
			if (known !== undefined) return known;

			const digits = hex ?? dec;
			// Unknown named entity: leave untouched.
			if (digits === undefined) return match;

			const codePoint = parseInt(digits, hex !== undefined ? 16 : 10);
			// String.fromCodePoint throws on out-of-range values, so keep the
			// original text for those.
			if (!Number.isInteger(codePoint) || codePoint > maxCodePoint) {
				return match;
			}
			return String.fromCodePoint(codePoint);
		},
	);
}

/**
 * Result of parsing a feed document with `parseFeedXml`.
 */
export interface ParsedFeedData {
	/** Feed-level node: the RSS/RDF `channel` element or the Atom `feed` element. */
	feedData: any;
	/** Feed items (RSS/RDF) or entries (Atom), always as an array. */
	posts: any[];
	/** True when the document root was an Atom `feed`; false for RSS and RDF. */
	isAtom: boolean;
}

/**
 * Fetches XML data from a URL with CORS fallback.
 *
 * The URL is requested directly first, then through the primary CORS proxy,
 * then through the secondary one. An attempt counts as failed when the request
 * throws (network/CORS error) or when the server answers with a non-2xx
 * status, so an HTTP error page is never returned as if it were feed content.
 *
 * @param url - Absolute URL of the feed.
 * @returns The response body of the first successful attempt.
 * @throws The error from the last attempt when every attempt fails.
 */
export async function fetchFeedWithFallback(url: string): Promise<string> {
	const encodedUrl = encodeURIComponent(url);
	const attempts = [
		url,
		`https://proxy.alcove.tools?url=${encodedUrl}`,
		`https://proxy2.alcove.tools?url=${encodedUrl}`,
	];

	let lastError: unknown;
	for (const target of attempts) {
		try {
			const response = await fetch(target);
			if (!response.ok) {
				throw new Error(
					`Feed request failed with status ${response.status}`,
				);
			}
			return await response.text();
		} catch (error) {
			// Remember why this attempt failed and move on to the next one.
			lastError = error;
		}
	}

	throw lastError instanceof Error
		? lastError
		: new Error(`Failed to fetch feed: ${url}`);
}

/**
 * Parses a feed document and classifies it as RSS, Atom or RDF (RSS 1.0).
 *
 * @param xmlData - Raw XML text of the feed.
 * @returns The feed-level node, the list of non-empty posts, and whether the
 *   document is an Atom feed.
 * @throws Error when the XML cannot be parsed, when an RSS document has no
 *   channel, or when the root element is not a supported feed type.
 */
export function parseFeedXml(xmlData: string): ParsedFeedData {
	// A single child is parsed as a bare value and repeated children as an
	// array; normalise both (and a missing child) to an array.
	const asList = (node: any): any[] => {
		if (Array.isArray(node)) return node;
		return node ? [node] : [];
	};

	let root: any;
	try {
		root = parser.parse(xmlData);
	} catch (error) {
		throw new Error(
			`XML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`,
		);
	}

	const rss = root.rss;
	const atom = root.feed;
	const rdf = root["rdf:RDF"];

	let feedData: any;
	let candidates: any[];
	let isAtom = false;

	if (rss) {
		// RSS 2.0: items live inside the channel.
		feedData = rss.channel;
		if (!feedData) {
			throw new Error("RSS feed missing channel element");
		}
		candidates = asList(feedData.item);
	} else if (atom) {
		// Atom: the feed element itself carries the metadata and the entries.
		feedData = atom;
		candidates = asList(atom.entry);
		isAtom = true;
	} else if (rdf) {
		// RDF / RSS 1.0: items are siblings of the channel.
		feedData = rdf.channel;
		candidates = asList(rdf.item);
	} else {
		// Report the root elements that were found to aid debugging.
		const rootKeys = Object.keys(root);
		throw new Error(
			`Unsupported feed format. Found root elements: ${rootKeys.join(", ")}`,
		);
	}

	// Drop empty placeholders so callers only see real posts.
	const posts = candidates.filter(
		(post) => post && Object.keys(post).length > 0,
	);

	return { feedData, posts, isAtom };
}

/**
 * Probes a website's origin for a feed at each of the well-known feed paths.
 *
 * Every candidate is requested through the primary CORS proxy, falling back to
 * the secondary proxy when the request itself fails. The first successful
 * response whose body looks like XML/RSS/Atom wins.
 *
 * @param websiteUrl - Any absolute URL on the site; only its origin is used.
 * @returns The feed URL and its raw XML, or null when no candidate matched.
 */
export async function discoverFeed(websiteUrl: string): Promise<{
	feedUrl: string;
	xmlData: string;
} | null> {
	const { origin } = new URL(websiteUrl);

	// Cheap sniff only; real validation happens when the XML is parsed.
	const looksLikeFeed = (body: string): boolean =>
		body.trim().startsWith("<?xml") ||
		body.includes("<rss") ||
		body.includes("<feed");

	for (const path of COMMON_FEED_PATHS) {
		const feedUrl = `${origin}${path}`;

		try {
			const target = encodeURIComponent(feedUrl);

			let response: Response;
			try {
				response = await fetch(`https://proxy.alcove.tools?url=${target}`);
			} catch {
				// Primary proxy unreachable: retry through the secondary one.
				response = await fetch(`https://proxy2.alcove.tools?url=${target}`);
			}

			if (!response.ok) continue;

			const xmlData = await response.text();
			if (looksLikeFeed(xmlData)) {
				return { feedUrl, xmlData };
			}
		} catch {
			// This candidate failed entirely; move on to the next path.
		}
	}

	return null;
}

/**
 * Resolves the channel ID for a YouTube channel URL.
 *
 * Supported URL shapes:
 * - https://www.youtube.com/channel/UC...  (ID read straight from the URL)
 * - https://www.youtube.com/@ChannelHandle (page fetched and scanned)
 * - https://www.youtube.com/c/ChannelName  (page fetched and scanned)
 * - https://www.youtube.com/user/Username  (page fetched and scanned)
 *
 * Pages are fetched through the primary CORS proxy with the secondary proxy
 * as fallback.
 *
 * @param url - YouTube channel URL.
 * @returns The channel ID, or null when it cannot be determined. Failures are
 *   logged and reported as null rather than thrown.
 */
export async function extractYouTubeChannelId(
	url: string,
): Promise<string | null> {
	// Downloads the channel page HTML via the CORS proxies.
	const loadPage = async (): Promise<string> => {
		const target = encodeURIComponent(url);
		let page: Response;
		try {
			page = await fetch(`https://proxy.alcove.tools?url=${target}`);
		} catch {
			// Fall back to secondary CORS proxy
			page = await fetch(`https://proxy2.alcove.tools?url=${target}`);
		}
		return page.text();
	};

	// Places the ID can show up in the page, in order of preference.
	const embeddedJson = /channelId":"([^"]+)"/;
	const metaTag = /<meta itemprop="channelId" content="([^"]+)">/;
	const canonicalLink =
		/<link rel="canonical" href="https:\/\/www\.youtube\.com\/channel\/([^"]+)">/;

	try {
		// Direct channel ID format: nothing to fetch.
		if (url.includes("/channel/")) {
			const direct = /\/channel\/([^/?]+)/.exec(url);
			return direct ? direct[1] : null;
		}

		const isHandleUrl = url.includes("/@");
		// A "/@" with no handle after it cannot be resolved.
		if (isHandleUrl && !/\/@([^/?]+)/.test(url)) return null;

		// Each stage that applies fetches the page and scans it; a stage that
		// finds nothing lets the next applicable stage try.
		const stages = [
			{
				applies: isHandleUrl,
				patterns: [embeddedJson, metaTag, canonicalLink],
			},
			{
				applies: url.includes("/c/") || url.includes("/user/"),
				patterns: [embeddedJson],
			},
		];

		for (const stage of stages) {
			if (!stage.applies) continue;

			try {
				const html = await loadPage();
				for (const pattern of stage.patterns) {
					const found = pattern.exec(html);
					if (found) {
						return found[1];
					}
				}
			} catch (error) {
				console.error("Failed to fetch YouTube page for channel ID:", error);
				return null;
			}
		}

		return null;
	} catch (error) {
		console.error("Error extracting YouTube channel ID:", error);
		return null;
	}
}

/**
 * Builds the RSS feed URL for a YouTube channel URL.
 *
 * @param url - YouTube channel URL in any format accepted by
 *   `extractYouTubeChannelId`.
 * @returns The channel's videos feed URL, or null when no channel ID could be
 *   resolved.
 */
export async function convertYouTubeUrlToFeed(
	url: string,
): Promise<string | null> {
	const channelId = await extractYouTubeChannelId(url);

	return channelId
		? `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`
		: null;
}

/**
 * Checks if a URL points at YouTube.
 *
 * The decision is made on the URL's hostname (`youtube.com`, `youtu.be` and
 * their subdomains) rather than on a substring of the whole URL, so look-alike
 * hosts such as `notyoutube.com` or `youtube.com.evil.example`, and URLs that
 * merely mention YouTube in their path or query, are rejected.
 *
 * @param url - Absolute URL, or a scheme-less one such as `youtube.com/@name`.
 * @returns True when the host is a YouTube host; false otherwise, including
 *   for empty or unparseable input.
 */
export function isYouTubeUrl(url: string): boolean {
	if (typeof url !== "string") return false;

	const candidate = url.trim();
	if (!candidate) return false;

	const hostnameOf = (value: string): string => {
		try {
			return new URL(value).hostname.toLowerCase();
		} catch {
			return "";
		}
	};

	// Scheme-less input either fails to parse or parses without a host;
	// retry it as an https URL.
	const hostname = hostnameOf(candidate) || hostnameOf(`https://${candidate}`);
	if (!hostname) return false;

	return ["youtube.com", "youtu.be"].some(
		(domain) => hostname === domain || hostname.endsWith(`.${domain}`),
	);
}

/**
 * Extracts the YouTube video ID from a video URL.
 *
 * Supports:
 * - https://www.youtube.com/watch?v=VIDEO_ID
 * - https://youtu.be/VIDEO_ID
 * - https://www.youtube.com/embed/VIDEO_ID
 *
 * The ID ends at the first URL delimiter, so trailing query parameters,
 * fragments (`#t=30`) and path segments are never included in the result.
 *
 * @param url - Video URL.
 * @returns The video ID, or null when the URL matches none of the formats.
 */
export function extractYouTubeVideoId(url: string): string | null {
	if (typeof url !== "string") return null;

	// Tried in order: watch URL, short URL, embed URL.
	const patterns = [
		/[?&]v=([^&#]+)/,
		/youtu\.be\/([^/?&#]+)/,
		/youtube\.com\/embed\/([^/?&#]+)/,
	];

	for (const pattern of patterns) {
		const id = pattern.exec(url)?.[1];
		if (id) return id;
	}

	return null;
}

/**
 * Tells whether a post belongs to a YouTube feed, judged by its feed URL.
 *
 * @param feedUrl - URL of the feed the post came from, or null when unknown.
 * @returns True when the URL contains the YouTube videos feed path.
 */
export function isYouTubePost(feedUrl: string | null): boolean {
	return feedUrl ? feedUrl.includes("youtube.com/feeds/videos.xml") : false;
}

/**
 * Picks the URL of a post from a parsed RSS item or Atom entry.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - Whether the node comes from an Atom feed.
 * @returns The post link, falling back to the entry id (Atom) or guid/id
 *   (RSS), and finally to "#" when nothing usable is present.
 */
export function extractPostLink(post: any, isAtom: boolean): string {
	if (!isAtom) {
		// RSS: the link is normally plain text, with guid/id as stand-ins.
		const candidate = post.link || post.guid || post.id;
		if (!candidate) return "#";

		// The value may be wrapped in an object holding text or CDATA.
		return typeof candidate === "object"
			? candidate["#text"] || candidate.__cdata || "#"
			: String(candidate);
	}

	// Atom: the link may be a string, a single link object, or a list of them.
	const atomLink = post.link;
	const idOrPlaceholder = post.id || "#";

	if (typeof atomLink === "string") {
		return atomLink || idOrPlaceholder;
	}

	if (Array.isArray(atomLink)) {
		// Prefer the 'alternate' link (or one with no rel), else the first link.
		const preferred = atomLink.find((entry: any) => {
			const rel = entry["@_rel"];
			return rel === "alternate" || !rel;
		});
		return (
			preferred?.["@_href"] || atomLink[0]?.["@_href"] || idOrPlaceholder
		);
	}

	if (atomLink && typeof atomLink === "object") {
		return atomLink["@_href"] || idOrPlaceholder;
	}

	return idOrPlaceholder;
}

/**
 * Extracts the author of a post from a parsed RSS item or Atom entry.
 *
 * An author value may be plain text, an object carrying the name in `name`,
 * `#text` or `__cdata`, or a list of such values when the entry has several
 * authors; a list is rendered as the names joined with ", ".
 *
 * For Atom only `author` is consulted. For RSS `author`, `dc:creator` and
 * `creator` are tried in that order, and the first one that yields a
 * non-empty name wins.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - Whether the node comes from an Atom feed.
 * @param feedTitle - Fallback used when no author name can be found.
 * @returns The author name(s), or `feedTitle` when none is available.
 */
export function extractPostAuthor(
	post: any,
	isAtom: boolean,
	feedTitle: string,
): string {
	// Reduces any supported author shape to a display name ("" when none).
	const resolveName = (value: any): string => {
		if (!value) return "";
		if (Array.isArray(value)) {
			return value
				.map((entry) => resolveName(entry))
				.filter((name) => name.length > 0)
				.join(", ");
		}
		if (typeof value === "object") {
			return (
				resolveName(value.name) ||
				resolveName(value["#text"]) ||
				resolveName(value.__cdata)
			);
		}
		return String(value);
	};

	const sources = isAtom
		? [post.author]
		: [post.author, post["dc:creator"], post.creator];

	for (const source of sources) {
		const name = resolveName(source);
		if (name) return name;
	}

	return feedTitle;
}

/**
 * Extracts the body content of a post from a parsed RSS item or Atom entry.
 *
 * Content is taken from the first non-empty of `content:encoded`, `content`,
 * `description` and `summary`. It may be plain text, a number/boolean (when
 * the parser coerced a purely numeric body), or an object carrying the text
 * in `__cdata` or `#text`.
 *
 * When there is no usable content a fallback is returned: a "View post" link
 * to `postLink` if one was given, otherwise a plain message. The link comes
 * from the feed and is therefore untrusted, so it is HTML-escaped before being
 * placed in the `href` attribute.
 *
 * NOTE(review): escaping keeps the markup well-formed but does not restrict
 * the URL scheme; confirm the rendering layer sanitises feed HTML.
 *
 * @param post - Parsed item/entry node.
 * @param postLink - Optional URL of the post, used for the fallback link.
 * @returns The trimmed content, or the fallback when none is available.
 */
export function extractPostContent(post: any, postLink?: string): string {
	const htmlEscapes: Record<string, string> = {
		"&": "&amp;",
		"<": "&lt;",
		">": "&gt;",
		'"': "&quot;",
		"'": "&#39;",
	};
	const escapeAttribute = (value: string): string =>
		value.replace(/[&<>"']/g, (char) => htmlEscapes[char] ?? char);

	// Try various content fields in order of preference
	const content =
		post["content:encoded"] || post.content || post.description || post.summary;

	// Default fallback message
	const fallbackMessage = postLink
		? `<p><a href="${escapeAttribute(String(postLink))}" target="_blank" rel="noopener noreferrer">View post</a></p>`
		: "Please open on the web";

	let text = "";
	if (typeof content === "string") {
		text = content;
	} else if (typeof content === "number" || typeof content === "boolean") {
		// A body such as "2024" is handed over already coerced.
		text = String(content);
	} else if (content && typeof content === "object") {
		// Handle CDATA or nested text
		text = String(content.__cdata || content["#text"] || "");
	}

	const trimmed = text.trim();
	// No content found - this is fine for link-only feeds
	return trimmed.length > 0 ? trimmed : fallbackMessage;
}

/**
 * Rewrites date strings that use the "24:00:00" end-of-day notation, which
 * WebKit browsers fail to parse.
 *
 * Such a string is turned into midnight of the following day: the time is
 * replaced with "00:00:00", the result is parsed, one calendar day is added
 * and the ISO form is returned. When the rewritten string still cannot be
 * parsed it is returned as-is. Any other input is returned unchanged.
 *
 * @param dateString - Date string as found in the feed.
 * @returns The normalised date string.
 */
function normalizeDateString(dateString: string): string {
	// Nothing to do unless this is a string using the 24:00:00 notation.
	if (typeof dateString !== "string" || !dateString.includes("24:00:00")) {
		return dateString;
	}

	// Only the first occurrence is rewritten.
	const midnight = dateString.replace("24:00:00", "00:00:00");

	try {
		const shifted = new Date(midnight);
		if (!Number.isNaN(shifted.getTime())) {
			// 24:00 of day N is 00:00 of day N+1.
			shifted.setDate(shifted.getDate() + 1);
			return shifted.toISOString();
		}
	} catch {
		// Fall through to the plain string replacement below.
	}

	return midnight;
}

/**
 * Extracts the publication date of a post as an ISO 8601 string.
 *
 * Fields are tried in order of preference and the first one holding a value
 * is used:
 * - Atom: `published`, `updated`, `dc:date`
 * - RSS/RDF: `pubDate`, `published`, `updated`, `dc:date`
 *
 * `dc:date` is where RSS 1.0 (RDF) items carry their date. A field may be
 * plain text or an object carrying the text in `__cdata` or `#text`.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - Whether the node comes from an Atom feed.
 * @returns The date in ISO format; the current time when the post has no
 *   date or the date cannot be parsed.
 */
export function extractPostDate(post: any, isAtom?: boolean): string {
	const now = (): string => new Date().toISOString();

	// Unwraps a value the parser delivered as an object (attributes or CDATA).
	const unwrap = (value: any): any =>
		value && typeof value === "object"
			? value.__cdata || value["#text"]
			: value;

	try {
		const candidates = isAtom
			? [post.published, post.updated, post["dc:date"]]
			: [post.pubDate, post.published, post.updated, post["dc:date"]];

		const dateValue = candidates.map(unwrap).find(Boolean);
		if (!dateValue) {
			return now(); // Use current date if no date found
		}

		// Normalize the date string to handle problematic formats
		const parsedDate = new Date(normalizeDateString(String(dateValue)));

		// Check if date is valid
		return Number.isNaN(parsedDate.getTime())
			? now()
			: parsedDate.toISOString();
	} catch {
		return now();
	}
}

/**
 * Reduces a parsed XML value to a plain string with HTML entities decoded.
 *
 * Strings are used as-is; objects contribute the first non-empty of their
 * `__cdata`, `#text` or `text` properties; other primitives are stringified.
 *
 * @param value - Parsed value of any shape.
 * @returns The decoded string, or "" for falsy input and for objects that
 *   carry no text.
 */
function extractStringValue(value: any): string {
	if (!value) return "";

	let raw: string;
	switch (typeof value) {
		case "string":
			raw = value;
			break;
		case "object": {
			// Try the common text-carrying properties in order.
			const inner = value.__cdata || value["#text"] || value.text;
			if (!inner) return "";
			raw = String(inner);
			break;
		}
		default:
			// Numbers, booleans, etc.
			raw = String(value);
	}

	// Decode HTML entities before returning
	return decodeHtmlEntities(raw);
}

/**
 * Safely truncates a value to a maximum length.
 *
 * The value is first converted to a decoded string and trimmed. Text longer
 * than `maxLength` is cut and suffixed with "..." so that the result is never
 * longer than `maxLength`; when `maxLength` leaves no room for the ellipsis
 * the text is simply cut. A cut never falls inside a surrogate pair, so no
 * half character (e.g. half an emoji) is left at the end.
 *
 * @param str - Value of any shape accepted by `extractStringValue`.
 * @param maxLength - Maximum length of the result in UTF-16 code units.
 * @returns The trimmed, possibly truncated string ("" when there is no text).
 */
export function truncateString(str: any, maxLength: number): string {
	const text = extractStringValue(str).trim();
	if (text.length <= maxLength) return text;

	const ellipsis = "...";
	if (maxLength <= ellipsis.length) {
		// No room for the ellipsis: a bare cut keeps the length limit.
		return text.substring(0, Math.max(0, maxLength));
	}

	let cut = maxLength - ellipsis.length;
	// Step back when the cut would separate a high surrogate from its pair.
	const lastUnit = text.charCodeAt(cut - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
		cut -= 1;
	}

	return text.substring(0, cut) + ellipsis;
}

/**
 * Produces length-limited title and description values for a feed.
 *
 * Values parsed from the feed take precedence over those of the optional
 * existing feed record.
 *
 * @param feedData - Feed-level node from the parsed document.
 * @param feed - Optional existing feed record used as a fallback.
 * @returns The title (max 200 characters, "Untitled Feed" when missing) and
 *   the description (max 1000 characters, "" when missing).
 */
export function sanitizeFeedData(feedData: any, feed?: any) {
	// Title: parsed feed first, then the existing record, then a placeholder.
	const rawTitle = feedData?.title || feed?.title || "Untitled Feed";

	// Description: RSS description, Atom subtitle, then the existing record.
	const rawDescription =
		feedData?.description || feedData?.subtitle || feed?.description || "";

	const title = truncateString(rawTitle, 200);
	const description = truncateString(rawDescription, 1000);

	return { title, description };
}

/**
 * Produces length-limited title, author and link values for a post.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - Whether the node comes from an Atom feed.
 * @param feedTitle - Feed title, used as the author when the post has none.
 * @returns The title (max 1000 characters, "Untitled" when missing), the
 *   author (max 200 characters) and the link (max 1000 characters).
 */
export function sanitizePostData(
	post: any,
	isAtom: boolean,
	feedTitle: string,
) {
	const title = truncateString(post.title || "Untitled", 1000);

	const rawAuthor = extractPostAuthor(post, isAtom, feedTitle);
	const author = truncateString(rawAuthor, 200);

	const rawLink = extractPostLink(post, isAtom);
	const link = truncateString(rawLink, 1000);

	return { title, author, link };
}

1	import { XMLParser } from "fast-xml-parser";
2	import { COMMON_FEED_PATHS } from "./feed-discovery";
3
4	const parser = new XMLParser({
5	ignoreAttributes: false,
6	attributeNamePrefix: "@_",
7	textNodeName: "#text",
8	cdataPropName: "__cdata",
9	parseAttributeValue: true,
10	trimValues: true,
11	});
12
13	/**
14	* Decodes HTML entities in a string
15	* Handles both named entities (&) and numeric entities (&, &)
16	*/
17	function decodeHtmlEntities(text: string): string {
18	if (!text \|\| typeof text !== "string") return text;
19
20	// Create a temporary element to use browser's built-in HTML decoding
21	if (typeof document !== "undefined") {
22	const textarea = document.createElement("textarea");
23	textarea.innerHTML = text;
24	return textarea.value;
25	}
26
27	// Fallback for non-browser environments (though we're in a browser app)
28	// Handle common HTML entities manually
29	const entities: Record<string, string> = {
30	"&": "&",
31	"<": "<",
32	">": ">",
33	""": '"',
34	"'": "'",
35	"'": "'",
36	"’": "'",
37	"‘": "'",
38	"“": '"',
39	"”": '"',
40	"–": "–",
41	"—": "—",
42	"&": "&",
43	};
44
45	let decoded = text;
46	for (const [entity, char] of Object.entries(entities)) {
47	decoded = decoded.replace(new RegExp(entity, "g"), char);
48	}
49
50	// Handle numeric entities like ’
51	decoded = decoded.replace(/&#(\d+);/g, (_match, dec) => {
52	return String.fromCharCode(dec);
53	});
54
55	// Handle hex entities like '
56	decoded = decoded.replace(/&#x([0-9a-f]+);/gi, (_match, hex) => {
57	return String.fromCharCode(parseInt(hex, 16));
58	});
59
60	return decoded;
61	}
62
63	export interface ParsedFeedData {
64	feedData: any;
65	posts: any[];
66	isAtom: boolean;
67	}
68
69	/**
70	* Fetches XML data from a URL with CORS fallback
71	*/
72	export async function fetchFeedWithFallback(url: string): Promise<string> {
73	try {
74	// Try to fetch directly first
75	const response = await fetch(url);
76	return await response.text();
77	} catch {
78	// Fall back to primary CORS proxy if direct fetch fails
79	try {
80	const response = await fetch(
81	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
82	);
83	return await response.text();
84	} catch {
85	// Fall back to secondary CORS proxy if primary fails
86	const response = await fetch(
87	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
88	);
89	return await response.text();
90	}
91	}
92	}
93
94	/**
95	* Parses XML data and determines if it's RSS or Atom feed
96	*/
97	export function parseFeedXml(xmlData: string): ParsedFeedData {
98	let parsedXmlData: any;
99
100	try {
101	parsedXmlData = parser.parse(xmlData);
102	} catch (error) {
103	throw new Error(
104	`XML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`,
105	);
106	}
107
108	// Determine if it's RSS or Atom feed
109	let feedData: any;
110	let posts: any[];
111	let isAtom = false;
112
113	if (parsedXmlData.rss) {
114	// RSS feed
115	feedData = parsedXmlData.rss.channel;
116	if (!feedData) {
117	throw new Error("RSS feed missing channel element");
118	}
119	const items = feedData.item \|\| [];
120	// Ensure posts is always an array (single item might not be in array)
121	posts = Array.isArray(items) ? items : items ? [items] : [];
122	} else if (parsedXmlData.feed) {
123	// Atom feed
124	feedData = parsedXmlData.feed;
125	const entries = feedData.entry \|\| [];
126	// Ensure posts is always an array (single entry might not be in array)
127	posts = Array.isArray(entries) ? entries : entries ? [entries] : [];
128	isAtom = true;
129	} else if (parsedXmlData["rdf:RDF"]) {
130	// RDF/RSS 1.0 feed
131	feedData = parsedXmlData["rdf:RDF"].channel;
132	const items = parsedXmlData["rdf:RDF"].item \|\| [];
133	posts = Array.isArray(items) ? items : items ? [items] : [];
134	isAtom = false;
135	} else {
136	// Log available root elements for debugging
137	const rootKeys = Object.keys(parsedXmlData);
138	throw new Error(
139	`Unsupported feed format. Found root elements: ${rootKeys.join(", ")}`,
140	);
141	}
142
143	// Filter out empty objects from posts array
144	posts = posts.filter((post) => post && Object.keys(post).length > 0);
145
146	return { feedData, posts, isAtom };
147	}
148
149	/**
150	* Discovers RSS/Atom feed URL from a website URL
151	*/
152	export async function discoverFeed(websiteUrl: string): Promise<{
153	feedUrl: string;
154	xmlData: string;
155	} \| null> {
156	const urlObj = new URL(websiteUrl);
157	const origin = urlObj.origin;
158
159	for (const path of COMMON_FEED_PATHS) {
160	const testUrl = `${origin}${path}`;
161
162	try {
163	// Try primary CORS proxy
164	let response: Response;
165	try {
166	response = await fetch(
167	`https://proxy.alcove.tools?url=${encodeURIComponent(testUrl)}`,
168	);
169	} catch {
170	// Fall back to secondary CORS proxy
171	response = await fetch(
172	`https://proxy2.alcove.tools?url=${encodeURIComponent(testUrl)}`,
173	);
174	}
175
176	if (response.ok) {
177	const text = await response.text();
178	// Quick check if it looks like XML
179	if (
180	text.trim().startsWith("<?xml") \|\|
181	text.includes("<rss") \|\|
182	text.includes("<feed")
183	) {
184	return { feedUrl: testUrl, xmlData: text };
185	}
186	}
187	} catch (error) {
188	continue;
189	}
190	}
191
192	return null;
193	}
194
195	/**
196	* Extracts YouTube channel ID from various YouTube URL formats
197	* Supports:
198	* - https://www.youtube.com/@ChannelHandle
199	* - https://www.youtube.com/channel/UC...
200	* - https://www.youtube.com/c/ChannelName
201	* - https://www.youtube.com/user/Username
202	*/
203	export async function extractYouTubeChannelId(
204	url: string,
205	): Promise<string \| null> {
206	try {
207	// Direct channel ID format
208	if (url.includes("/channel/")) {
209	const match = url.match(/\/channel\/([^/?]+)/);
210	return match ? match[1] : null;
211	}
212
213	// Handle @ format - need to fetch the page to get channel ID
214	if (url.includes("/@")) {
215	const handle = url.match(/\/@([^/?]+)/)?.[1];
216	if (!handle) return null;
217
218	// Fetch the YouTube page to extract the channel ID from meta tags
219	try {
220	let response: Response;
221	try {
222	response = await fetch(
223	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
224	);
225	} catch {
226	// Fall back to secondary CORS proxy
227	response = await fetch(
228	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
229	);
230	}
231	const html = await response.text();
232
233	// Look for channel ID in various places
234	const channelIdMatch = html.match(/channelId":"([^"]+)"/);
235	if (channelIdMatch) {
236	return channelIdMatch[1];
237	}
238
239	// Alternative: look in meta tags
240	const metaMatch = html.match(
241	/<meta itemprop="channelId" content="([^"]+)">/,
242	);
243	if (metaMatch) {
244	return metaMatch[1];
245	}
246
247	// Alternative: look in link tags
248	const linkMatch = html.match(
249	/<link rel="canonical" href="https:\/\/www\.youtube\.com\/channel\/([^"]+)">/,
250	);
251	if (linkMatch) {
252	return linkMatch[1];
253	}
254	} catch (error) {
255	console.error("Failed to fetch YouTube page for channel ID:", error);
256	return null;
257	}
258	}
259
260	// For /c/ and /user/ formats, we also need to fetch the page
261	if (url.includes("/c/") \|\| url.includes("/user/")) {
262	try {
263	let response: Response;
264	try {
265	response = await fetch(
266	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
267	);
268	} catch {
269	// Fall back to secondary CORS proxy
270	response = await fetch(
271	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
272	);
273	}
274	const html = await response.text();
275
276	const channelIdMatch = html.match(/channelId":"([^"]+)"/);
277	if (channelIdMatch) {
278	return channelIdMatch[1];
279	}
280	} catch (error) {
281	console.error("Failed to fetch YouTube page for channel ID:", error);
282	return null;
283	}
284	}
285
286	return null;
287	} catch (error) {
288	console.error("Error extracting YouTube channel ID:", error);
289	return null;
290	}
291	}
292
293	/**
294	* Converts YouTube channel URL to RSS feed URL
295	*/
296	export async function convertYouTubeUrlToFeed(
297	url: string,
298	): Promise<string \| null> {
299	const channelId = await extractYouTubeChannelId(url);
300	if (!channelId) return null;
301
302	return `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`;
303	}
304
305	/**
306	* Checks if a URL is a YouTube URL
307	*/
308	export function isYouTubeUrl(url: string): boolean {
309	return url.includes("youtube.com") \|\| url.includes("youtu.be");
310	}
311
312	/**
313	* Extracts YouTube video ID from a video URL
314	* Supports:
315	* - https://www.youtube.com/watch?v=VIDEO_ID
316	* - https://youtu.be/VIDEO_ID
317	* - https://www.youtube.com/embed/VIDEO_ID
318	*/
319	export function extractYouTubeVideoId(url: string): string \| null {
320	try {
321	// Standard watch URL
322	const watchMatch = url.match(/[?&]v=([^&]+)/);
323	if (watchMatch) return watchMatch[1];
324
325	// Short URL format
326	const shortMatch = url.match(/youtu\.be\/([^?]+)/);
327	if (shortMatch) return shortMatch[1];
328
329	// Embed URL format
330	const embedMatch = url.match(/youtube\.com\/embed\/([^?]+)/);
331	if (embedMatch) return embedMatch[1];
332
333	return null;
334	} catch {
335	return null;
336	}
337	}
338
339	/**
340	* Checks if a post is from a YouTube feed
341	*/
342	export function isYouTubePost(feedUrl: string \| null): boolean {
343	if (!feedUrl) return false;
344	return feedUrl.includes("youtube.com/feeds/videos.xml");
345	}
346
347	/**
348	* Extracts post link from RSS or Atom post entry
349	*/
350	export function extractPostLink(post: any, isAtom: boolean): string {
351	if (isAtom) {
352	// Handle Atom link which can be string, object, or array
353	if (typeof post.link === "string") {
354	return post.link \|\| post.id \|\| "#";
355	} else if (Array.isArray(post.link)) {
356	// Find 'alternate' link or use first link
357	const alternateLink = post.link.find(
358	(l: any) => l["@_rel"] === "alternate" \|\| !l["@_rel"],
359	);
360	return (
361	alternateLink?.["@_href"] \|\| post.link[0]?.["@_href"] \|\| post.id \|\| "#"
362	);
363	} else if (post.link && typeof post.link === "object") {
364	return post.link["@_href"] \|\| post.id \|\| "#";
365	}
366	return post.id \|\| "#";
367	}
368
369	// RSS feed
370	const link = post.link \|\| post.guid \|\| post.id;
371	if (!link) return "#";
372
373	// Handle link as object (sometimes RSS parsers do this)
374	if (typeof link === "object") {
375	return link["#text"] \|\| link.__cdata \|\| "#";
376	}
377
378	return String(link);
379	}
380
381	/**
382	* Extracts author from RSS or Atom post entry
383	*/
384	export function extractPostAuthor(
385	post: any,
386	isAtom: boolean,
387	feedTitle: string,
388	): string {
389	if (isAtom) {
390	// Atom can have author as object with name property
391	const author = post.author;
392	if (typeof author === "object" && author !== null) {
393	return author.name \|\| author["#text"] \|\| feedTitle;
394	}
395	return author \|\| feedTitle;
396	}
397
398	// RSS feed
399	const author = post.author \|\| post["dc:creator"] \|\| post.creator;
400	if (!author) return feedTitle;
401
402	// Handle author as object
403	if (typeof author === "object") {
404	return author["#text"] \|\| author.__cdata \|\| feedTitle;
405	}
406
407	return String(author);
408	}
409
410	/**
411	* Extracts content from RSS or Atom post entry
412	*/
413	export function extractPostContent(post: any, postLink?: string): string {
414	// Try various content fields in order of preference
415	const content =
416	post["content:encoded"] \|\| post.content \|\| post.description \|\| post.summary;
417
418	// Default fallback message
419	const fallbackMessage = postLink
420	? `<p><a href="${postLink}" target="_blank" rel="noopener noreferrer">View post</a></p>`
421	: "Please open on the web";
422
423	// Handle different content structures
424	if (typeof content === "string") {
425	const trimmed = content.trim();
426	return trimmed.length > 0 ? trimmed : fallbackMessage;
427	} else if (content && typeof content === "object") {
428	// Handle CDATA or nested text
429	const extracted = content.__cdata \|\| content["#text"] \|\| "";
430	const trimmed = String(extracted).trim();
431	return trimmed.length > 0 ? trimmed : fallbackMessage;
432	}
433
434	// No content found - this is fine for link-only feeds
435	return fallbackMessage;
436	}
437
438	/**
439	* Normalizes date strings to handle problematic formats like "24:00:00"
440	* which causes errors in WebKit browsers
441	*/
442	function normalizeDateString(dateString: string): string {
443	if (!dateString \|\| typeof dateString !== 'string') return dateString;
444
445	// Handle the 24:00:00 time format by converting it to 00:00:00 of the next day
446	if (dateString.includes('24:00:00')) {
447	// Replace 24:00:00 with 00:00:00
448	const normalizedDate = dateString.replace(/24:00:00/, '00:00:00');
449
450	try {
451	// Parse the normalized date and add one day
452	const tempDate = new Date(normalizedDate);
453	if (!isNaN(tempDate.getTime())) {
454	tempDate.setDate(tempDate.getDate() + 1);
455	return tempDate.toISOString();
456	}
457	} catch {
458	// If parsing fails, continue with the original string replacement
459	}
460
461	return normalizedDate;
462	}
463
464	return dateString;
465	}
466
467	/**
468	* Extracts published date from RSS or Atom post entry
469	*/
470	export function extractPostDate(post: any, isAtom?: boolean): string {
471	try {
472	let dateValue: any;
473
474	if (isAtom) {
475	// For Atom feeds, prioritize published date over updated date
476	dateValue = post.published \|\| post.updated;
477	} else {
478	// For RSS feeds, use pubDate first, then fall back to Atom fields
479	dateValue = post.pubDate \|\| post.published \|\| post.updated;
480	}
481
482	if (!dateValue) {
483	return new Date().toISOString(); // Use current date if no date found
484	}
485
486	// Normalize the date string to handle problematic formats
487	const normalizedDateValue = normalizeDateString(String(dateValue));
488
489	const parsedDate = new Date(normalizedDateValue);
490	// Check if date is valid
491	if (isNaN(parsedDate.getTime())) {
492	return new Date().toISOString();
493	}
494	return parsedDate.toISOString();
495	} catch {
496	return new Date().toISOString();
497	}
498	}
499
500	/**
501	* Extract string value from various data types and decode HTML entities
502	*/
503	function extractStringValue(value: any): string {
504	if (!value) return "";
505
506	let strValue = "";
507
508	if (typeof value === "string") {
509	strValue = value;
510	} else if (typeof value === "object") {
511	// Handle objects that might contain text
512	// Try common text properties
513	if (value.__cdata) strValue = String(value.__cdata);
514	else if (value["#text"]) strValue = String(value["#text"]);
515	else if (value.text) strValue = String(value.text);
516	// Last resort: return empty string
517	else return "";
518	} else {
519	// For numbers, booleans, etc.
520	strValue = String(value);
521	}
522
523	// Decode HTML entities before returning
524	return decodeHtmlEntities(strValue);
525	}
526
527	/**
528	* Safely truncate a string to a maximum length
529	*/
530	export function truncateString(str: any, maxLength: number): string {
531	const strValue = extractStringValue(str);
532	if (!strValue) return "";
533	const trimmed = strValue.trim();
534	if (trimmed.length <= maxLength) return trimmed;
535	return trimmed.substring(0, maxLength - 3) + "...";
536	}
537
538	/**
539	* Validate and sanitize feed data for insertion
540	*/
541	export function sanitizeFeedData(feedData: any, feed?: any) {
542	// Extract title from feedData or feed, handling various formats
543	const titleValue = feedData?.title \|\| feed?.title \|\| "Untitled Feed";
544	const descValue =
545	feedData?.description \|\| feedData?.subtitle \|\| feed?.description \|\| "";
546
547	return {
548	title: truncateString(titleValue, 200),
549	description: truncateString(descValue, 1000),
550	};
551	}
552
553	/**
554	* Validate and sanitize post data for insertion
555	*/
556	export function sanitizePostData(
557	post: any,
558	isAtom: boolean,
559	feedTitle: string,
560	) {
561	return {
562	title: truncateString(post.title \|\| "Untitled", 1000),
563	author: truncateString(extractPostAuthor(post, isAtom, feedTitle), 200),
564	link: truncateString(extractPostLink(post, isAtom), 1000),
565	};
566	}