git.stevedylan.dev

src/lib/feed-operations.ts 13.7 K raw

import { XMLParser } from "fast-xml-parser";
import { COMMON_FEED_PATHS } from "./feed-discovery";

// Single XMLParser instance shared by the feed-parsing code in this module.
const parser = new XMLParser({
	// Keep XML attributes in the parsed output (link extraction reads them).
	ignoreAttributes: false,
	// Attribute keys carry this prefix, e.g. `@_href` and `@_rel` as read below.
	attributeNamePrefix: "@_",
	// Key under which an element's text is stored when it also has attributes.
	textNodeName: "#text",
	// Key under which CDATA sections are stored; extractors below check `__cdata`.
	cdataPropName: "__cdata",
	// NOTE(review): presumably coerces numeric/boolean-looking attribute values
	// to non-string types — confirm against the fast-xml-parser documentation.
	parseAttributeValue: true,
	trimValues: true,
});

/**
 * Decodes HTML entities in a string.
 *
 * In a browser the work is delegated to a detached `<textarea>`, which
 * understands every named and numeric entity. Outside a browser a small
 * fallback decoder handles the common named entities plus any decimal
 * (`&#038;`) or hexadecimal (`&#x26;`) numeric reference.
 *
 * The fallback decodes in a SINGLE pass so that already-escaped text is not
 * decoded twice (`&amp;lt;` becomes `&lt;`, not `<`), and it uses
 * `String.fromCodePoint` so references above U+FFFF (emoji etc.) survive.
 *
 * @param text - Text that may contain HTML entities.
 * @returns The decoded text; falsy or non-string input is returned unchanged.
 */
function decodeHtmlEntities(text: string): string {
	if (!text || typeof text !== "string") return text;

	// Create a temporary element to use browser's built-in HTML decoding
	if (typeof document !== "undefined") {
		const textarea = document.createElement("textarea");
		textarea.innerHTML = text;
		return textarea.value;
	}

	// Fallback for non-browser environments.
	// Exact-match table: these spellings keep their historical replacements
	// (typographic quotes are normalised to their ASCII counterparts).
	const entities: Record<string, string> = {
		"&amp;": "&",
		"&lt;": "<",
		"&gt;": ">",
		"&quot;": '"',
		"&#039;": "'",
		"&apos;": "'",
		"&#8217;": "'",
		"&#8216;": "'",
		"&#8220;": '"',
		"&#8221;": '"',
		"&#8211;": "–",
		"&#8212;": "—",
		"&#038;": "&",
	};

	// One regex covers decimal, hex and named references, so every entity in
	// the input is examined exactly once and replacements are never re-scanned.
	return text.replace(
		/&(?:#(\d+)|#x([0-9a-f]+)|[a-z]+);/gi,
		(match: string, dec?: string, hex?: string): string => {
			const known = entities[match];
			if (known !== undefined) return known;

			// Unknown named entity: leave it as written.
			if (dec === undefined && hex === undefined) return match;

			const codePoint =
				dec !== undefined ? parseInt(dec, 10) : parseInt(hex as string, 16);

			// String.fromCodePoint throws a RangeError outside the Unicode range.
			if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) return match;

			return String.fromCodePoint(codePoint);
		},
	);
}

/**
 * Result returned by `parseFeedXml`.
 */
export interface ParsedFeedData {
	// Feed-level node: the `channel` element for RSS and RDF documents, or the
	// root `feed` element for Atom documents.
	feedData: any;
	// Item/entry nodes, always an array, with empty objects filtered out.
	posts: any[];
	// True only when the document root was an Atom `feed` element.
	isAtom: boolean;
}

/**
 * Fetches XML data from a URL with CORS fallback.
 *
 * Sources are tried in order: the URL itself, the primary CORS proxy, then
 * the secondary CORS proxy. A source is skipped when the request rejects
 * (network/CORS failure) OR when it answers with a non-2xx status, so an
 * HTTP error page is never handed back to the caller as if it were a feed.
 *
 * @param url - Absolute URL of the feed document.
 * @returns The response body of the first source that succeeds.
 * @throws The error produced by the last source when every source fails.
 */
export async function fetchFeedWithFallback(url: string): Promise<string> {
	const proxiedTarget = encodeURIComponent(url);
	const sources = [
		url,
		`https://proxy.alcove.tools?url=${proxiedTarget}`,
		`https://proxy2.alcove.tools?url=${proxiedTarget}`,
	];

	let lastError: unknown;
	for (const source of sources) {
		try {
			const response = await fetch(source);
			if (!response.ok) {
				// fetch() only rejects on network errors; surface HTTP errors too
				// so the next source gets a chance.
				throw new Error(
					`Request failed with status ${response.status} ${response.statusText}`,
				);
			}
			return await response.text();
		} catch (error) {
			lastError = error;
		}
	}

	throw lastError;
}

/**
 * Parses a feed document and classifies it as RSS, Atom or RDF (RSS 1.0).
 *
 * @param xmlData - Raw XML text of the feed.
 * @returns The feed-level node, its non-empty posts, and whether it is Atom.
 * @throws Error when the XML cannot be parsed, when an RSS document has no
 *   channel element, or when no supported root element is present.
 */
export function parseFeedXml(xmlData: string): ParsedFeedData {
	// A lone child is parsed as a plain node rather than a one-element array,
	// and a missing child is undefined; fold all three shapes into an array.
	const asList = (nodes: any): any[] => {
		if (!nodes) return [];
		return Array.isArray(nodes) ? nodes : [nodes];
	};

	let root: any;
	try {
		root = parser.parse(xmlData);
	} catch (error) {
		const reason = error instanceof Error ? error.message : "Unknown error";
		throw new Error(`XML parsing failed: ${reason}`);
	}

	let feedData: any;
	let candidates: any[];
	let isAtom = false;

	if (root.rss) {
		// RSS 2.0: everything lives under <channel>
		feedData = root.rss.channel;
		if (!feedData) {
			throw new Error("RSS feed missing channel element");
		}
		candidates = asList(feedData.item);
	} else if (root.feed) {
		// Atom: the root element is the feed itself
		feedData = root.feed;
		candidates = asList(feedData.entry);
		isAtom = true;
	} else if (root["rdf:RDF"]) {
		// RDF / RSS 1.0: items are siblings of <channel>, not children
		const rdf = root["rdf:RDF"];
		feedData = rdf.channel;
		candidates = asList(rdf.item);
	} else {
		// Report what was found to make unsupported documents easier to debug
		const found = Object.keys(root).join(", ");
		throw new Error(`Unsupported feed format. Found root elements: ${found}`);
	}

	// Drop empty placeholder nodes
	const posts = candidates.filter(
		(entry) => entry && Object.keys(entry).length > 0,
	);

	return { feedData, posts, isAtom };
}

/**
 * Probes a website's origin for a feed at each of the well-known paths.
 *
 * Every candidate is requested through the primary CORS proxy, falling back
 * to the secondary proxy when the primary request rejects. The first
 * successful response whose body looks like XML/RSS/Atom wins.
 *
 * @param websiteUrl - Any absolute URL on the site; only its origin is used.
 * @returns The matching feed URL with its body, or null when nothing matched.
 * @throws TypeError when `websiteUrl` is not a valid absolute URL.
 */
export async function discoverFeed(websiteUrl: string): Promise<{
	feedUrl: string;
	xmlData: string;
} | null> {
	const { origin } = new URL(websiteUrl);

	// Cheap sniff test; full validation is left to the XML parser.
	const resemblesFeed = (body: string): boolean =>
		body.trim().startsWith("<?xml") ||
		body.includes("<rss") ||
		body.includes("<feed");

	for (const path of COMMON_FEED_PATHS) {
		const testUrl = `${origin}${path}`;

		try {
			const target = encodeURIComponent(testUrl);

			let response: Response;
			try {
				response = await fetch(`https://proxy.alcove.tools?url=${target}`);
			} catch {
				// Primary proxy unreachable; try the secondary one
				response = await fetch(`https://proxy2.alcove.tools?url=${target}`);
			}

			if (!response.ok) continue;

			const body = await response.text();
			if (resemblesFeed(body)) {
				return { feedUrl: testUrl, xmlData: body };
			}
		} catch {
			// Any failure for this candidate just means "try the next path"
		}
	}

	return null;
}

/**
 * Extracts YouTube channel ID from various YouTube URL formats
 * Supports:
 * - https://www.youtube.com/@ChannelHandle
 * - https://www.youtube.com/channel/UC...
 * - https://www.youtube.com/c/ChannelName
 * - https://www.youtube.com/user/Username
 *
 * `/channel/` URLs are resolved locally. Every other supported format needs
 * the channel page HTML, which is downloaded once through the CORS proxies
 * and scanned with the same set of patterns regardless of URL format.
 *
 * @param url - A YouTube channel URL.
 * @returns The channel ID, or null when it cannot be determined. Failures are
 *   logged with `console.error` and never thrown.
 */
export async function extractYouTubeChannelId(
	url: string,
): Promise<string | null> {
	// Downloads a page through the primary CORS proxy, falling back to the
	// secondary proxy when the primary request rejects.
	const fetchPageHtml = async (pageUrl: string): Promise<string> => {
		const target = encodeURIComponent(pageUrl);
		let response: Response;
		try {
			response = await fetch(`https://proxy.alcove.tools?url=${target}`);
		} catch {
			response = await fetch(`https://proxy2.alcove.tools?url=${target}`);
		}
		return response.text();
	};

	// Looks for the channel ID in the places YouTube is known to embed it:
	// inline JSON, the itemprop meta tag, then the canonical link tag.
	const findChannelId = (html: string): string | null => {
		const patterns = [
			/channelId":"([^"]+)"/,
			/<meta itemprop="channelId" content="([^"]+)">/,
			/<link rel="canonical" href="https:\/\/www\.youtube\.com\/channel\/([^"]+)">/,
		];
		for (const pattern of patterns) {
			const found = html.match(pattern)?.[1];
			if (found) return found;
		}
		return null;
	};

	try {
		// Direct channel ID format; stop at "/", "?" or "#" so neither query
		// strings nor fragments leak into the ID.
		if (url.includes("/channel/")) {
			return url.match(/\/channel\/([^/?#]+)/)?.[1] ?? null;
		}

		// A handle URL with nothing after the "@" cannot be resolved
		const isHandleUrl = url.includes("/@");
		if (isHandleUrl && !/\/@[^/?#]+/.test(url)) return null;

		// @handle, /c/ and /user/ formats all require fetching the page
		if (isHandleUrl || url.includes("/c/") || url.includes("/user/")) {
			try {
				return findChannelId(await fetchPageHtml(url));
			} catch (error) {
				console.error("Failed to fetch YouTube page for channel ID:", error);
				return null;
			}
		}

		return null;
	} catch (error) {
		console.error("Error extracting YouTube channel ID:", error);
		return null;
	}
}

/**
 * Builds the videos RSS feed URL for a YouTube channel URL.
 *
 * @param url - A YouTube channel URL in any format the channel-ID extractor
 *   understands.
 * @returns The feed URL, or null when no channel ID could be resolved.
 */
export async function convertYouTubeUrlToFeed(
	url: string,
): Promise<string | null> {
	const resolvedId = await extractYouTubeChannelId(url);

	return resolvedId
		? `https://www.youtube.com/feeds/videos.xml?channel_id=${resolvedId}`
		: null;
}

/**
 * Checks if a URL is a YouTube URL.
 *
 * The decision is made on the URL's hostname (youtube.com, youtu.be and their
 * subdomains) rather than on a substring search, so look-alike hosts such as
 * `notyoutube.com` and URLs that merely mention youtube.com in their path or
 * query string are rejected. Input without a scheme (`youtube.com/@name`) is
 * accepted and treated as https.
 *
 * @param url - The URL (or scheme-less host/path) to test.
 * @returns True when the host belongs to YouTube; false otherwise, including
 *   for empty or unparseable input.
 */
export function isYouTubeUrl(url: string): boolean {
	if (!url) return false;

	const trimmed = url.trim();
	// Users often paste URLs without a scheme; the URL parser requires one.
	const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)
		? trimmed
		: `https://${trimmed}`;

	let hostname: string;
	try {
		hostname = new URL(candidate).hostname.toLowerCase();
	} catch {
		return false;
	}

	return ["youtube.com", "youtu.be"].some(
		(domain) => hostname === domain || hostname.endsWith(`.${domain}`),
	);
}

/**
 * Extracts YouTube video ID from a video URL
 * Supports:
 * - https://www.youtube.com/watch?v=VIDEO_ID
 * - https://youtu.be/VIDEO_ID
 * - https://www.youtube.com/embed/VIDEO_ID
 *
 * The captured ID stops at the first "/", "?", "&" or "#" so trailing path
 * segments, extra query parameters and fragments (e.g. `#t=30`) never end up
 * inside the returned ID.
 *
 * @param url - A YouTube video URL.
 * @returns The video ID, or null when the URL matches none of the formats.
 */
export function extractYouTubeVideoId(url: string): string | null {
	try {
		// Checked in order: watch URL, short URL, embed URL
		const patterns = [
			/[?&]v=([^&#]+)/,
			/youtu\.be\/([^/?&#]+)/,
			/youtube\.com\/embed\/([^/?&#]+)/,
		];

		for (const pattern of patterns) {
			const id = url.match(pattern)?.[1];
			if (id) return id;
		}

		return null;
	} catch {
		// Non-string input at runtime; treat as "no ID"
		return null;
	}
}

/**
 * Tells whether a post belongs to a YouTube feed, judged by its feed URL.
 *
 * @param feedUrl - URL of the feed the post came from, or null when unknown.
 * @returns True when the URL contains the YouTube videos-feed path.
 */
export function isYouTubePost(feedUrl: string | null): boolean {
	return feedUrl ? feedUrl.includes("youtube.com/feeds/videos.xml") : false;
}

/**
 * Resolves the link of a parsed RSS item or Atom entry.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - True when `post` comes from an Atom feed.
 * @returns The best link found, or "#" when the node offers none.
 */
export function extractPostLink(post: any, isAtom: boolean): string {
	if (!isAtom) {
		// RSS: link, then guid, then id
		const rssLink = post.link || post.guid || post.id;
		if (!rssLink) return "#";

		// Elements carrying attributes or CDATA are parsed into objects
		return typeof rssLink === "object"
			? rssLink["#text"] || rssLink.__cdata || "#"
			: String(rssLink);
	}

	// Atom: <link> may be parsed as a string, a list of nodes, or one node
	const atomLink = post.link;

	if (typeof atomLink === "string") {
		return atomLink || post.id || "#";
	}

	if (Array.isArray(atomLink)) {
		// Prefer rel="alternate" (or a link with no rel), else the first link
		const preferred = atomLink.find(
			(candidate: any) =>
				!candidate["@_rel"] || candidate["@_rel"] === "alternate",
		);
		return (
			preferred?.["@_href"] || atomLink[0]?.["@_href"] || post.id || "#"
		);
	}

	if (atomLink && typeof atomLink === "object") {
		return atomLink["@_href"] || post.id || "#";
	}

	return post.id || "#";
}

/**
 * Extracts author from RSS or Atom post entry.
 *
 * Atom reads `author` (usually an object with a `name`); RSS reads `author`,
 * then `dc:creator`, then `creator`. Values parsed into objects (attributes
 * or CDATA present) are unwrapped to their text. When an entry lists several
 * authors — which the parser delivers as an array — their names are joined
 * with ", " instead of being discarded.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - True when `post` comes from an Atom feed.
 * @param feedTitle - Fallback used when the entry names no author.
 * @returns The author name(s) as a string, or `feedTitle` when none is found.
 */
export function extractPostAuthor(
	post: any,
	isAtom: boolean,
	feedTitle: string,
): string {
	// Flattens a parsed value (primitive, or object with CDATA/text) to text.
	const textOf = (node: any): string => {
		if (node === null || node === undefined) return "";
		if (typeof node === "object") {
			const inner = node.__cdata || node["#text"];
			return !inner || typeof inner === "object" ? "" : String(inner);
		}
		return String(node);
	};

	// Resolves the display name of one author node.
	const nameOf = (entry: any): string => {
		if (entry !== null && typeof entry === "object") {
			// Atom person constructs keep the name in a <name> child
			return (isAtom ? textOf(entry.name) : "") || textOf(entry);
		}
		return textOf(entry);
	};

	const raw = isAtom
		? post.author
		: post.author || post["dc:creator"] || post.creator;
	if (!raw) return feedTitle;

	// Repeated elements are parsed into an array; treat one author the same way
	const entries: any[] = Array.isArray(raw) ? raw : [raw];
	const names = entries.map(nameOf).filter((name) => name.length > 0);

	return names.length > 0 ? names.join(", ") : feedTitle;
}

/**
 * Extracts content from RSS or Atom post entry.
 *
 * Fields are tried in order of preference: `content:encoded`, `content`,
 * `description`, `summary`. When none yields non-empty text a fallback is
 * returned: a "View post" anchor when `postLink` is given, otherwise a plain
 * "Please open on the web" message.
 *
 * `postLink` originates from feed data, so it is HTML-escaped before being
 * placed in the anchor's `href` attribute; a crafted link cannot close the
 * attribute and inject markup.
 * NOTE(review): the URL scheme is not restricted here — callers that render
 * this HTML should still reject `javascript:` links.
 *
 * @param post - Parsed item/entry node.
 * @param postLink - Optional link to the post, used only for the fallback.
 * @returns The trimmed content (HTML or text), or the fallback message.
 */
export function extractPostContent(post: any, postLink?: string): string {
	// Escapes the characters that are significant inside a quoted attribute.
	const escapeAttribute = (value: string): string =>
		value
			.replace(/&/g, "&amp;")
			.replace(/"/g, "&quot;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;");

	// Try various content fields in order of preference
	const content =
		post["content:encoded"] || post.content || post.description || post.summary;

	// Default fallback message
	const fallbackMessage = postLink
		? `<p><a href="${escapeAttribute(String(postLink))}" target="_blank" rel="noopener noreferrer">View post</a></p>`
		: "Please open on the web";

	// No content found - this is fine for link-only feeds
	if (!content) return fallbackMessage;

	// Objects hold CDATA or nested text; primitives (the parser may turn a
	// purely numeric body into a number) are used as they are.
	const raw =
		typeof content === "object"
			? content.__cdata || content["#text"] || ""
			: content;

	const trimmed = String(raw).trim();
	return trimmed.length > 0 ? trimmed : fallbackMessage;
}

/**
 * Extracts published date from RSS or Atom post entry.
 *
 * Fields are tried in this order: `pubDate` (RSS 2.0), `updated` and
 * `published` (Atom), then `dc:date` (RDF / RSS 1.0). A value parsed into an
 * object (attributes or CDATA present) is unwrapped to its text first.
 *
 * @param post - Parsed item/entry node.
 * @returns The date as an ISO 8601 string. The current time is returned when
 *   the entry has no date, the date cannot be parsed, or `post` is unusable.
 */
export function extractPostDate(post: any): string {
	try {
		const rawValue =
			post.pubDate || post.updated || post.published || post["dc:date"];

		// Unwrap { __cdata } / { "#text" } nodes to the text they carry
		const dateValue =
			rawValue && typeof rawValue === "object" && !(rawValue instanceof Date)
				? rawValue.__cdata || rawValue["#text"]
				: rawValue;

		if (!dateValue) {
			return new Date().toISOString(); // Use current date if no date found
		}

		const parsedDate = new Date(dateValue);
		// An unparseable date yields NaN; fall back to "now" rather than throwing
		return isNaN(parsedDate.getTime())
			? new Date().toISOString()
			: parsedDate.toISOString();
	} catch {
		return new Date().toISOString();
	}
}

/**
 * Converts a parsed feed value to plain text with HTML entities decoded.
 *
 * Strings pass through, objects are unwrapped via their `__cdata`, `#text`
 * or `text` property (first truthy one wins), and any other truthy value is
 * stringified. Falsy input and objects without usable text yield "".
 */
function extractStringValue(value: any): string {
	if (!value) return "";

	let raw: string;

	switch (typeof value) {
		case "string":
			raw = value;
			break;
		case "object": {
			// Parsed XML nodes keep their text under one of these keys
			const inner = value.__cdata || value["#text"] || value.text;
			if (!inner) return "";
			raw = String(inner);
			break;
		}
		default:
			// Numbers, booleans, etc.
			raw = String(value);
	}

	// Entities are decoded last, on the final text
	return decodeHtmlEntities(raw);
}

/**
 * Safely truncate a string to a maximum length.
 *
 * The value is first converted to decoded plain text and trimmed. Text longer
 * than `maxLength` is cut and suffixed with "..." so that the result is never
 * longer than `maxLength`. When `maxLength` is too small to hold the ellipsis
 * (below 3) the text is hard-cut without one. The cut never lands between the
 * two halves of a surrogate pair, so no broken character is emitted.
 *
 * @param str - Any parsed feed value (string, number, text/CDATA object, ...).
 * @param maxLength - Maximum length of the result in UTF-16 code units.
 * @returns The trimmed, possibly truncated text; "" for empty input.
 */
export function truncateString(str: any, maxLength: number): string {
	const strValue = extractStringValue(str);
	if (!strValue) return "";
	const trimmed = strValue.trim();
	if (trimmed.length <= maxLength) return trimmed;

	const ellipsis = "...";

	// No room for the ellipsis: hard-cut so the limit is still honoured
	if (maxLength < ellipsis.length) {
		return trimmed.substring(0, Math.max(0, maxLength));
	}

	let cut = maxLength - ellipsis.length;
	// If the last kept unit is a high surrogate its partner would be cut off;
	// drop it as well instead of leaving half a character behind.
	const lastKept = trimmed.charCodeAt(cut - 1);
	if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;

	return trimmed.substring(0, cut) + ellipsis;
}

/**
 * Produces length-limited title and description values for a feed.
 *
 * @param feedData - Parsed feed-level node; preferred source for both fields.
 * @param feed - Optional secondary source consulted when `feedData` lacks a
 *   field.
 * @returns `title` (at most 200 characters, "Untitled Feed" when no source
 *   provides a truthy title) and `description` (at most 1000 characters).
 */
export function sanitizeFeedData(feedData: any, feed?: any) {
	const rawTitle = feedData?.title || feed?.title || "Untitled Feed";
	// Atom feeds describe themselves with <subtitle>
	const rawDescription =
		feedData?.description || feedData?.subtitle || feed?.description || "";

	const title = truncateString(rawTitle, 200);
	const description = truncateString(rawDescription, 1000);

	return { title, description };
}

/**
 * Produces length-limited title, author and link values for a post.
 *
 * @param post - Parsed item/entry node.
 * @param isAtom - True when `post` comes from an Atom feed.
 * @param feedTitle - Author fallback when the post names none.
 * @returns `title` and `link` (at most 1000 characters each) and `author`
 *   (at most 200 characters).
 */
export function sanitizePostData(
	post: any,
	isAtom: boolean,
	feedTitle: string,
) {
	const title = truncateString(post.title || "Untitled", 1000);
	const author = truncateString(
		extractPostAuthor(post, isAtom, feedTitle),
		200,
	);
	const link = truncateString(extractPostLink(post, isAtom), 1000);

	return { title, author, link };
}

1	import { XMLParser } from "fast-xml-parser";
2	import { COMMON_FEED_PATHS } from "./feed-discovery";
3
4	const parser = new XMLParser({
5	ignoreAttributes: false,
6	attributeNamePrefix: "@_",
7	textNodeName: "#text",
8	cdataPropName: "__cdata",
9	parseAttributeValue: true,
10	trimValues: true,
11	});
12
13	/**
14	* Decodes HTML entities in a string
15	* Handles both named entities (&) and numeric entities (&, &)
16	*/
17	function decodeHtmlEntities(text: string): string {
18	if (!text \|\| typeof text !== "string") return text;
19
20	// Create a temporary element to use browser's built-in HTML decoding
21	if (typeof document !== "undefined") {
22	const textarea = document.createElement("textarea");
23	textarea.innerHTML = text;
24	return textarea.value;
25	}
26
27	// Fallback for non-browser environments (though we're in a browser app)
28	// Handle common HTML entities manually
29	const entities: Record<string, string> = {
30	"&": "&",
31	"<": "<",
32	">": ">",
33	""": '"',
34	"'": "'",
35	"'": "'",
36	"’": "'",
37	"‘": "'",
38	"“": '"',
39	"”": '"',
40	"–": "–",
41	"—": "—",
42	"&": "&",
43	};
44
45	let decoded = text;
46	for (const [entity, char] of Object.entries(entities)) {
47	decoded = decoded.replace(new RegExp(entity, "g"), char);
48	}
49
50	// Handle numeric entities like ’
51	decoded = decoded.replace(/&#(\d+);/g, (_match, dec) => {
52	return String.fromCharCode(dec);
53	});
54
55	// Handle hex entities like '
56	decoded = decoded.replace(/&#x([0-9a-f]+);/gi, (_match, hex) => {
57	return String.fromCharCode(parseInt(hex, 16));
58	});
59
60	return decoded;
61	}
62
63	export interface ParsedFeedData {
64	feedData: any;
65	posts: any[];
66	isAtom: boolean;
67	}
68
69	/**
70	* Fetches XML data from a URL with CORS fallback
71	*/
72	export async function fetchFeedWithFallback(url: string): Promise<string> {
73	try {
74	// Try to fetch directly first
75	const response = await fetch(url);
76	return await response.text();
77	} catch {
78	// Fall back to primary CORS proxy if direct fetch fails
79	try {
80	const response = await fetch(
81	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
82	);
83	return await response.text();
84	} catch {
85	// Fall back to secondary CORS proxy if primary fails
86	const response = await fetch(
87	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
88	);
89	return await response.text();
90	}
91	}
92	}
93
94	/**
95	* Parses XML data and determines if it's RSS or Atom feed
96	*/
97	export function parseFeedXml(xmlData: string): ParsedFeedData {
98	let parsedXmlData: any;
99
100	try {
101	parsedXmlData = parser.parse(xmlData);
102	} catch (error) {
103	throw new Error(
104	`XML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`,
105	);
106	}
107
108	// Determine if it's RSS or Atom feed
109	let feedData: any;
110	let posts: any[];
111	let isAtom = false;
112
113	if (parsedXmlData.rss) {
114	// RSS feed
115	feedData = parsedXmlData.rss.channel;
116	if (!feedData) {
117	throw new Error("RSS feed missing channel element");
118	}
119	const items = feedData.item \|\| [];
120	// Ensure posts is always an array (single item might not be in array)
121	posts = Array.isArray(items) ? items : items ? [items] : [];
122	} else if (parsedXmlData.feed) {
123	// Atom feed
124	feedData = parsedXmlData.feed;
125	const entries = feedData.entry \|\| [];
126	// Ensure posts is always an array (single entry might not be in array)
127	posts = Array.isArray(entries) ? entries : entries ? [entries] : [];
128	isAtom = true;
129	} else if (parsedXmlData["rdf:RDF"]) {
130	// RDF/RSS 1.0 feed
131	feedData = parsedXmlData["rdf:RDF"].channel;
132	const items = parsedXmlData["rdf:RDF"].item \|\| [];
133	posts = Array.isArray(items) ? items : items ? [items] : [];
134	isAtom = false;
135	} else {
136	// Log available root elements for debugging
137	const rootKeys = Object.keys(parsedXmlData);
138	throw new Error(
139	`Unsupported feed format. Found root elements: ${rootKeys.join(", ")}`,
140	);
141	}
142
143	// Filter out empty objects from posts array
144	posts = posts.filter((post) => post && Object.keys(post).length > 0);
145
146	return { feedData, posts, isAtom };
147	}
148
149	/**
150	* Discovers RSS/Atom feed URL from a website URL
151	*/
152	export async function discoverFeed(websiteUrl: string): Promise<{
153	feedUrl: string;
154	xmlData: string;
155	} \| null> {
156	const urlObj = new URL(websiteUrl);
157	const origin = urlObj.origin;
158
159	for (const path of COMMON_FEED_PATHS) {
160	const testUrl = `${origin}${path}`;
161
162	try {
163	// Try primary CORS proxy
164	let response: Response;
165	try {
166	response = await fetch(
167	`https://proxy.alcove.tools?url=${encodeURIComponent(testUrl)}`,
168	);
169	} catch {
170	// Fall back to secondary CORS proxy
171	response = await fetch(
172	`https://proxy2.alcove.tools?url=${encodeURIComponent(testUrl)}`,
173	);
174	}
175
176	if (response.ok) {
177	const text = await response.text();
178	// Quick check if it looks like XML
179	if (
180	text.trim().startsWith("<?xml") \|\|
181	text.includes("<rss") \|\|
182	text.includes("<feed")
183	) {
184	return { feedUrl: testUrl, xmlData: text };
185	}
186	}
187	} catch (error) {
188	continue;
189	}
190	}
191
192	return null;
193	}
194
195	/**
196	* Extracts YouTube channel ID from various YouTube URL formats
197	* Supports:
198	* - https://www.youtube.com/@ChannelHandle
199	* - https://www.youtube.com/channel/UC...
200	* - https://www.youtube.com/c/ChannelName
201	* - https://www.youtube.com/user/Username
202	*/
203	export async function extractYouTubeChannelId(
204	url: string,
205	): Promise<string \| null> {
206	try {
207	// Direct channel ID format
208	if (url.includes("/channel/")) {
209	const match = url.match(/\/channel\/([^/?]+)/);
210	return match ? match[1] : null;
211	}
212
213	// Handle @ format - need to fetch the page to get channel ID
214	if (url.includes("/@")) {
215	const handle = url.match(/\/@([^/?]+)/)?.[1];
216	if (!handle) return null;
217
218	// Fetch the YouTube page to extract the channel ID from meta tags
219	try {
220	let response: Response;
221	try {
222	response = await fetch(
223	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
224	);
225	} catch {
226	// Fall back to secondary CORS proxy
227	response = await fetch(
228	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
229	);
230	}
231	const html = await response.text();
232
233	// Look for channel ID in various places
234	const channelIdMatch = html.match(/channelId":"([^"]+)"/);
235	if (channelIdMatch) {
236	return channelIdMatch[1];
237	}
238
239	// Alternative: look in meta tags
240	const metaMatch = html.match(
241	/<meta itemprop="channelId" content="([^"]+)">/,
242	);
243	if (metaMatch) {
244	return metaMatch[1];
245	}
246
247	// Alternative: look in link tags
248	const linkMatch = html.match(
249	/<link rel="canonical" href="https:\/\/www\.youtube\.com\/channel\/([^"]+)">/,
250	);
251	if (linkMatch) {
252	return linkMatch[1];
253	}
254	} catch (error) {
255	console.error("Failed to fetch YouTube page for channel ID:", error);
256	return null;
257	}
258	}
259
260	// For /c/ and /user/ formats, we also need to fetch the page
261	if (url.includes("/c/") \|\| url.includes("/user/")) {
262	try {
263	let response: Response;
264	try {
265	response = await fetch(
266	`https://proxy.alcove.tools?url=${encodeURIComponent(url)}`,
267	);
268	} catch {
269	// Fall back to secondary CORS proxy
270	response = await fetch(
271	`https://proxy2.alcove.tools?url=${encodeURIComponent(url)}`,
272	);
273	}
274	const html = await response.text();
275
276	const channelIdMatch = html.match(/channelId":"([^"]+)"/);
277	if (channelIdMatch) {
278	return channelIdMatch[1];
279	}
280	} catch (error) {
281	console.error("Failed to fetch YouTube page for channel ID:", error);
282	return null;
283	}
284	}
285
286	return null;
287	} catch (error) {
288	console.error("Error extracting YouTube channel ID:", error);
289	return null;
290	}
291	}
292
293	/**
294	* Converts YouTube channel URL to RSS feed URL
295	*/
296	export async function convertYouTubeUrlToFeed(
297	url: string,
298	): Promise<string \| null> {
299	const channelId = await extractYouTubeChannelId(url);
300	if (!channelId) return null;
301
302	return `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`;
303	}
304
305	/**
306	* Checks if a URL is a YouTube URL
307	*/
308	export function isYouTubeUrl(url: string): boolean {
309	return url.includes("youtube.com") \|\| url.includes("youtu.be");
310	}
311
312	/**
313	* Extracts YouTube video ID from a video URL
314	* Supports:
315	* - https://www.youtube.com/watch?v=VIDEO_ID
316	* - https://youtu.be/VIDEO_ID
317	* - https://www.youtube.com/embed/VIDEO_ID
318	*/
319	export function extractYouTubeVideoId(url: string): string \| null {
320	try {
321	// Standard watch URL
322	const watchMatch = url.match(/[?&]v=([^&]+)/);
323	if (watchMatch) return watchMatch[1];
324
325	// Short URL format
326	const shortMatch = url.match(/youtu\.be\/([^?]+)/);
327	if (shortMatch) return shortMatch[1];
328
329	// Embed URL format
330	const embedMatch = url.match(/youtube\.com\/embed\/([^?]+)/);
331	if (embedMatch) return embedMatch[1];
332
333	return null;
334	} catch {
335	return null;
336	}
337	}
338
339	/**
340	* Checks if a post is from a YouTube feed
341	*/
342	export function isYouTubePost(feedUrl: string \| null): boolean {
343	if (!feedUrl) return false;
344	return feedUrl.includes("youtube.com/feeds/videos.xml");
345	}
346
347	/**
348	* Extracts post link from RSS or Atom post entry
349	*/
350	export function extractPostLink(post: any, isAtom: boolean): string {
351	if (isAtom) {
352	// Handle Atom link which can be string, object, or array
353	if (typeof post.link === "string") {
354	return post.link \|\| post.id \|\| "#";
355	} else if (Array.isArray(post.link)) {
356	// Find 'alternate' link or use first link
357	const alternateLink = post.link.find(
358	(l: any) => l["@_rel"] === "alternate" \|\| !l["@_rel"],
359	);
360	return (
361	alternateLink?.["@_href"] \|\| post.link[0]?.["@_href"] \|\| post.id \|\| "#"
362	);
363	} else if (post.link && typeof post.link === "object") {
364	return post.link["@_href"] \|\| post.id \|\| "#";
365	}
366	return post.id \|\| "#";
367	}
368
369	// RSS feed
370	const link = post.link \|\| post.guid \|\| post.id;
371	if (!link) return "#";
372
373	// Handle link as object (sometimes RSS parsers do this)
374	if (typeof link === "object") {
375	return link["#text"] \|\| link.__cdata \|\| "#";
376	}
377
378	return String(link);
379	}
380
381	/**
382	* Extracts author from RSS or Atom post entry
383	*/
384	export function extractPostAuthor(
385	post: any,
386	isAtom: boolean,
387	feedTitle: string,
388	): string {
389	if (isAtom) {
390	// Atom can have author as object with name property
391	const author = post.author;
392	if (typeof author === "object" && author !== null) {
393	return author.name \|\| author["#text"] \|\| feedTitle;
394	}
395	return author \|\| feedTitle;
396	}
397
398	// RSS feed
399	const author = post.author \|\| post["dc:creator"] \|\| post.creator;
400	if (!author) return feedTitle;
401
402	// Handle author as object
403	if (typeof author === "object") {
404	return author["#text"] \|\| author.__cdata \|\| feedTitle;
405	}
406
407	return String(author);
408	}
409
410	/**
411	* Extracts content from RSS or Atom post entry
412	*/
413	export function extractPostContent(post: any, postLink?: string): string {
414	// Try various content fields in order of preference
415	const content =
416	post["content:encoded"] \|\| post.content \|\| post.description \|\| post.summary;
417
418	// Default fallback message
419	const fallbackMessage = postLink
420	? `<p><a href="${postLink}" target="_blank" rel="noopener noreferrer">View post</a></p>`
421	: "Please open on the web";
422
423	// Handle different content structures
424	if (typeof content === "string") {
425	const trimmed = content.trim();
426	return trimmed.length > 0 ? trimmed : fallbackMessage;
427	} else if (content && typeof content === "object") {
428	// Handle CDATA or nested text
429	const extracted = content.__cdata \|\| content["#text"] \|\| "";
430	const trimmed = String(extracted).trim();
431	return trimmed.length > 0 ? trimmed : fallbackMessage;
432	}
433
434	// No content found - this is fine for link-only feeds
435	return fallbackMessage;
436	}
437
438	/**
439	* Extracts published date from RSS or Atom post entry
440	*/
441	export function extractPostDate(post: any): string {
442	try {
443	const dateValue = post.pubDate \|\| post.updated \|\| post.published;
444	if (!dateValue) {
445	return new Date().toISOString(); // Use current date if no date found
446	}
447	const parsedDate = new Date(dateValue);
448	// Check if date is valid
449	if (isNaN(parsedDate.getTime())) {
450	return new Date().toISOString();
451	}
452	return parsedDate.toISOString();
453	} catch {
454	return new Date().toISOString();
455	}
456	}
457
458	/**
459	* Extract string value from various data types and decode HTML entities
460	*/
461	function extractStringValue(value: any): string {
462	if (!value) return "";
463
464	let strValue = "";
465
466	if (typeof value === "string") {
467	strValue = value;
468	} else if (typeof value === "object") {
469	// Handle objects that might contain text
470	// Try common text properties
471	if (value.__cdata) strValue = String(value.__cdata);
472	else if (value["#text"]) strValue = String(value["#text"]);
473	else if (value.text) strValue = String(value.text);
474	// Last resort: return empty string
475	else return "";
476	} else {
477	// For numbers, booleans, etc.
478	strValue = String(value);
479	}
480
481	// Decode HTML entities before returning
482	return decodeHtmlEntities(strValue);
483	}
484
485	/**
486	* Safely truncate a string to a maximum length
487	*/
488	export function truncateString(str: any, maxLength: number): string {
489	const strValue = extractStringValue(str);
490	if (!strValue) return "";
491	const trimmed = strValue.trim();
492	if (trimmed.length <= maxLength) return trimmed;
493	return trimmed.substring(0, maxLength - 3) + "...";
494	}
495
496	/**
497	* Validate and sanitize feed data for insertion
498	*/
499	export function sanitizeFeedData(feedData: any, feed?: any) {
500	// Extract title from feedData or feed, handling various formats
501	const titleValue = feedData?.title \|\| feed?.title \|\| "Untitled Feed";
502	const descValue =
503	feedData?.description \|\| feedData?.subtitle \|\| feed?.description \|\| "";
504
505	return {
506	title: truncateString(titleValue, 200),
507	description: truncateString(descValue, 1000),
508	};
509	}
510
511	/**
512	* Validate and sanitize post data for insertion
513	*/
514	export function sanitizePostData(
515	post: any,
516	isAtom: boolean,
517	feedTitle: string,
518	) {
519	return {
520	title: truncateString(post.title \|\| "Untitled", 1000),
521	author: truncateString(extractPostAuthor(post, isAtom, feedTitle), 200),
522	link: truncateString(extractPostLink(post, isAtom), 1000),
523	};
524	}