chore: fixed character parsing for some feeds 1dd63ba7
Steve · 2025-11-06 22:03 1 file(s) · +67 −11
src/lib/feed-operations.ts +67 −11
10 10
	trimValues: true,
11 11
});
12 12
13 +
/**
 * Decodes HTML entities in a string.
 *
 * Handles named entities (e.g. `&amp;`), decimal numeric entities
 * (e.g. `&#39;`) and hexadecimal numeric entities (e.g. `&#x27;`).
 *
 * When a DOM is available the browser's own parser does the decoding.
 * Otherwise a small built-in table plus generic numeric decoding is used;
 * named entities that are not in the table are left untouched.
 *
 * @param text - The string that may contain HTML entities.
 * @returns The decoded string. Falsy or non-string input is returned unchanged.
 */
function decodeHtmlEntities(text: string): string {
	if (!text || typeof text !== "string") return text;

	// Use the browser's built-in HTML decoding when a DOM is available.
	if (typeof document !== "undefined") {
		const textarea = document.createElement("textarea");
		textarea.innerHTML = text;
		return textarea.value;
	}

	// Fallback for non-browser environments: common entities handled manually.
	// NOTE(review): the typographic quote entities (&#8216;/&#8217;/&#8220;/&#8221;)
	// are mapped to plain ASCII quotes here, whereas the DOM branch above yields
	// the real curly characters. Kept as-is; confirm this is intended.
	const entities: Record<string, string> = {
		"&amp;": "&",
		"&lt;": "<",
		"&gt;": ">",
		"&quot;": '"',
		"&#039;": "'",
		"&apos;": "'",
		"&#8217;": "'",
		"&#8216;": "'",
		"&#8220;": '"',
		"&#8221;": '"',
		"&#8211;": "–",
		"&#8212;": "—",
		"&#038;": "&",
	};

	// Decode everything in ONE pass so that the output of one replacement is
	// never re-interpreted as another entity ("&amp;lt;" must become "&lt;",
	// not "<").
	return text.replace(
		/&(?:#x([0-9a-f]+)|#(\d+)|[a-z]+);/gi,
		(match: string, hex?: string, dec?: string): string => {
			// Exact table hits win, including the numeric overrides above.
			const mapped = entities[match];
			if (mapped !== undefined) return mapped;

			// Named entity that is not in the table: leave it as written.
			if (hex === undefined && dec === undefined) return match;

			const codePoint =
				hex !== undefined ? parseInt(hex, 16) : parseInt(dec as string, 10);

			// String.fromCodePoint throws a RangeError outside the Unicode range,
			// so keep out-of-range references verbatim instead of crashing.
			if (!Number.isFinite(codePoint) || codePoint > 0x10ffff) return match;

			// fromCodePoint (unlike fromCharCode) handles code points above
			// U+FFFF, e.g. emoji.
			return String.fromCodePoint(codePoint);
		},
	);
}
62 +
13 63
export interface ParsedFeedData {
14 64
	feedData: any;
15 65
	posts: any[];
386 436
}
387 437
388 438
/**
 * Extract a string from various data types and decode HTML entities in it.
 *
 * - `null`, `undefined`, `""` and `NaN` yield `""`.
 * - Strings are used as-is.
 * - Objects are probed for `__cdata`, then `#text`, then `text`; the first
 *   one holding a non-empty value is used. Objects with none of these
 *   yield `""`.
 * - Any other primitive (numbers including `0`, booleans including `false`)
 *   is converted with `String()`.
 *
 * @param value - Raw value to convert; presumably a node produced by the XML
 *   parser (TODO confirm against callers).
 * @returns The extracted text with HTML entities decoded.
 */
function extractStringValue(value: any): string {
	// Only genuinely absent values short-circuit. A plain truthiness check
	// would also discard 0 and false, which are legitimate values.
	if (value === null || value === undefined || value === "" || Number.isNaN(value)) {
		return "";
	}

	let strValue: string;

	if (typeof value === "string") {
		strValue = value;
	} else if (typeof value === "object") {
		// Try common text properties in priority order, skipping absent/empty ones.
		const candidate = [value.__cdata, value["#text"], value.text].find(
			(entry) => entry !== null && entry !== undefined && entry !== "",
		);
		// Last resort: nothing text-like on the object.
		if (candidate === undefined) return "";
		strValue = String(candidate);
	} else {
		// For numbers, booleans, etc.
		strValue = String(value);
	}

	// Decode HTML entities before returning
	return decodeHtmlEntities(strValue);
}
408 464
409 465
/**