chore: fixed character parsing for some feeds
1dd63ba7
1 file(s) · +67 −11
| 10 | 10 | trimValues: true, |
|
| 11 | 11 | }); |
|
| 12 | 12 | ||
| 13 | + | /** |
|
| 14 | + | * Decodes HTML entities in a string |
|
| 15 | + | * Handles both named entities (&amp;) and numeric entities (&#38;, &#x26;) |
|
| 16 | + | */ |
|
| 17 | + | function decodeHtmlEntities(text: string): string { |
|
| 18 | + | if (!text || typeof text !== "string") return text; |
|
| 19 | + | ||
| 20 | + | // Create a temporary element to use browser's built-in HTML decoding |
|
| 21 | + | if (typeof document !== "undefined") { |
|
| 22 | + | const textarea = document.createElement("textarea"); |
|
| 23 | + | textarea.innerHTML = text; |
|
| 24 | + | return textarea.value; |
|
| 25 | + | } |
|
| 26 | + | ||
| 27 | + | // Fallback for non-browser environments (though we're in a browser app) |
|
| 28 | + | // Handle common HTML entities manually |
|
| 29 | + | const entities: Record<string, string> = { |
|
| 30 | + | "&amp;": "&", |
|
| 31 | + | "&lt;": "<", |
|
| 32 | + | "&gt;": ">", |
|
| 33 | + | "&quot;": '"', |
|
| 34 | + | "&#39;": "'", |
|
| 35 | + | "&apos;": "'", |
|
| 36 | + | "&rsquo;": "'", |
|
| 37 | + | "&lsquo;": "'", |
|
| 38 | + | "&ldquo;": '"', |
|
| 39 | + | "&rdquo;": '"', |
|
| 40 | + | "&ndash;": "–", |
|
| 41 | + | "&mdash;": "—", |
|
| 42 | + | "&#38;": "&", |
|
| 43 | + | }; |
|
| 44 | + | ||
| 45 | + | let decoded = text; |
|
| 46 | + | for (const [entity, char] of Object.entries(entities)) { |
|
| 47 | + | decoded = decoded.replace(new RegExp(entity, "g"), char); |
|
| 48 | + | } |
|
| 49 | + | ||
| 50 | + | // Handle numeric entities like &#8217; |
|
| 51 | + | decoded = decoded.replace(/&#(\d+);/g, (match, dec) => { |
|
| 52 | + | return String.fromCharCode(dec); |
|
| 53 | + | }); |
|
| 54 | + | ||
| 55 | + | // Handle hex entities like ' |
|
| 56 | + | decoded = decoded.replace(/&#x([0-9a-f]+);/gi, (match, hex) => { |
|
| 57 | + | return String.fromCharCode(parseInt(hex, 16)); |
|
| 58 | + | }); |
|
| 59 | + | ||
| 60 | + | return decoded; |
|
| 61 | + | } |
|
| 62 | + | ||
| 13 | 63 | export interface ParsedFeedData { |
|
| 14 | 64 | feedData: any; |
|
| 15 | 65 | posts: any[]; |
|
| 386 | 436 | } |
|
| 387 | 437 | ||
| 388 | 438 | /** |
|
| 389 | - | * Extract string value from various data types |
|
| 439 | + | * Extract string value from various data types and decode HTML entities |
|
| 390 | 440 | */ |
|
| 391 | 441 | function extractStringValue(value: any): string { |
|
| 392 | 442 | if (!value) return ""; |
|
| 393 | - | if (typeof value === "string") return value; |
|
| 443 | + | ||
| 444 | + | let strValue = ""; |
|
| 394 | 445 | ||
| 395 | - | // Handle objects that might contain text |
|
| 396 | - | if (typeof value === "object") { |
|
| 446 | + | if (typeof value === "string") { |
|
| 447 | + | strValue = value; |
|
| 448 | + | } else if (typeof value === "object") { |
|
| 449 | + | // Handle objects that might contain text |
|
| 397 | 450 | // Try common text properties |
|
| 398 | - | if (value.__cdata) return String(value.__cdata); |
|
| 399 | - | if (value["#text"]) return String(value["#text"]); |
|
| 400 | - | if (value.text) return String(value.text); |
|
| 401 | - | // Last resort: try to convert to string |
|
| 402 | - | return ""; |
|
| 451 | + | if (value.__cdata) strValue = String(value.__cdata); |
|
| 452 | + | else if (value["#text"]) strValue = String(value["#text"]); |
|
| 453 | + | else if (value.text) strValue = String(value.text); |
|
| 454 | + | // Last resort: return empty string |
|
| 455 | + | else return ""; |
|
| 456 | + | } else { |
|
| 457 | + | // For numbers, booleans, etc. |
|
| 458 | + | strValue = String(value); |
|
| 403 | 459 | } |
|
| 404 | 460 | ||
| 405 | - | // For numbers, booleans, etc. |
|
| 406 | - | return String(value); |
|
| 461 | + | // Decode HTML entities before returning |
|
| 462 | + | return decodeHtmlEntities(strValue); |
|
| 407 | 463 | } |
|
| 408 | 464 | ||
| 409 | 465 | /** |
|