fix: fix OPML importing and improve feed parsing
f5f2a012
3 file(s) · +250 −48
| 14 | 14 | extractPostAuthor, |
|
| 15 | 15 | extractPostContent, |
|
| 16 | 16 | extractPostDate, |
|
| 17 | + | sanitizeFeedData, |
|
| 18 | + | sanitizePostData, |
|
| 17 | 19 | } from "@/lib/feed-operations"; |
|
| 18 | 20 | import { parseOPML } from "@/lib/opml"; |
|
| 19 | 21 | import { |
|
| 91 | 93 | }); |
|
| 92 | 94 | ||
| 93 | 95 | let successCount = 0; |
|
| 94 | - | let failCount = 0; |
|
| 96 | + | const failedFeeds: Array<{ title: string; url: string; error: string }> = |
|
| 97 | + | []; |
|
| 95 | 98 | ||
| 96 | 99 | for (let i = 0; i < opmlFeeds.length; i++) { |
|
| 97 | 100 | const feed = opmlFeeds[i]; |
|
| 104 | 107 | const xmlData = await fetchFeedWithFallback(feed.feedUrl); |
|
| 105 | 108 | const { feedData, posts, isAtom } = parseFeedXml(xmlData); |
|
| 106 | 109 | ||
| 110 | + | // Sanitize feed data to meet schema constraints |
|
| 111 | + | const sanitizedFeed = sanitizeFeedData(feedData, feed); |
|
| 112 | + | ||
| 107 | 113 | const result = evolu.insert("rssFeed", { |
|
| 108 | 114 | feedUrl: feed.feedUrl, |
|
| 109 | - | title: feed.title, |
|
| 110 | - | description: |
|
| 111 | - | feed.description || |
|
| 112 | - | feedData.description || |
|
| 113 | - | feedData.subtitle || |
|
| 114 | - | "", |
|
| 115 | + | title: sanitizedFeed.title, |
|
| 116 | + | description: sanitizedFeed.description || null, |
|
| 115 | 117 | category: feed.category || "Uncategorized", |
|
| 116 | 118 | dateUpdated: new Date().toISOString(), |
|
| 117 | 119 | }); |
|
| 118 | 120 | ||
| 119 | 121 | if (!result.ok) { |
|
| 120 | - | continue; |
|
| 122 | + | throw new Error("Failed to insert feed into database"); |
|
| 121 | 123 | } |
|
| 122 | 124 | ||
| 123 | 125 | for (const post of posts) { |
|
| 126 | + | // Sanitize post data to meet schema constraints |
|
| 127 | + | const sanitizedPost = sanitizePostData( |
|
| 128 | + | post, |
|
| 129 | + | isAtom, |
|
| 130 | + | feedData.title, |
|
| 131 | + | ); |
|
| 132 | + | ||
| 124 | 133 | evolu.insert("rssPost", { |
|
| 125 | - | title: post.title, |
|
| 126 | - | author: extractPostAuthor(post, isAtom, feedData.title), |
|
| 134 | + | title: sanitizedPost.title, |
|
| 135 | + | author: sanitizedPost.author || null, |
|
| 127 | 136 | publishedDate: extractPostDate(post), |
|
| 128 | - | link: extractPostLink(post, isAtom), |
|
| 137 | + | link: sanitizedPost.link, |
|
| 129 | 138 | feedId: result.value.id, |
|
| 130 | 139 | content: extractPostContent(post), |
|
| 131 | 140 | }); |
|
| 133 | 142 | ||
| 134 | 143 | successCount++; |
|
| 135 | 144 | } catch (error) { |
|
| 136 | - | console.error(`Failed to import feed: ${feed.title}`, error); |
|
| 137 | - | failCount++; |
|
| 145 | + | const errorMessage = |
|
| 146 | + | error instanceof Error ? error.message : "Unknown error"; |
|
| 147 | + | failedFeeds.push({ |
|
| 148 | + | title: feed.title, |
|
| 149 | + | url: feed.feedUrl, |
|
| 150 | + | error: errorMessage, |
|
| 151 | + | }); |
|
| 138 | 152 | } |
|
| 139 | 153 | } |
|
| 140 | 154 | ||
| 141 | - | toast.success( |
|
| 142 | - | `Import complete! Success: ${successCount}, Failed: ${failCount}`, |
|
| 143 | - | { id: importToast }, |
|
| 144 | - | ); |
|
| 155 | + | // Show summary toast |
|
| 156 | + | if (failedFeeds.length === 0) { |
|
| 157 | + | toast.success(`Successfully imported all ${successCount} feeds!`, { |
|
| 158 | + | id: importToast, |
|
| 159 | + | }); |
|
| 160 | + | } else { |
|
| 161 | + | toast.warning( |
|
| 162 | + | `Import complete! Success: ${successCount}, Failed: ${failedFeeds.length}`, |
|
| 163 | + | { |
|
| 164 | + | id: importToast, |
|
| 165 | + | duration: 5000, |
|
| 166 | + | }, |
|
| 167 | + | ); |
|
| 168 | + | ||
| 169 | + | // Show a follow-up toast with details |
|
| 170 | + | toast.error( |
|
| 171 | + | `${failedFeeds.length} feed${failedFeeds.length > 1 ? "s" : ""} failed to import.`, |
|
| 172 | + | { |
|
| 173 | + | duration: 8000, |
|
| 174 | + | }, |
|
| 175 | + | ); |
|
| 176 | + | } |
|
| 145 | 177 | } catch (error) { |
|
| 146 | - | console.error("Failed to import OPML:", error); |
|
| 147 | 178 | toast.error("Failed to import OPML. Please check the file format.", { |
|
| 148 | 179 | id: importToast, |
|
| 149 | 180 | }); |
|
| 198 | 229 | ||
| 199 | 230 | const { feedData, posts, isAtom } = parseFeedXml(xmlData); |
|
| 200 | 231 | ||
| 232 | + | // Sanitize feed data to meet schema constraints |
|
| 233 | + | const sanitizedFeed = sanitizeFeedData(feedData); |
|
| 234 | + | ||
| 201 | 235 | const result = evolu.insert("rssFeed", { |
|
| 202 | 236 | feedUrl: feedUrl, |
|
| 203 | - | title: feedData.title, |
|
| 204 | - | description: feedData.description || feedData.subtitle || "", |
|
| 237 | + | title: sanitizedFeed.title, |
|
| 238 | + | description: sanitizedFeed.description || null, |
|
| 205 | 239 | category: "Uncategorized", |
|
| 206 | 240 | dateUpdated: new Date().toISOString(), |
|
| 207 | 241 | }); |
|
| 211 | 245 | } |
|
| 212 | 246 | ||
| 213 | 247 | for (const post of posts) { |
|
| 248 | + | // Sanitize post data to meet schema constraints |
|
| 249 | + | const sanitizedPost = sanitizePostData(post, isAtom, feedData.title); |
|
| 250 | + | ||
| 214 | 251 | evolu.insert("rssPost", { |
|
| 215 | - | title: post.title, |
|
| 216 | - | author: extractPostAuthor(post, isAtom, feedData.title), |
|
| 252 | + | title: sanitizedPost.title, |
|
| 253 | + | author: sanitizedPost.author || null, |
|
| 217 | 254 | publishedDate: extractPostDate(post), |
|
| 218 | - | link: extractPostLink(post, isAtom), |
|
| 255 | + | link: sanitizedPost.link, |
|
| 219 | 256 | feedId: result.value.id, |
|
| 220 | 257 | content: extractPostContent(post), |
|
| 221 | 258 | }); |
|
| 228 | 265 | setUrlInput(""); |
|
| 229 | 266 | setErrorMessage(""); |
|
| 230 | 267 | } catch (error) { |
|
| 231 | - | console.error("Error adding feed:", error); |
|
| 232 | 268 | setErrorMessage( |
|
| 233 | 269 | error instanceof Error |
|
| 234 | 270 | ? error.message |
|
| 22 | 22 | extractPostAuthor, |
|
| 23 | 23 | extractPostContent, |
|
| 24 | 24 | extractPostDate, |
|
| 25 | + | sanitizeFeedData, |
|
| 26 | + | sanitizePostData, |
|
| 25 | 27 | } from "@/lib/feed-operations"; |
|
| 26 | 28 | ||
| 27 | 29 | interface AddFeedDialogProps { |
|
| 71 | 73 | ||
| 72 | 74 | const { feedData, posts, isAtom } = parseFeedXml(xmlData); |
|
| 73 | 75 | ||
| 76 | + | // Sanitize feed data to meet schema constraints |
|
| 77 | + | const sanitizedFeed = sanitizeFeedData(feedData); |
|
| 78 | + | ||
| 74 | 79 | const result = evolu.insert("rssFeed", { |
|
| 75 | 80 | feedUrl: feedUrl, |
|
| 76 | - | title: feedData.title, |
|
| 77 | - | description: feedData.description || feedData.subtitle || "", |
|
| 81 | + | title: sanitizedFeed.title, |
|
| 82 | + | description: sanitizedFeed.description || null, |
|
| 78 | 83 | category: categoryInput || "Uncategorized", |
|
| 79 | 84 | dateUpdated: new Date().toISOString(), |
|
| 80 | 85 | }); |
|
| 85 | 90 | ||
| 86 | 91 | // Process posts/entries |
|
| 87 | 92 | for (const post of posts) { |
|
| 93 | + | // Sanitize post data to meet schema constraints |
|
| 94 | + | const sanitizedPost = sanitizePostData(post, isAtom, feedData.title); |
|
| 95 | + | ||
| 88 | 96 | evolu.insert("rssPost", { |
|
| 89 | - | title: post.title, |
|
| 90 | - | author: extractPostAuthor(post, isAtom, feedData.title), |
|
| 97 | + | title: sanitizedPost.title, |
|
| 98 | + | author: sanitizedPost.author || null, |
|
| 91 | 99 | publishedDate: extractPostDate(post), |
|
| 92 | - | link: extractPostLink(post, isAtom), |
|
| 100 | + | link: sanitizedPost.link, |
|
| 93 | 101 | feedId: result.value.id, |
|
| 94 | 102 | content: extractPostContent(post), |
|
| 95 | 103 | }); |
|
| 104 | 112 | setStatusMessage(""); |
|
| 105 | 113 | onOpenChange(false); |
|
| 106 | 114 | } catch (error) { |
|
| 107 | - | console.error("Error adding feed:", error); |
|
| 108 | 115 | setStatusMessage( |
|
| 109 | 116 | error instanceof Error |
|
| 110 | 117 | ? error.message |
|
| 1 | 1 | import { XMLParser } from "fast-xml-parser"; |
|
| 2 | 2 | import { COMMON_FEED_PATHS } from "./feed-discovery"; |
|
| 3 | 3 | ||
| 4 | - | const parser = new XMLParser(); |
|
| 4 | + | const parser = new XMLParser({ |
|
| 5 | + | ignoreAttributes: false, |
|
| 6 | + | attributeNamePrefix: "@_", |
|
| 7 | + | textNodeName: "#text", |
|
| 8 | + | cdataPropName: "__cdata", |
|
| 9 | + | parseAttributeValue: true, |
|
| 10 | + | trimValues: true, |
|
| 11 | + | }); |
|
| 5 | 12 | ||
| 6 | 13 | export interface ParsedFeedData { |
|
| 7 | 14 | feedData: any; |
|
| 30 | 37 | * Parses XML data and determines if it's RSS or Atom feed |
|
| 31 | 38 | */ |
|
| 32 | 39 | export function parseFeedXml(xmlData: string): ParsedFeedData { |
|
| 33 | - | const parsedXmlData = parser.parse(xmlData); |
|
| 40 | + | let parsedXmlData: any; |
|
| 41 | + | ||
| 42 | + | try { |
|
| 43 | + | parsedXmlData = parser.parse(xmlData); |
|
| 44 | + | } catch (error) { |
|
| 45 | + | throw new Error( |
|
| 46 | + | `XML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`, |
|
| 47 | + | ); |
|
| 48 | + | } |
|
| 34 | 49 | ||
| 35 | 50 | // Determine if it's RSS or Atom feed |
|
| 36 | 51 | let feedData: any; |
|
| 40 | 55 | if (parsedXmlData.rss) { |
|
| 41 | 56 | // RSS feed |
|
| 42 | 57 | feedData = parsedXmlData.rss.channel; |
|
| 43 | - | posts = feedData.item || []; |
|
| 58 | + | if (!feedData) { |
|
| 59 | + | throw new Error("RSS feed missing channel element"); |
|
| 60 | + | } |
|
| 61 | + | const items = feedData.item || []; |
|
| 62 | + | // Ensure posts is always an array (single item might not be in array) |
|
| 63 | + | posts = Array.isArray(items) ? items : items ? [items] : []; |
|
| 44 | 64 | } else if (parsedXmlData.feed) { |
|
| 45 | 65 | // Atom feed |
|
| 46 | 66 | feedData = parsedXmlData.feed; |
|
| 47 | - | posts = feedData.entry || []; |
|
| 67 | + | const entries = feedData.entry || []; |
|
| 68 | + | // Ensure posts is always an array (single entry might not be in array) |
|
| 69 | + | posts = Array.isArray(entries) ? entries : entries ? [entries] : []; |
|
| 48 | 70 | isAtom = true; |
|
| 71 | + | } else if (parsedXmlData["rdf:RDF"]) { |
|
| 72 | + | // RDF/RSS 1.0 feed |
|
| 73 | + | feedData = parsedXmlData["rdf:RDF"].channel; |
|
| 74 | + | const items = parsedXmlData["rdf:RDF"].item || []; |
|
| 75 | + | posts = Array.isArray(items) ? items : items ? [items] : []; |
|
| 76 | + | isAtom = false; |
|
| 49 | 77 | } else { |
|
| 50 | - | throw new Error("Unsupported feed format"); |
|
| 78 | + | // Include the root elements that were found in the error message to aid debugging |
|
| 79 | + | const rootKeys = Object.keys(parsedXmlData); |
|
| 80 | + | throw new Error( |
|
| 81 | + | `Unsupported feed format. Found root elements: ${rootKeys.join(", ")}`, |
|
| 82 | + | ); |
|
| 51 | 83 | } |
|
| 52 | 84 | ||
| 85 | + | // Filter out empty objects from posts array |
|
| 86 | + | posts = posts.filter((post) => post && Object.keys(post).length > 0); |
|
| 87 | + | ||
| 53 | 88 | return { feedData, posts, isAtom }; |
|
| 54 | 89 | } |
|
| 55 | 90 | ||
| 63 | 98 | const urlObj = new URL(websiteUrl); |
|
| 64 | 99 | const origin = urlObj.origin; |
|
| 65 | 100 | ||
| 66 | - | console.log("Trying to discover feed from:", origin); |
|
| 67 | - | ||
| 68 | 101 | for (const path of COMMON_FEED_PATHS) { |
|
| 69 | 102 | const testUrl = `${origin}${path}`; |
|
| 70 | - | console.log("Testing:", testUrl); |
|
| 71 | 103 | ||
| 72 | 104 | try { |
|
| 73 | 105 | // Use CORS proxy to avoid CORS issues |
|
| 83 | 115 | text.includes("<rss") || |
|
| 84 | 116 | text.includes("<feed") |
|
| 85 | 117 | ) { |
|
| 86 | - | console.log("Found feed at:", testUrl); |
|
| 87 | 118 | return { feedUrl: testUrl, xmlData: text }; |
|
| 88 | 119 | } |
|
| 89 | 120 | } |
|
| 90 | 121 | } catch (error) { |
|
| 91 | - | console.log("Failed to fetch:", testUrl, error); |
|
| 92 | 122 | continue; |
|
| 93 | 123 | } |
|
| 94 | 124 | } |
|
| 113 | 143 | */ |
|
export function extractPostLink(post: any, isAtom: boolean): string {
  // Returns the best URL for a parsed feed entry, or "#" when none is usable.
  // `post` is one item/entry object as produced by the XML parser; `isAtom`
  // selects Atom (<link href="…">) or RSS (<link>text</link>) conventions.
  const PLACEHOLDER = "#";

  // Reduces a parsed XML value to trimmed text. Elements that carry
  // attributes or CDATA arrive as objects holding "#text" / "__cdata";
  // repeated elements arrive as arrays, of which the first non-empty wins.
  const textOf = (value: unknown): string => {
    if (value === null || value === undefined) return "";
    if (Array.isArray(value)) {
      for (const item of value) {
        const text = textOf(item);
        if (text) return text;
      }
      return "";
    }
    if (typeof value === "object") {
      const node = value as Record<string, unknown>;
      return textOf(node["#text"]) || textOf(node.__cdata);
    }
    return String(value).trim();
  };

  if (isAtom) {
    // An Atom link is a plain string, a single element object, or an array
    // of them; normalise to an array so one code path handles all shapes.
    const rawLink: unknown = post?.link;
    const links: unknown[] = Array.isArray(rawLink) ? rawLink : [rawLink];

    const hrefOf = (link: unknown): string =>
      link !== null && typeof link === "object"
        ? textOf((link as Record<string, unknown>)["@_href"])
        : textOf(link);
    const relOf = (link: unknown): unknown =>
      link !== null && typeof link === "object"
        ? (link as Record<string, unknown>)["@_rel"]
        : undefined;

    // Prefer rel="alternate" (or no rel, which Atom treats as alternate),
    // then any link that has an href, then the entry id.
    const preferred = links.find((link) => {
      const rel = relOf(link);
      return (!rel || rel === "alternate") && hrefOf(link) !== "";
    });
    const anyWithHref = links.find((link) => hrefOf(link) !== "");

    return (
      hrefOf(preferred) ||
      hrefOf(anyWithHref) ||
      textOf(post?.id) ||
      PLACEHOLDER
    );
  }

  // RSS feed. A <guid isPermaLink="false"> is an opaque identifier, not a
  // URL, so it is only used as a link when it actually looks like one.
  const guid = post?.guid;
  const guidText = textOf(guid);
  const declaredNonPermalink =
    guid !== null &&
    typeof guid === "object" &&
    !Array.isArray(guid) &&
    String(guid["@_isPermaLink"]).toLowerCase() === "false";
  const guidLink =
    !declaredNonPermalink || /^https?:\/\//i.test(guidText) ? guidText : "";

  return textOf(post?.link) || guidLink || textOf(post?.id) || PLACEHOLDER;
}
|
| 122 | 174 | ||
| 123 | 175 | /** |
|
| 129 | 181 | feedTitle: string, |
|
| 130 | 182 | ): string { |
|
| 131 | 183 | if (isAtom) { |
|
| 132 | - | return post.author?.name || feedTitle; |
|
| 184 | + | // Atom can have author as object with name property |
|
| 185 | + | const author = post.author; |
|
| 186 | + | if (typeof author === "object" && author !== null) { |
|
| 187 | + | return author.name || author["#text"] || feedTitle; |
|
| 188 | + | } |
|
| 189 | + | return author || feedTitle; |
|
| 133 | 190 | } |
|
| 134 | - | return post.author || feedTitle; |
|
| 191 | + | ||
| 192 | + | // RSS feed |
|
| 193 | + | const author = post.author || post["dc:creator"] || post.creator; |
|
| 194 | + | if (!author) return feedTitle; |
|
| 195 | + | ||
| 196 | + | // Handle author as object |
|
| 197 | + | if (typeof author === "object") { |
|
| 198 | + | return author["#text"] || author.__cdata || feedTitle; |
|
| 199 | + | } |
|
| 200 | + | ||
| 201 | + | return String(author); |
|
| 135 | 202 | } |
|
| 136 | 203 | ||
/**
 * Extracts the body text of an RSS item or Atom entry.
 *
 * The fields `content:encoded`, `content`, `description` and `summary` are
 * tried in that order, and the first one that yields non-empty text wins.
 * A field that is present but carries no text (for example an element with
 * only attributes, or a whitespace-only string) is skipped so that a later
 * field can still supply the content.
 *
 * @param post - One parsed item/entry object from the feed.
 * @returns The trimmed content, or "Please open on the web" when no field
 *   holds any text (which is normal for link-only feeds).
 */
export function extractPostContent(post: any): string {
  // Reduces a parsed XML value to trimmed text. Elements with CDATA or
  // attributes arrive as objects holding "__cdata" / "#text"; repeated
  // elements arrive as arrays, of which the first non-empty one is used.
  const textOf = (value: unknown): string => {
    if (value === null || value === undefined) return "";
    if (Array.isArray(value)) {
      for (const item of value) {
        const text = textOf(item);
        if (text) return text;
      }
      return "";
    }
    if (typeof value === "object") {
      const node = value as Record<string, unknown>;
      return textOf(node.__cdata) || textOf(node["#text"]);
    }
    return String(value).trim();
  };

  const candidates: unknown[] = [
    post?.["content:encoded"],
    post?.content,
    post?.description,
    post?.summary,
  ];

  for (const candidate of candidates) {
    const text = textOf(candidate);
    if (text) return text;
  }

  return "Please open on the web";
}
|
| 143 | 227 | ||
/**
 * Extracts the publication date of an RSS, RDF or Atom entry as an ISO 8601
 * string.
 *
 * The fields `pubDate`, `updated`, `published`, `dc:date` and `date` are
 * tried in that order; the first one that parses to a valid date wins.
 * Values wrapped in an element object ("#text" / "__cdata") are unwrapped
 * before parsing.
 *
 * @param post - One parsed item/entry object from the feed.
 * @returns The parsed date in ISO format, or the current time when no field
 *   contains a parsable date.
 */
export function extractPostDate(post: any): string {
  // Elements that carry attributes or CDATA are parsed into objects; pull
  // the text out so that `new Date` sees a string or number.
  const unwrap = (value: unknown): unknown => {
    if (value !== null && typeof value === "object" && !(value instanceof Date)) {
      const node = value as Record<string, unknown>;
      return node["#text"] ?? node.__cdata;
    }
    return value;
  };

  const candidates: unknown[] = [
    post?.pubDate,
    post?.updated,
    post?.published,
    post?.["dc:date"],
    post?.date,
  ];

  for (const candidate of candidates) {
    const raw = unwrap(candidate);
    // Skip missing/empty values, as well as anything `Date` cannot accept.
    if (!raw) continue;
    if (typeof raw !== "string" && typeof raw !== "number" && !(raw instanceof Date)) {
      continue;
    }
    const parsed = new Date(raw);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toISOString();
    }
  }

  // No usable date in the entry: fall back to the time of import.
  return new Date().toISOString();
}
|
| 247 | + | ||
/**
 * Converts a parsed XML value into a plain string.
 *
 * - `null` / `undefined` become "".
 * - Strings are returned unchanged.
 * - Objects are searched for text under `__cdata`, `#text` and `text`, in
 *   that order; the first non-empty one is used, and "" is returned when
 *   none is present.
 * - Any other primitive (numbers and booleans, including `0` and `false`)
 *   is stringified, so text such as "0" that the parser turned into a
 *   number is not lost.
 *
 * @param value - A value taken from the parsed feed.
 * @returns The extracted text, or "" when there is none.
 */
function extractStringValue(value: unknown): string {
  if (value === null || value === undefined) return "";
  if (typeof value === "string") return value;

  if (typeof value === "object") {
    const node = value as Record<string, unknown>;
    for (const key of ["__cdata", "#text", "text"]) {
      // Recurse so that a nested element object yields its text rather
      // than "[object Object]".
      const text = extractStringValue(node[key]);
      if (text !== "") return text;
    }
    // No recognisable text property on the object.
    return "";
  }

  // Numbers, booleans, bigints.
  return String(value);
}
|
| 268 | + | ||
/**
 * Trims a value's text and shortens it to at most `maxLength` UTF-16 code
 * units.
 *
 * Text that is too long is cut and suffixed with "..." so that the result
 * is exactly `maxLength` long. When `maxLength` is smaller than the
 * ellipsis itself the text is cut hard instead, so the result never exceeds
 * the limit. A cut never lands in the middle of a surrogate pair.
 *
 * @param str - Any value accepted by `extractStringValue`.
 * @param maxLength - Maximum length of the result; non-positive or NaN
 *   values are treated as 0.
 * @returns The trimmed, possibly shortened text ("" when there is none).
 */
export function truncateString(str: any, maxLength: number): string {
  const ELLIPSIS = "...";
  const text = extractStringValue(str).trim();
  const limit = Number.isNaN(maxLength) ? 0 : Math.max(0, Math.floor(maxLength));

  if (text.length <= limit) return text;

  // Only append the ellipsis when it fits inside the limit.
  const withEllipsis = limit >= ELLIPSIS.length;
  let end = withEllipsis ? limit - ELLIPSIS.length : limit;

  // Do not leave a dangling high surrogate at the cut point.
  if (end > 0) {
    const lastUnit = text.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }
  }

  const head = text.substring(0, end);
  return withEllipsis ? head + ELLIPSIS : head;
}
|
| 279 | + | ||
/**
 * Builds the title and description to store for a feed.
 *
 * Each candidate is sanitised with `truncateString` first, and the first one
 * that still has text afterwards is used. A candidate that is present but
 * textless (an attribute-only element, a whitespace-only string) therefore
 * no longer blocks the fallbacks.
 *
 * Title candidates: `feedData.title`, `feed.title`, then "Untitled Feed".
 * Description candidates: `feedData.description`, `feedData.subtitle`, then
 * `feed.description`; "" when none has text.
 *
 * @param feedData - The parsed channel/feed object (may be undefined).
 * @param feed - Optional extra metadata for the same feed, e.g. an OPML
 *   outline entry, used as a fallback.
 * @returns `title` limited to 200 characters and `description` limited to
 *   1000 characters.
 */
export function sanitizeFeedData(feedData: any, feed?: any) {
  const firstWithText = (candidates: unknown[], maxLength: number): string => {
    for (const candidate of candidates) {
      const text = truncateString(candidate, maxLength);
      if (text) return text;
    }
    return "";
  };

  return {
    title: firstWithText(
      [feedData?.title, feed?.title, "Untitled Feed"],
      200,
    ),
    description: firstWithText(
      [feedData?.description, feedData?.subtitle, feed?.description],
      1000,
    ),
  };
}
|
| 294 | + | ||
/**
 * Builds the title, author and link to store for a single post.
 *
 * The "Untitled" fallback is applied after the title has been sanitised, so
 * a title that is present but carries no text (an attribute-only element, a
 * whitespace-only string) does not end up as an empty string.
 *
 * @param post - One parsed item/entry object from the feed.
 * @param isAtom - Whether the feed uses Atom conventions; forwarded to the
 *   author and link extractors.
 * @param feedTitle - Forwarded to `extractPostAuthor` as its `feedTitle`
 *   argument.
 * @returns `title` (max 1000 characters), `author` (max 200) and `link`
 *   (max 1000).
 */
export function sanitizePostData(
  post: any,
  isAtom: boolean,
  feedTitle: string,
) {
  return {
    title: truncateString(post?.title, 1000) || "Untitled",
    author: truncateString(extractPostAuthor(post, isAtom, feedTitle), 200),
    // NOTE(review): a link longer than the limit is shortened with "..." and
    // will no longer be a valid URL — confirm this is acceptable.
    link: truncateString(extractPostLink(post, isAtom), 1000),
  };
}
|