fix: fixed opml importing and improved feed parsing f5f2a012
Steve · 2025-11-05 12:59 3 file(s) · +250 −48
src/App.tsx +60 −24
14 14
	extractPostAuthor,
15 15
	extractPostContent,
16 16
	extractPostDate,
17 +
	sanitizeFeedData,
18 +
	sanitizePostData,
17 19
} from "@/lib/feed-operations";
18 20
import { parseOPML } from "@/lib/opml";
19 21
import {
91 93
			});
92 94
93 95
			let successCount = 0;
94 -
			let failCount = 0;
96 +
			const failedFeeds: Array<{ title: string; url: string; error: string }> =
97 +
				[];
95 98
96 99
			for (let i = 0; i < opmlFeeds.length; i++) {
97 100
				const feed = opmlFeeds[i];
104 107
					const xmlData = await fetchFeedWithFallback(feed.feedUrl);
105 108
					const { feedData, posts, isAtom } = parseFeedXml(xmlData);
106 109
110 +
					// Sanitize feed data to meet schema constraints
111 +
					const sanitizedFeed = sanitizeFeedData(feedData, feed);
112 +
107 113
					const result = evolu.insert("rssFeed", {
108 114
						feedUrl: feed.feedUrl,
109 -
						title: feed.title,
110 -
						description:
111 -
							feed.description ||
112 -
							feedData.description ||
113 -
							feedData.subtitle ||
114 -
							"",
115 +
						title: sanitizedFeed.title,
116 +
						description: sanitizedFeed.description || null,
115 117
						category: feed.category || "Uncategorized",
116 118
						dateUpdated: new Date().toISOString(),
117 119
					});
118 120
119 121
					if (!result.ok) {
120 -
						continue;
122 +
						throw new Error("Failed to insert feed into database");
121 123
					}
122 124
123 125
					for (const post of posts) {
126 +
						// Sanitize post data to meet schema constraints
127 +
						const sanitizedPost = sanitizePostData(
128 +
							post,
129 +
							isAtom,
130 +
							feedData.title,
131 +
						);
132 +
124 133
						evolu.insert("rssPost", {
125 -
							title: post.title,
126 -
							author: extractPostAuthor(post, isAtom, feedData.title),
134 +
							title: sanitizedPost.title,
135 +
							author: sanitizedPost.author || null,
127 136
							publishedDate: extractPostDate(post),
128 -
							link: extractPostLink(post, isAtom),
137 +
							link: sanitizedPost.link,
129 138
							feedId: result.value.id,
130 139
							content: extractPostContent(post),
131 140
						});
133 142
134 143
					successCount++;
135 144
				} catch (error) {
136 -
					console.error(`Failed to import feed: ${feed.title}`, error);
137 -
					failCount++;
145 +
					const errorMessage =
146 +
						error instanceof Error ? error.message : "Unknown error";
147 +
					failedFeeds.push({
148 +
						title: feed.title,
149 +
						url: feed.feedUrl,
150 +
						error: errorMessage,
151 +
					});
138 152
				}
139 153
			}
140 154
141 -
			toast.success(
142 -
				`Import complete! Success: ${successCount}, Failed: ${failCount}`,
143 -
				{ id: importToast },
144 -
			);
155 +
			// Show summary toast
156 +
			if (failedFeeds.length === 0) {
157 +
				toast.success(`Successfully imported all ${successCount} feeds!`, {
158 +
					id: importToast,
159 +
				});
160 +
			} else {
161 +
				toast.warning(
162 +
					`Import complete! Success: ${successCount}, Failed: ${failedFeeds.length}`,
163 +
					{
164 +
						id: importToast,
165 +
						duration: 5000,
166 +
					},
167 +
				);
168 +
169 +
				// Show a follow-up toast with details
170 +
				toast.error(
171 +
					`${failedFeeds.length} feed${failedFeeds.length > 1 ? "s" : ""} failed to import.`,
172 +
					{
173 +
						duration: 8000,
174 +
					},
175 +
				);
176 +
			}
145 177
		} catch (error) {
146 -
			console.error("Failed to import OPML:", error);
147 178
			toast.error("Failed to import OPML. Please check the file format.", {
148 179
				id: importToast,
149 180
			});
198 229
199 230
			const { feedData, posts, isAtom } = parseFeedXml(xmlData);
200 231
232 +
			// Sanitize feed data to meet schema constraints
233 +
			const sanitizedFeed = sanitizeFeedData(feedData);
234 +
201 235
			const result = evolu.insert("rssFeed", {
202 236
				feedUrl: feedUrl,
203 -
				title: feedData.title,
204 -
				description: feedData.description || feedData.subtitle || "",
237 +
				title: sanitizedFeed.title,
238 +
				description: sanitizedFeed.description || null,
205 239
				category: "Uncategorized",
206 240
				dateUpdated: new Date().toISOString(),
207 241
			});
211 245
			}
212 246
213 247
			for (const post of posts) {
248 +
				// Sanitize post data to meet schema constraints
249 +
				const sanitizedPost = sanitizePostData(post, isAtom, feedData.title);
250 +
214 251
				evolu.insert("rssPost", {
215 -
					title: post.title,
216 -
					author: extractPostAuthor(post, isAtom, feedData.title),
252 +
					title: sanitizedPost.title,
253 +
					author: sanitizedPost.author || null,
217 254
					publishedDate: extractPostDate(post),
218 -
					link: extractPostLink(post, isAtom),
255 +
					link: sanitizedPost.link,
219 256
					feedId: result.value.id,
220 257
					content: extractPostContent(post),
221 258
				});
228 265
			setUrlInput("");
229 266
			setErrorMessage("");
230 267
		} catch (error) {
231 -
			console.error("Error adding feed:", error);
232 268
			setErrorMessage(
233 269
				error instanceof Error
234 270
					? error.message
src/components/add-feed-dialog.tsx +13 −6
22 22
	extractPostAuthor,
23 23
	extractPostContent,
24 24
	extractPostDate,
25 +
	sanitizeFeedData,
26 +
	sanitizePostData,
25 27
} from "@/lib/feed-operations";
26 28
27 29
interface AddFeedDialogProps {
71 73
72 74
			const { feedData, posts, isAtom } = parseFeedXml(xmlData);
73 75
76 +
			// Sanitize feed data to meet schema constraints
77 +
			const sanitizedFeed = sanitizeFeedData(feedData);
78 +
74 79
			const result = evolu.insert("rssFeed", {
75 80
				feedUrl: feedUrl,
76 -
				title: feedData.title,
77 -
				description: feedData.description || feedData.subtitle || "",
81 +
				title: sanitizedFeed.title,
82 +
				description: sanitizedFeed.description || null,
78 83
				category: categoryInput || "Uncategorized",
79 84
				dateUpdated: new Date().toISOString(),
80 85
			});
85 90
86 91
			// Process posts/entries
87 92
			for (const post of posts) {
93 +
				// Sanitize post data to meet schema constraints
94 +
				const sanitizedPost = sanitizePostData(post, isAtom, feedData.title);
95 +
88 96
				evolu.insert("rssPost", {
89 -
					title: post.title,
90 -
					author: extractPostAuthor(post, isAtom, feedData.title),
97 +
					title: sanitizedPost.title,
98 +
					author: sanitizedPost.author || null,
91 99
					publishedDate: extractPostDate(post),
92 -
					link: extractPostLink(post, isAtom),
100 +
					link: sanitizedPost.link,
93 101
					feedId: result.value.id,
94 102
					content: extractPostContent(post),
95 103
				});
104 112
			setStatusMessage("");
105 113
			onOpenChange(false);
106 114
		} catch (error) {
107 -
			console.error("Error adding feed:", error);
108 115
			setStatusMessage(
109 116
				error instanceof Error
110 117
					? error.message
src/lib/feed-operations.ts +177 −18
1 1
import { XMLParser } from "fast-xml-parser";
2 2
import { COMMON_FEED_PATHS } from "./feed-discovery";
3 3
4 -
const parser = new XMLParser();
4 +
// Shared XML parser used for every feed document handled by this module.
const parser = new XMLParser({
	// Attributes must be kept: Atom links carry their URL in `href`,
	// which the extractors below read as "@_href".
	ignoreAttributes: false,
	attributeNamePrefix: "@_",
	// Element text that sits next to attributes is read back as "#text".
	textNodeName: "#text",
	// CDATA payloads are read back under "__cdata" by the extractors.
	cdataPropName: "__cdata",
	parseAttributeValue: true,
	// Keep element text as strings. With the library default, numeric-looking
	// text is coerced to a number, so a title such as "007" would become 7 and
	// a title of "0" would be treated as missing by the `||` fallbacks.
	parseTagValue: false,
	trimValues: true,
});
5 12
6 13
export interface ParsedFeedData {
7 14
	feedData: any;
30 37
 * Parses XML data and determines if it's RSS or Atom feed
31 38
 */
32 39
export function parseFeedXml(xmlData: string): ParsedFeedData {
33 -
	const parsedXmlData = parser.parse(xmlData);
40 +
	let parsedXmlData: any;
41 +
42 +
	try {
43 +
		parsedXmlData = parser.parse(xmlData);
44 +
	} catch (error) {
45 +
		throw new Error(
46 +
			`XML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`,
47 +
		);
48 +
	}
34 49
35 50
	// Determine if it's RSS or Atom feed
36 51
	let feedData: any;
40 55
	if (parsedXmlData.rss) {
41 56
		// RSS feed
42 57
		feedData = parsedXmlData.rss.channel;
43 -
		posts = feedData.item || [];
58 +
		if (!feedData) {
59 +
			throw new Error("RSS feed missing channel element");
60 +
		}
61 +
		const items = feedData.item || [];
62 +
		// Ensure posts is always an array (single item might not be in array)
63 +
		posts = Array.isArray(items) ? items : items ? [items] : [];
44 64
	} else if (parsedXmlData.feed) {
45 65
		// Atom feed
46 66
		feedData = parsedXmlData.feed;
47 -
		posts = feedData.entry || [];
67 +
		const entries = feedData.entry || [];
68 +
		// Ensure posts is always an array (single entry might not be in array)
69 +
		posts = Array.isArray(entries) ? entries : entries ? [entries] : [];
48 70
		isAtom = true;
71 +
	} else if (parsedXmlData["rdf:RDF"]) {
72 +
		// RDF/RSS 1.0 feed
73 +
		feedData = parsedXmlData["rdf:RDF"].channel;
74 +
		const items = parsedXmlData["rdf:RDF"].item || [];
75 +
		posts = Array.isArray(items) ? items : items ? [items] : [];
76 +
		isAtom = false;
49 77
	} else {
50 -
		throw new Error("Unsupported feed format");
78 +
		// Log available root elements for debugging
79 +
		const rootKeys = Object.keys(parsedXmlData);
80 +
		throw new Error(
81 +
			`Unsupported feed format. Found root elements: ${rootKeys.join(", ")}`,
82 +
		);
51 83
	}
52 84
85 +
	// Filter out empty objects from posts array
86 +
	posts = posts.filter((post) => post && Object.keys(post).length > 0);
87 +
53 88
	return { feedData, posts, isAtom };
54 89
}
55 90
63 98
	const urlObj = new URL(websiteUrl);
64 99
	const origin = urlObj.origin;
65 100
66 -
	console.log("Trying to discover feed from:", origin);
67 -
68 101
	for (const path of COMMON_FEED_PATHS) {
69 102
		const testUrl = `${origin}${path}`;
70 -
		console.log("Testing:", testUrl);
71 103
72 104
		try {
73 105
			// Use CORS proxy to avoid CORS issues
83 115
					text.includes("<rss") ||
84 116
					text.includes("<feed")
85 117
				) {
86 -
					console.log("Found feed at:", testUrl);
87 118
					return { feedUrl: testUrl, xmlData: text };
88 119
				}
89 120
			}
90 121
		} catch (error) {
91 -
			console.log("Failed to fetch:", testUrl, error);
92 122
			continue;
93 123
		}
94 124
	}
113 143
 */
114 144
/**
 * Resolves the URL a post should open.
 *
 * Atom: `post.link` may be a string, a single link object, or an array of
 * them. The first link that has a target and whose `rel` is absent or
 * "alternate" wins; otherwise the first link with any target is used, then
 * `post.id`.
 *
 * RSS: `post.link`, `post.guid` and `post.id` are tried in that order. A
 * field that carries no usable text (for example an attribute-only object)
 * no longer ends the search; the next field is tried instead.
 *
 * @param post - Parsed item/entry object produced by the feed parser.
 * @param isAtom - True when the post comes from an Atom feed.
 * @returns The link as a string, or "#" when none can be found.
 */
export function extractPostLink(post: any, isAtom: boolean): string {
	if (isAtom) {
		const rawLinks: unknown[] = Array.isArray(post.link)
			? post.link
			: [post.link];
		let fallbackHref = "";

		for (const entry of rawLinks) {
			const href = linkNodeText(entry, ["@_href", "#text", "__cdata"]);
			if (!href) continue; // null entries and links without a target

			const rel =
				typeof entry === "object" && entry !== null
					? (entry as Record<string, unknown>)["@_rel"]
					: undefined;
			if (!rel || rel === "alternate") return href;

			// Remember the first non-alternate link in case nothing better shows up.
			if (!fallbackHref) fallbackHref = href;
		}

		return (
			fallbackHref || linkNodeText(post.id, ["#text", "__cdata"]) || "#"
		);
	}

	// RSS feed
	for (const candidate of [post.link, post.guid, post.id]) {
		// A repeated element is parsed as an array; take its first usable value.
		const entries: unknown[] = Array.isArray(candidate)
			? candidate
			: [candidate];
		for (const entry of entries) {
			const text = linkNodeText(entry, ["#text", "__cdata", "@_href"]);
			if (text) return text;
		}
	}

	return "#";
}

/**
 * Reads a link value as a string.
 *
 * Strings and numbers are returned in string form. For an object, `keys` are
 * tried in order and the first one holding a non-empty string or number is
 * returned. Anything else yields "".
 */
function linkNodeText(value: unknown, keys: readonly string[]): string {
	if (typeof value === "string" || typeof value === "number") {
		return String(value);
	}
	if (typeof value !== "object" || value === null) return "";

	const node = value as Record<string, unknown>;
	for (const key of keys) {
		const inner = node[key];
		if (
			(typeof inner === "string" || typeof inner === "number") &&
			String(inner) !== ""
		) {
			return String(inner);
		}
	}
	return "";
}
122 174
123 175
/**
129 181
	feedTitle: string,
130 182
): string {
131 183
	if (isAtom) {
132 -
		return post.author?.name || feedTitle;
184 +
		// Atom can have author as object with name property
185 +
		const author = post.author;
186 +
		if (typeof author === "object" && author !== null) {
187 +
			return author.name || author["#text"] || feedTitle;
188 +
		}
189 +
		return author || feedTitle;
133 190
	}
134 -
	return post.author || feedTitle;
191 +
192 +
	// RSS feed
193 +
	const author = post.author || post["dc:creator"] || post.creator;
194 +
	if (!author) return feedTitle;
195 +
196 +
	// Handle author as object
197 +
	if (typeof author === "object") {
198 +
		return author["#text"] || author.__cdata || feedTitle;
199 +
	}
200 +
201 +
	return String(author);
135 202
}
136 203
137 204
/**
 * Extracts the body text of an RSS or Atom post.
 *
 * The fields `content:encoded`, `content`, `description` and `summary` are
 * tried in that order. The first one that yields non-empty text wins, so a
 * field that is present but carries no text (an attribute-only object, a
 * whitespace-only string) does not hide a later field that does.
 *
 * @param post - Parsed item/entry object produced by the feed parser.
 * @returns The trimmed content, or "Please open on the web" when the post
 *   has no text content (which is normal for link-only feeds).
 */
export function extractPostContent(post: any): string {
	const candidates: unknown[] = [
		post["content:encoded"],
		post.content,
		post.description,
		post.summary,
	];

	for (const candidate of candidates) {
		const text = contentNodeText(candidate);
		if (text) return text;
	}

	return "Please open on the web";
}

/**
 * Reads a content value as trimmed text.
 *
 * Primitives are stringified. For an object, "__cdata" is preferred over
 * "#text". For an array, the first element with text is used.
 * null/undefined and text-less values yield "".
 */
function contentNodeText(value: unknown): string {
	if (value === null || value === undefined) return "";

	if (Array.isArray(value)) {
		for (const item of value) {
			const text = contentNodeText(item);
			if (text) return text;
		}
		return "";
	}

	if (typeof value === "object") {
		const node = value as Record<string, unknown>;
		return contentNodeText(node.__cdata) || contentNodeText(node["#text"]);
	}

	return String(value).trim();
}
143 227
144 228
/**
 * Extracts the publication date of an RSS, Atom or RDF post as an ISO-8601
 * string.
 *
 * The fields `pubDate`, `updated`, `published` and `dc:date` are tried in
 * that order, and the first one that parses to a valid date is returned. A
 * field may be plain text or an object whose text sits under "#text" or
 * "__cdata".
 *
 * @param post - Parsed item/entry object produced by the feed parser.
 * @returns The date in `toISOString()` form. When no field holds a valid
 *   date, the current time is returned.
 */
export function extractPostDate(post: any): string {
	const candidates: unknown[] = [
		post.pubDate,
		post.updated,
		post.published,
		post["dc:date"],
	];

	for (const candidate of candidates) {
		let raw: unknown = candidate;
		if (typeof raw === "object" && raw !== null) {
			// Element with attributes or CDATA: the text lives one level down.
			const node = raw as Record<string, unknown>;
			raw = node["#text"] ?? node.__cdata;
		}
		if (typeof raw !== "string" && typeof raw !== "number") continue;

		// Always parse the textual form so a bare year such as 2024 is read as
		// a date, not as a millisecond offset from the epoch.
		const parsed = new Date(String(raw).trim());
		if (!Number.isNaN(parsed.getTime())) return parsed.toISOString();
	}

	return new Date().toISOString();
}
247 +
248 +
/**
 * Extracts a string from a parsed XML value of unknown shape.
 *
 * - null, undefined and NaN yield "".
 * - Strings are returned unchanged.
 * - Other primitives (including 0 and false) are stringified.
 * - Arrays yield the text of their first element that has any.
 * - Objects are searched under "__cdata", "#text" and "text", in that order,
 *   and the first non-empty result is returned.
 *
 * @param value - Any value taken from the parsed feed.
 * @returns The extracted text, or "" when the value carries none.
 */
function extractStringValue(value: any): string {
	if (value === null || value === undefined) return "";
	if (typeof value === "string") return value;

	if (Array.isArray(value)) {
		for (const item of value) {
			const text = extractStringValue(item);
			if (text) return text;
		}
		return "";
	}

	if (typeof value === "object") {
		for (const key of ["__cdata", "#text", "text"]) {
			const text = extractStringValue(value[key]);
			if (text) return text;
		}
		// No recognised text property: the object has no string form worth storing.
		return "";
	}

	if (typeof value === "number" && Number.isNaN(value)) return "";

	// Numbers, booleans, bigints.
	return String(value);
}
268 +
269 +
/**
 * Converts a value to trimmed text and shortens it to at most `maxLength`
 * UTF-16 code units.
 *
 * Text that is too long is cut and "..." is appended, with the ellipsis
 * counted towards `maxLength`. When `maxLength` is too small to hold any
 * text plus the ellipsis, the text is cut without one, so the result never
 * exceeds `maxLength`.
 *
 * @param str - Any value. Its text is obtained through `extractStringValue`.
 * @param maxLength - Maximum length of the returned string.
 * @returns The trimmed, possibly shortened text. "" when there is none.
 */
export function truncateString(str: any, maxLength: number): string {
	const trimmed = extractStringValue(str).trim();
	if (trimmed.length <= maxLength) return trimmed;

	const ellipsis = "...";
	const withEllipsis = maxLength > ellipsis.length;
	let end = Math.max(0, withEllipsis ? maxLength - ellipsis.length : maxLength);

	// Do not cut between the two halves of a surrogate pair: a lone high
	// surrogate is not valid text.
	if (end > 0) {
		const last = trimmed.charCodeAt(end - 1);
		if (last >= 0xd800 && last <= 0xdbff) end -= 1;
	}

	const head = trimmed.substring(0, end);
	return withEllipsis ? head + ellipsis : head;
}
279 +
280 +
/**
 * Builds the title and description stored for a feed.
 *
 * Title candidates: `feedData.title`, then `feed.title`, then the literal
 * "Untitled Feed". Description candidates: `feedData.description`,
 * `feedData.subtitle`, then `feed.description`. Each candidate is converted
 * to text first and the first one that is non-empty wins, so a value that is
 * present but has no text does not block the ones after it.
 *
 * @param feedData - Parsed channel/feed object. May be null or undefined.
 * @param feed - Optional second source of title and description (the OPML
 *   import passes its outline entry here).
 * @returns `title` (at most 200 characters, never empty) and `description`
 *   (at most 1000 characters, "" when none is available).
 */
export function sanitizeFeedData(feedData: any, feed?: any) {
	const firstText = (candidates: unknown[], maxLength: number): string => {
		for (const candidate of candidates) {
			const text = truncateString(candidate, maxLength);
			if (text) return text;
		}
		return "";
	};

	return {
		title: firstText([feedData?.title, feed?.title], 200) || "Untitled Feed",
		description: firstText(
			[feedData?.description, feedData?.subtitle, feed?.description],
			1000,
		),
	};
}
294 +
295 +
/**
 * Builds the title, author and link stored for a post.
 *
 * @param post - Parsed item/entry object produced by the feed parser.
 * @param isAtom - True when the post comes from an Atom feed. Forwarded to
 *   the author and link extractors.
 * @param feedTitle - Forwarded to `extractPostAuthor`.
 * @returns `title` (at most 1000 characters, "Untitled" when the post has no
 *   title text), `author` (at most 200 characters) and `link` (at most 1000
 *   characters).
 */
export function sanitizePostData(
	post: any,
	isAtom: boolean,
	feedTitle: string,
) {
	return {
		// Fall back after text extraction: a title that is present but has no
		// text (for example an attribute-only object) must still become "Untitled".
		title: truncateString(post.title, 1000) || "Untitled",
		author: truncateString(extractPostAuthor(post, isAtom, feedTitle), 200),
		// NOTE(review): a link longer than the limit is shortened with "...",
		// which presumably no longer resolves. Confirm whether such posts
		// should be skipped instead.
		link: truncateString(extractPostLink(post, isAtom), 1000),
	};
}