feat: add feed discovery search for any URL
d8a1fad1
6 file(s) · +237 −0
| 1373 | 1373 | "rand 0.8.5", |
|
| 1374 | 1374 | "reqwest 0.12.28", |
|
| 1375 | 1375 | "rust-embed", |
|
| 1376 | + | "scraper", |
|
| 1376 | 1377 | "serde", |
|
| 1377 | 1378 | "serde_json", |
|
| 1378 | 1379 | "subtle", |
|
| 1379 | 1380 | "tokio", |
|
| 1381 | + | "url", |
|
| 1380 | 1382 | "urlencoding", |
|
| 1381 | 1383 | ] |
|
| 1382 | 1384 |
| 22 | 22 | feed-rs = "2" |
|
| 23 | 23 | chrono = "0.4" |
|
| 24 | 24 | quick-xml = "0.37" |
|
| 25 | + | scraper = "0.22" |
|
| 26 | + | url = "2" |
|
| 25 | 27 | mime_guess = "2" |
|
| 26 | 28 | urlencoding = "2" |
| 1 | 1 | use crate::models::{FeedItem, FreshRSSResponse, SubscriptionList}; |
|
| 2 | + | use scraper::{Html, Selector}; |
|
| 2 | 3 | use std::time::Duration; |
|
| 4 | + | use url::Url; |
|
| 3 | 5 | ||
| 4 | 6 | #[derive(Clone)] |
|
| 5 | 7 | pub struct FreshRSSConfig { |
|
| 318 | 320 | feed_url: &str, |
|
| 319 | 321 | ) -> Result<String, String> { |
|
| 320 | 322 | FreshRSSClient::new(config).await?.add_subscription(feed_url).await |
|
| 323 | + | } |
|
| 324 | + | ||
| 325 | + | pub async fn discover_feeds(base_url: &str) -> Result<Vec<String>, String> { |
|
| 326 | + | let parsed = Url::parse(base_url).map_err(|e| format!("Invalid URL: {e}"))?; |
|
| 327 | + | let client = build_client(); |
|
| 328 | + | ||
| 329 | + | let mut feeds = Vec::new(); |
|
| 330 | + | ||
| 331 | + | // Strategy A: parse HTML for <link rel="alternate"> tags |
|
| 332 | + | if let Ok(response) = client.get(base_url).send().await { |
|
| 333 | + | if let Ok(body) = response.text().await { |
|
| 334 | + | let document = Html::parse_document(&body); |
|
| 335 | + | let selector = Selector::parse(r#"link[rel="alternate"]"#).unwrap(); |
|
| 336 | + | ||
| 337 | + | for element in document.select(&selector) { |
|
| 338 | + | let type_attr = element.attr("type").unwrap_or_default(); |
|
| 339 | + | if type_attr.contains("rss") |
|
| 340 | + | || type_attr.contains("atom") |
|
| 341 | + | || type_attr.contains("xml") |
|
| 342 | + | { |
|
| 343 | + | if let Some(href) = element.attr("href") { |
|
| 344 | + | let resolved = parsed |
|
| 345 | + | .join(href) |
|
| 346 | + | .map(|u| u.to_string()) |
|
| 347 | + | .unwrap_or_else(|_| href.to_string()); |
|
| 348 | + | if !feeds.contains(&resolved) { |
|
| 349 | + | feeds.push(resolved); |
|
| 350 | + | } |
|
| 351 | + | } |
|
| 352 | + | } |
|
| 353 | + | } |
|
| 354 | + | } |
|
| 355 | + | } |
|
| 356 | + | ||
| 357 | + | // Strategy B: probe common feed paths |
|
| 358 | + | if feeds.is_empty() { |
|
| 359 | + | let common_paths = [ |
|
| 360 | + | "/feed", |
|
| 361 | + | "/feed.xml", |
|
| 362 | + | "/rss", |
|
| 363 | + | "/rss.xml", |
|
| 364 | + | "/atom.xml", |
|
| 365 | + | "/index.xml", |
|
| 366 | + | "/feed/rss", |
|
| 367 | + | "/blog/feed", |
|
| 368 | + | "/blog/rss", |
|
| 369 | + | ]; |
|
| 370 | + | ||
| 371 | + | let mut handles = Vec::new(); |
|
| 372 | + | for path in common_paths { |
|
| 373 | + | let probe_url = match parsed.join(path) { |
|
| 374 | + | Ok(u) => u.to_string(), |
|
| 375 | + | Err(_) => continue, |
|
| 376 | + | }; |
|
| 377 | + | let client = client.clone(); |
|
| 378 | + | handles.push(tokio::spawn(async move { |
|
| 379 | + | if let Ok(resp) = client.head(&probe_url).send().await { |
|
| 380 | + | if resp.status().is_success() { |
|
| 381 | + | if let Some(ct) = resp.headers().get("content-type") { |
|
| 382 | + | let ct = ct.to_str().unwrap_or_default(); |
|
| 383 | + | if ct.contains("xml") || ct.contains("rss") || ct.contains("atom") { |
|
| 384 | + | return Some(probe_url); |
|
| 385 | + | } |
|
| 386 | + | } |
|
| 387 | + | } |
|
| 388 | + | } |
|
| 389 | + | None |
|
| 390 | + | })); |
|
| 391 | + | } |
|
| 392 | + | ||
| 393 | + | for handle in handles { |
|
| 394 | + | if let Ok(Some(url)) = handle.await { |
|
| 395 | + | if !feeds.contains(&url) { |
|
| 396 | + | feeds.push(url); |
|
| 397 | + | } |
|
| 398 | + | } |
|
| 399 | + | } |
|
| 400 | + | } |
|
| 401 | + | ||
| 402 | + | if feeds.is_empty() { |
|
| 403 | + | Err("No feeds found at this URL".to_string()) |
|
| 404 | + | } else { |
|
| 405 | + | Ok(feeds) |
|
| 406 | + | } |
|
| 321 | 407 | } |
|
| 322 | 408 | ||
| 323 | 409 | #[cfg(test)] |
|
| 278 | 278 | feed_url: String, |
|
| 279 | 279 | } |
|
| 280 | 280 | ||
| 281 | + | #[derive(Deserialize)] |
|
| 282 | + | struct DiscoverFeedsForm { |
|
| 283 | + | base_url: String, |
|
| 284 | + | } |
|
| 285 | + | ||
| 281 | 286 | async fn login_get_handler(Query(q): Query<FlashQuery>) -> impl IntoResponse { |
|
| 282 | 287 | Html(LoginTemplate { error: q.error }.render().unwrap()) |
|
| 283 | 288 | } |
|
| 351 | 356 | .into_response() |
|
| 352 | 357 | } |
|
| 353 | 358 | ||
| 359 | + | async fn discover_feeds_handler( |
|
| 360 | + | _session: auth::AuthSession, |
|
| 361 | + | Form(form): Form<DiscoverFeedsForm>, |
|
| 362 | + | ) -> Response { |
|
| 363 | + | match feeds::discover_feeds(&form.base_url).await { |
|
| 364 | + | Ok(urls) => Json(serde_json::json!(urls)).into_response(), |
|
| 365 | + | Err(e) => ( |
|
| 366 | + | axum::http::StatusCode::BAD_REQUEST, |
|
| 367 | + | Json(serde_json::json!({ "error": e })), |
|
| 368 | + | ) |
|
| 369 | + | .into_response(), |
|
| 370 | + | } |
|
| 371 | + | } |
|
| 372 | + | ||
| 354 | 373 | async fn add_feed_handler( |
|
| 355 | 374 | _session: auth::AuthSession, |
|
| 356 | 375 | State(state): State<Arc<AppState>>, |
|
| 401 | 420 | ) |
|
| 402 | 421 | .route("/admin/logout", get(logout_handler)) |
|
| 403 | 422 | .route("/admin/add-feed", post(add_feed_handler)) |
|
| 423 | + | .route("/admin/discover-feeds", post(discover_feeds_handler)) |
|
| 404 | 424 | .route("/static/{*path}", get(static_handler)) |
|
| 405 | 425 | .with_state(state); |
|
| 406 | 426 | ||
| 29 | 29 | <p class="error-msg">{{ err }}</p> |
|
| 30 | 30 | {% endif %} |
|
| 31 | 31 | ||
| 32 | + | <div class="admin-form"> |
|
| 33 | + | <label for="base_url">Discover Feed</label> |
|
| 34 | + | <div class="discover-row"> |
|
| 35 | + | <input type="url" id="base_url" placeholder="https://example.com" /> |
|
| 36 | + | <button type="button" id="discover-btn" onclick="discoverFeeds()">Discover</button> |
|
| 37 | + | </div> |
|
| 38 | + | <div id="discover-status" class="discover-status" style="display:none;"></div> |
|
| 39 | + | <div id="discover-results" class="discover-results" style="display:none;"></div> |
|
| 40 | + | </div> |
|
| 41 | + | ||
| 32 | 42 | <form class="admin-form" method="POST" action="/admin/add-feed"> |
|
| 33 | 43 | <label for="feed_url">Feed URL</label> |
|
| 34 | 44 | <input type="url" id="feed_url" name="feed_url" placeholder="https://example.com/feed.xml" required /> |
|
| 35 | 45 | <button type="submit">Add Feed</button> |
|
| 36 | 46 | </form> |
|
| 47 | + | ||
| 48 | + | <script> |
|
/**
 * Asks the server to discover feeds for the URL typed into #base_url.
 *
 * On success the first discovered feed is copied into #feed_url; when several
 * feeds are found, one button per feed is rendered so another can be picked.
 * Failures are reported in #discover-status. The Discover button is disabled
 * while the request is in flight and always re-enabled afterwards.
 */
async function discoverFeeds() {
  const baseUrl = document.getElementById('base_url').value.trim();
  if (!baseUrl) return;

  const btn = document.getElementById('discover-btn');
  const status = document.getElementById('discover-status');
  const results = document.getElementById('discover-results');
  const feedInput = document.getElementById('feed_url');

  // Shows a status line; `kind` is the CSS class carrying the colour.
  function showStatus(message, kind) {
    status.textContent = message;
    status.className = 'discover-status ' + kind;
    status.style.display = 'block';
  }

  btn.disabled = true;
  btn.textContent = 'Searching...';
  status.style.display = 'none';
  results.style.display = 'none';
  results.innerHTML = '';

  try {
    const body = new URLSearchParams({ base_url: baseUrl });
    const resp = await fetch('/admin/discover-feeds', { method: 'POST', body });

    // The body is not guaranteed to be JSON (e.g. a plain-text rejection or an
    // HTML page reached through a redirect), so a parse failure must not mask
    // the real outcome as a network error.
    let data = null;
    try {
      data = await resp.json();
    } catch (parseError) {
      data = null;
    }

    if (!resp.ok) {
      showStatus((data && data.error) || 'No feeds found', 'error-msg');
      return;
    }

    // Guard against a 2xx answer without a usable list, which would otherwise
    // write "undefined" into the feed field.
    if (!Array.isArray(data) || data.length === 0) {
      showStatus('No feeds found', 'error-msg');
      return;
    }

    feedInput.value = data[0];
    showStatus(data.length + ' feed(s) found', 'success-msg');

    if (data.length > 1) {
      results.style.display = 'flex';
      data.forEach(function(url) {
        const item = document.createElement('button');
        item.type = 'button';
        item.className = 'discover-result-item' + (url === data[0] ? ' active' : '');
        // textContent (not innerHTML) keeps server-provided URLs from being parsed as markup.
        item.textContent = url;
        item.onclick = function() {
          feedInput.value = url;
          results.querySelectorAll('.discover-result-item').forEach(function(el) {
            el.classList.remove('active');
          });
          item.classList.add('active');
        };
        results.appendChild(item);
      });
    }
  } catch (e) {
    showStatus('Request failed', 'error-msg');
  } finally {
    btn.disabled = false;
    btn.textContent = 'Discover';
  }
}
|
| 107 | + | </script> |
|
| 37 | 108 | ||
| 38 | 109 | {% if let Some(subs) = subscriptions %} |
|
| 39 | 110 | <div class="admin-subs"> |
| 224 | 224 | line-height: 1.4; |
|
| 225 | 225 | } |
|
| 226 | 226 | ||
| 227 | + | .discover-row { |
|
| 228 | + | display: flex; |
|
| 229 | + | gap: 0.5rem; |
|
| 230 | + | width: 100%; |
|
| 231 | + | } |
|
| 232 | + | ||
| 233 | + | .discover-row input { |
|
| 234 | + | flex: 1; |
|
| 235 | + | background: #1a1a1c; |
|
| 236 | + | color: #ffffff; |
|
| 237 | + | border: 1px solid #333; |
|
| 238 | + | padding: 10px; |
|
| 239 | + | font-family: "Commit Mono", monospace, sans-serif; |
|
| 240 | + | font-size: 14px; |
|
| 241 | + | outline: none; |
|
| 242 | + | } |
|
| 243 | + | ||
| 244 | + | .discover-row input:focus { |
|
| 245 | + | border-color: #666; |
|
| 246 | + | } |
|
| 247 | + | ||
| 248 | + | .discover-status { |
|
| 249 | + | font-size: 12px; |
|
| 250 | + | } |
|
| 251 | + | ||
| 252 | + | .discover-results { |
|
| 253 | + | display: flex; |
|
| 254 | + | flex-direction: column; |
|
| 255 | + | gap: 0.25rem; |
|
| 256 | + | width: 100%; |
|
| 257 | + | } |
|
| 258 | + | ||
| 259 | + | .discover-result-item { |
|
| 260 | + | background: #1a1a1c; |
|
| 261 | + | color: #888; |
|
| 262 | + | border: 1px solid #333; |
|
| 263 | + | padding: 8px 10px; |
|
| 264 | + | font-size: 12px; |
|
| 265 | + | text-align: left; |
|
| 266 | + | cursor: pointer; |
|
| 267 | + | width: 100%; |
|
| 268 | + | white-space: nowrap; |
|
| 269 | + | overflow: hidden; |
|
| 270 | + | text-overflow: ellipsis; |
|
| 271 | + | } |
|
| 272 | + | ||
| 273 | + | .discover-result-item:hover { |
|
| 274 | + | border-color: #666; |
|
| 275 | + | color: #ffffff; |
|
| 276 | + | } |
|
| 277 | + | ||
| 278 | + | .discover-result-item.active { |
|
| 279 | + | border-color: #6bff8a; |
|
| 280 | + | color: #ffffff; |
|
| 281 | + | } |
|
| 282 | + | ||
| 227 | 283 | .admin-subs { |
|
| 228 | 284 | width: 100%; |
|
| 229 | 285 | display: flex; |