chore: adapt feeds to support empty title entries
d7ad877b
1 file(s) · +55 −0
| 34 | 34 | pub published_at: i64, |
|
| 35 | 35 | } |
|
| 36 | 36 | ||
| 37 | + | const DERIVED_TITLE_MAX_CHARS: usize = 80; /* Cap, counted in `char`s, on the text `derive_title_from_html` keeps; the ellipsis it may append is not included in this count. */ |
|
| 38 | + | ||
| 39 | + | /// Build a synthetic title from an entry's HTML description when the feed |
|
| 40 | + | /// publishes empty `<title>` tags (common for Micro.blog-style microposts). |
|
| 41 | + | /// Strips tags, collapses each whitespace run to a single space, and keeps at most `DERIVED_TITLE_MAX_CHARS` chars, appending `…` only when text was cut off; returns an empty string when no text remains. |
|
| 42 | + | fn derive_title_from_html(html: &str) -> String { |
|
| 43 | + | let fragment = Html::parse_fragment(html); |
|
| 44 | + | let text: String = fragment.root_element().text().collect(); /* NOTE(review): the pieces are concatenated with no separator, so text divided only by a tag (e.g. `a<br>b`) presumably runs together as `ab` -- confirm this is acceptable. */ |
|
| 45 | + | let collapsed = text.split_whitespace().collect::<Vec<_>>().join(" "); /* `split_whitespace` also discards leading and trailing whitespace. */ |
|
| 46 | + | let mut chars = collapsed.chars(); /* Iterate by `char` so the cut never lands inside a multi-byte UTF-8 sequence. */ |
|
| 47 | + | let truncated: String = chars.by_ref().take(DERIVED_TITLE_MAX_CHARS).collect(); /* `by_ref` keeps `chars` usable so the check below can tell whether anything was left over. */ |
|
| 48 | + | if chars.next().is_some() { |
|
| 49 | + | format!("{}…", truncated.trim_end()) /* Trim first so the ellipsis is not preceded by a space. */ |
|
| 50 | + | } else { |
|
| 51 | + | truncated |
|
| 52 | + | } |
|
| 53 | + | } |
|
| 54 | + | ||
| 37 | 55 | fn build_client() -> reqwest::Client { |
|
| 38 | 56 | reqwest::Client::builder() |
|
| 39 | 57 | .timeout(Duration::from_secs(15)) |
|
| 116 | 134 | .title |
|
| 117 | 135 | .as_ref() |
|
| 118 | 136 | .map(|t| t.content.clone()) |
|
| 137 | + | .filter(|t| !t.trim().is_empty()) |
|
| 138 | + | .or_else(|| { |
|
| 139 | + | let html = entry |
|
| 140 | + | .summary |
|
| 141 | + | .as_ref() |
|
| 142 | + | .map(|s| s.content.as_str()) |
|
| 143 | + | .or_else(|| entry.content.as_ref().and_then(|c| c.body.as_deref()))?; |
|
| 144 | + | let derived = derive_title_from_html(html); |
|
| 145 | + | if derived.is_empty() { |
|
| 146 | + | None |
|
| 147 | + | } else { |
|
| 148 | + | Some(derived) |
|
| 149 | + | } |
|
| 150 | + | }) |
|
| 119 | 151 | .unwrap_or_default(); |
|
| 120 | 152 | let author = entry.authors.first().map(|a| a.name.clone()); |
|
| 121 | 153 | let guid = if !entry.id.is_empty() { |
|
| 349 | 381 | #[cfg(test)] |
|
| 350 | 382 | mod tests { |
|
| 351 | 383 | use super::*; |
|
| 384 | + | ||
| 385 | + | #[test] |
|
| 386 | + | fn derive_title_strips_html_and_collapses_whitespace() { /* The `<p>` wrapper must disappear and the blank-line break must become one space; the input is short enough that no ellipsis is added. */ |
|
| 387 | + | let html = "<p>If they launched full-time\n\ngoblin mode, I’d use it</p>"; |
|
| 388 | + | assert_eq!( |
|
| 389 | + | derive_title_from_html(html), |
|
| 390 | + | "If they launched full-time goblin mode, I\u{2019}d use it" |
|
| 391 | + | ); |
|
| 392 | + | } |
|
| 393 | + | ||
| 394 | + | #[test] |
|
| 395 | + | fn derive_title_truncates_long_text() { /* Input text is far longer than `DERIVED_TITLE_MAX_CHARS`, so the truncation branch must run. */ |
|
| 396 | + | let html = format!("<p>{}</p>", "a ".repeat(100)); |
|
| 397 | + | let out = derive_title_from_html(&html); |
|
| 398 | + | assert!(out.ends_with('…')); |
|
| 399 | + | assert!(out.chars().count() <= DERIVED_TITLE_MAX_CHARS + 1); /* `+ 1` allows for the ellipsis appended after the kept chars. */ |
|
| 400 | + | } |
|
| 401 | + | ||
| 402 | + | #[test] |
|
| 403 | + | fn derive_title_empty_html_yields_empty() { /* Empty input and markup containing only whitespace must both produce an empty title with no ellipsis. */ |
|
| 404 | + | assert_eq!(derive_title_from_html(""), ""); |
|
| 405 | + | assert_eq!(derive_title_from_html("<p> </p>"), ""); |
|
| 406 | + | } |
|
| 352 | 407 | ||
| 353 | 408 | #[test] |
|
| 354 | 409 | fn parse_opml_flat_outlines() { |
|