chore: adapt feeds to support empty title entries d7ad877b
Steve Simkins · 2026-04-30 11:33 1 file(s) · +55 −0
apps/feeds/src/feeds.rs +55 −0
34 34
    pub published_at: i64,
35 35
}
36 36
37 +
/// Maximum number of `char`s kept from an entry's description text when a
/// title is derived from it; longer text is cut at this length and gets a
/// trailing `…`.
const DERIVED_TITLE_MAX_CHARS: usize = 80;
38 +
39 +
/// Build a synthetic title from an entry's HTML description when the feed
40 +
/// publishes empty `<title>` tags (common for Micro.blog-style microposts).
41 +
/// Strips tags, collapses whitespace, and truncates to a readable preview.
42 +
fn derive_title_from_html(html: &str) -> String {
43 +
    let fragment = Html::parse_fragment(html);
44 +
    let text: String = fragment.root_element().text().collect();
45 +
    let collapsed = text.split_whitespace().collect::<Vec<_>>().join(" ");
46 +
    let mut chars = collapsed.chars();
47 +
    let truncated: String = chars.by_ref().take(DERIVED_TITLE_MAX_CHARS).collect();
48 +
    if chars.next().is_some() {
49 +
        format!("{}…", truncated.trim_end())
50 +
    } else {
51 +
        truncated
52 +
    }
53 +
}
54 +
37 55
fn build_client() -> reqwest::Client {
38 56
    reqwest::Client::builder()
39 57
        .timeout(Duration::from_secs(15))
116 134
                .title
117 135
                .as_ref()
118 136
                .map(|t| t.content.clone())
137 +
                .filter(|t| !t.trim().is_empty())
138 +
                .or_else(|| {
139 +
                    let html = entry
140 +
                        .summary
141 +
                        .as_ref()
142 +
                        .map(|s| s.content.as_str())
143 +
                        .or_else(|| entry.content.as_ref().and_then(|c| c.body.as_deref()))?;
144 +
                    let derived = derive_title_from_html(html);
145 +
                    if derived.is_empty() {
146 +
                        None
147 +
                    } else {
148 +
                        Some(derived)
149 +
                    }
150 +
                })
119 151
                .unwrap_or_default();
120 152
            let author = entry.authors.first().map(|a| a.name.clone());
121 153
            let guid = if !entry.id.is_empty() {
349 381
#[cfg(test)]
350 382
mod tests {
351 383
    use super::*;
384 +
385 +
    /// Tags are removed, the `&rsquo;` entity is decoded, and runs of spaces
    /// and newlines become single spaces.
    #[test]
    fn derive_title_strips_html_and_collapses_whitespace() {
        let input = "<p>If they launched   full-time\n\ngoblin mode, I&rsquo;d use it</p>";
        let expected = "If they launched full-time goblin mode, I\u{2019}d use it";

        let actual = derive_title_from_html(input);

        assert_eq!(actual, expected);
    }
393 +
394 +
    /// Text well beyond the limit ends in an ellipsis and never exceeds the
    /// limit plus that one ellipsis character.
    #[test]
    fn derive_title_truncates_long_text() {
        let filler = "a ".repeat(100);
        let markup = format!("<p>{}</p>", filler);

        let title = derive_title_from_html(&markup);
        let length = title.chars().count();

        assert!(title.ends_with('…'));
        assert!(length <= DERIVED_TITLE_MAX_CHARS + 1);
    }
401 +
402 +
    /// Neither an empty document nor a whitespace-only paragraph produces a
    /// title.
    #[test]
    fn derive_title_empty_html_yields_empty() {
        for blank in ["", "<p>   </p>"] {
            assert_eq!(derive_title_from_html(blank), "");
        }
    }
352 407
353 408
    #[test]
354 409
    fn parse_opml_flat_outlines() {