chore: enable auto discovery and url normalization a4268d5c
Steve Simkins · 2026-05-06 19:08 3 file(s) · +147 −1
Cargo.lock +47 −0
218 218
 "feedparser-rs",
219 219
 "open",
220 220
 "ratatui",
221 +
 "ureq",
221 222
]
222 223
223 224
[[package]]
2143 2144
checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b"
2144 2145
dependencies = [
2145 2146
 "aws-lc-rs",
2147 +
 "log",
2146 2148
 "once_cell",
2149 +
 "ring",
2147 2150
 "rustls-pki-types",
2148 2151
 "rustls-webpki",
2149 2152
 "subtle",
2895 2898
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
2896 2899
2897 2900
[[package]]
2901 +
name = "ureq"
2902 +
version = "3.3.0"
2903 +
source = "registry+https://github.com/rust-lang/crates.io-index"
2904 +
checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0"
2905 +
dependencies = [
2906 +
 "base64",
2907 +
 "flate2",
2908 +
 "log",
2909 +
 "percent-encoding",
2910 +
 "rustls",
2911 +
 "rustls-pki-types",
2912 +
 "ureq-proto",
2913 +
 "utf8-zero",
2914 +
 "webpki-roots",
2915 +
]
2916 +
2917 +
[[package]]
2918 +
name = "ureq-proto"
2919 +
version = "0.6.0"
2920 +
source = "registry+https://github.com/rust-lang/crates.io-index"
2921 +
checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c"
2922 +
dependencies = [
2923 +
 "base64",
2924 +
 "http",
2925 +
 "httparse",
2926 +
 "log",
2927 +
]
2928 +
2929 +
[[package]]
2898 2930
name = "url"
2899 2931
version = "2.5.8"
2900 2932
source = "registry+https://github.com/rust-lang/crates.io-index"
2917 2949
version = "0.1.8"
2918 2950
source = "registry+https://github.com/rust-lang/crates.io-index"
2919 2951
checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091"
2952 +
2953 +
[[package]]
2954 +
name = "utf8-zero"
2955 +
version = "0.8.1"
2956 +
source = "registry+https://github.com/rust-lang/crates.io-index"
2957 +
checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e"
2920 2958
2921 2959
[[package]]
2922 2960
name = "utf8_iter"
3132 3170
version = "1.0.7"
3133 3171
source = "registry+https://github.com/rust-lang/crates.io-index"
3134 3172
checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c"
3173 +
dependencies = [
3174 +
 "rustls-pki-types",
3175 +
]
3176 +
3177 +
[[package]]
3178 +
name = "webpki-roots"
3179 +
version = "1.0.7"
3180 +
source = "registry+https://github.com/rust-lang/crates.io-index"
3181 +
checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d"
3135 3182
dependencies = [
3136 3183
 "rustls-pki-types",
3137 3184
]
Cargo.toml +1 −0
8 8
9 9
[dependencies]
10 10
color-eyre = "0.6.3"
11 +
ureq = "3"
11 12
crossterm = "0.29.0"
12 13
feedparser-rs = "0.5.3"
13 14
open = "5"
src/main.rs +99 −1
9 9
    widgets::{Block, List, ListItem, ListState, Padding},
10 10
};
11 11
12 +
/// Ensures `s` carries an explicit HTTP(S) scheme so it can be fetched.
///
/// Leading and trailing whitespace is removed first. If the input already starts with
/// `http://` or `https://` (compared ASCII case-insensitively, since URL schemes are
/// case-insensitive), the scheme is emitted in lowercase and the remainder is kept as-is.
/// Anything else is assumed to be a scheme-less host/path and gets `https://` prepended.
fn normalize_url(s: &str) -> String {
    let trimmed = s.trim();
    for scheme in ["https://", "http://"] {
        // `get` returns `None` when the input is shorter than the scheme or the cut would
        // fall inside a multi-byte character, so this never panics on odd input.
        if let Some(head) = trimmed.get(..scheme.len()) {
            if head.eq_ignore_ascii_case(scheme) {
                return format!("{scheme}{}", &trimmed[scheme.len()..]);
            }
        }
    }
    format!("https://{trimmed}")
}
19 +
20 +
/// Reports whether `url` names only a site root: an optional `http://`/`https://` scheme,
/// a host, and at most trailing slashes.
///
/// A URL that carries a path, a query string (`?…`) or a fragment (`#…`) is not bare, so a
/// query-style feed address such as `https://example.com?feed=rss2` is left untouched by
/// callers that only run discovery on bare domains.
fn is_bare_domain(url: &str) -> bool {
    // Strip the scheme case-insensitively; fall back to the whole input when there is none.
    let rest = ["https://", "http://"]
        .iter()
        .find_map(|scheme| {
            let head = url.get(..scheme.len())?;
            if head.eq_ignore_ascii_case(scheme) {
                Some(&url[scheme.len()..])
            } else {
                None
            }
        })
        .unwrap_or(url);
    // Everything from the first path, query or fragment delimiter onwards follows the host.
    let tail = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .map_or("", |i| &rest[i..]);
    tail.trim_matches('/').is_empty()
}
28 +
29 +
fn find_feed_link(html: &str, base_url: &str) -> Option<String> {
30 +
    let base = base_url.trim_end_matches('/');
31 +
    let lower = html.to_lowercase();
32 +
    let mut pos = 0;
33 +
    while let Some(tag_start) = lower[pos..].find("<link") {
34 +
        let abs = pos + tag_start;
35 +
        let tag_end = lower[abs..].find('>')? + abs;
36 +
        let tag = &html[abs..=tag_end];
37 +
        let tag_lower = tag.to_lowercase();
38 +
        let is_feed =
39 +
            tag_lower.contains("application/rss+xml") || tag_lower.contains("application/atom+xml");
40 +
        if is_feed {
41 +
            if let Some(href) = extract_attr(tag, "href") {
42 +
                let resolved = if href.starts_with("http://") || href.starts_with("https://") {
43 +
                    href
44 +
                } else if href.starts_with('/') {
45 +
                    format!("{base}{href}")
46 +
                } else {
47 +
                    format!("{base}/{href}")
48 +
                };
49 +
                return Some(resolved);
50 +
            }
51 +
        }
52 +
        pos = tag_end + 1;
53 +
    }
54 +
    None
55 +
}
56 +
57 +
/// Extracts the value of attribute `attr` from the text of a single HTML tag.
///
/// The attribute name is matched ASCII case-insensitively and must stand on its own, so
/// asking for `href` matches neither `data-href` nor `hreflang`. Whitespace around `=` is
/// tolerated. Double-quoted, single-quoted and unquoted values are supported.
///
/// Returns `None` when the attribute is absent, when a quoted value is never closed, or
/// when an unquoted value is empty.
fn extract_attr(tag: &str, attr: &str) -> Option<String> {
    let needle = attr.to_ascii_lowercase();
    if needle.is_empty() {
        return None;
    }
    // ASCII-only lowercasing preserves byte offsets, so indices found in `lower` are valid
    // in `tag`; full Unicode lowercasing may change lengths and misalign them.
    let lower = tag.to_ascii_lowercase();

    let mut from = 0;
    while let Some(found) = lower[from..].find(&needle) {
        let name_start = from + found;
        let name_end = name_start + needle.len();
        from = name_end;

        // The name must begin a new attribute, not continue another one (`data-href`).
        let at_boundary = tag[..name_start]
            .chars()
            .next_back()
            .map_or(true, |c| c.is_ascii_whitespace() || matches!(c, '"' | '\'' | '/'));
        if !at_boundary {
            continue;
        }

        // Requiring `=` right after the name (modulo whitespace) rejects `hreflang` etc.
        let value = match tag[name_end..].trim_start().strip_prefix('=') {
            Some(after_eq) => after_eq.trim_start(),
            None => continue,
        };

        let mut chars = value.chars();
        return match chars.next() {
            Some(quote @ ('"' | '\'')) => {
                let body = chars.as_str();
                body.find(quote).map(|end| body[..end].to_string())
            }
            Some(_) => {
                // Unquoted value: runs until whitespace or the end of the tag.
                let end = value
                    .find(|c: char| c.is_ascii_whitespace() || c == '>')
                    .unwrap_or(value.len());
                if end == 0 {
                    None
                } else {
                    Some(value[..end].to_string())
                }
            }
            None => None,
        };
    }
    None
}
72 +
73 +
fn discover_feed(input: &str) -> color_eyre::Result<String> {
74 +
    let url = normalize_url(input);
75 +
    if !is_bare_domain(&url) {
76 +
        return Ok(url);
77 +
    }
78 +
    let html = ureq::get(&url).call()?.body_mut().read_to_string()?;
79 +
    if let Some(feed_url) = find_feed_link(&html, &url) {
80 +
        return Ok(feed_url);
81 +
    }
82 +
    let base = url.trim_end_matches('/');
83 +
    const PATHS: &[&str] = &[
84 +
        "/feed.xml",
85 +
        "/rss.xml",
86 +
        "/atom.xml",
87 +
        "/feed",
88 +
        "/rss",
89 +
        "/index.xml",
90 +
        "/feeds/posts/default",
91 +
        "/blog/feed.xml",
92 +
        "/blog/rss.xml",
93 +
    ];
94 +
    for path in PATHS {
95 +
        let candidate = format!("{base}{path}");
96 +
        if ureq::get(&candidate)
97 +
            .call()
98 +
            .map(|r| r.status() == 200)
99 +
            .unwrap_or(false)
100 +
        {
101 +
            return Ok(candidate);
102 +
        }
103 +
    }
104 +
    Err(color_eyre::eyre::eyre!("No feed found for: {input}"))
105 +
}
106 +
12 107
fn main() -> color_eyre::Result<()> {
13 108
    color_eyre::install()?;
14 109
    let urls: Vec<String> = std::env::args().skip(1).collect();
19 114
    }
20 115
    let feeds: Vec<ParsedFeed> = urls
21 116
        .iter()
22 -
        .map(|url| parse_url(url, None, None, None))
117 +
        .map(|url| -> color_eyre::Result<ParsedFeed> {
118 +
            let resolved = discover_feed(url)?;
119 +
            Ok(parse_url(&resolved, None, None, None)?)
120 +
        })
23 121
        .collect::<Result<_, _>>()?;
24 122
25 123
    let mut entries: Vec<(&Entry, Option<&str>)> = feeds