Merge pull request #41 from stevedylandev/feat/posts-bulk-import
9bd66759
feat: add bulk import for posts
6 file(s) · +367 −3
feat: add bulk import for posts
| 38 | 38 | - RSS feed at `/feed.xml` |
|
| 39 | 39 | - Dark-themed UI with Commit Mono font |
|
| 40 | 40 | - SQLite for persistent storage |
|
| 41 | + | - Bulk import posts from a zip of markdown files at `/admin/import` |
|
| 41 | 42 | ||
| 42 | 43 | ## Structure |
|
| 43 | 44 | ||
| 92 | 93 | ``` |
|
| 93 | 94 | ||
| 94 | 95 | The resulting binary at `./target/release/posts` is self-contained with all assets embedded. Copy it to your server with a configured `.env` file and run it directly. |
|
| 96 | + | ||
| 97 | + | ## Importing posts |
|
| 98 | + | ||
| 99 | + | Visit `/admin/import` and upload a zip containing `.md` or `.markdown` files. Each file may start with YAML-style frontmatter: |
|
| 100 | + | ||
| 101 | + | ```markdown |
|
| 102 | + | --- |
|
| 103 | + | title: My Post |
|
| 104 | + | slug: my-post |
|
| 105 | + | status: published |
|
| 106 | + | published_date: 2025-01-15 10:00:00 |
|
| 107 | + | tags: rust, sqlite |
|
| 108 | + | description: A short summary |
|
| 109 | + | lang: en |
|
| 110 | + | --- |
|
| 111 | + | ||
| 112 | + | Post body in markdown. |
|
| 113 | + | ``` |
|
| 114 | + | ||
| 115 | + | Supported keys: `title`, `slug`, `status` (`draft` or `published`), `published_date`, `tags`, `description`, `meta_image`, `alias`, `lang`. Files without frontmatter are imported with the title derived from the filename and a slug auto-generated from that title. Posts whose slug already exists in the database are skipped, so re-uploading the same archive is safe. |
|
| 116 | + | ||
| 117 | + | The zip can be up to 50MB. Asset references inside posts (images, etc.) are left untouched — pre-host them or upload them separately at `/admin/files`. |
|
| 95 | 118 | ||
| 96 | 119 | ## Acknowledgements |
|
| 97 | 120 | ||
| 4 | 4 | ||
| 5 | 5 | #[tokio::main] |
|
| 6 | 6 | async fn main() { |
|
| 7 | + | dotenvy::dotenv().ok(); |
|
| 7 | 8 | tracing_subscriber::fmt::init(); |
|
| 8 | 9 | let host = std::env::var("HOST").unwrap_or_else(|_| "127.0.0.1".to_string()); |
|
| 9 | 10 | let port: u16 = std::env::var("PORT") |
| 4 | 4 | http::{HeaderValue, StatusCode}, |
|
| 5 | 5 | response::{Html, IntoResponse, Redirect, Response}, |
|
| 6 | 6 | }; |
|
| 7 | + | use std::io::{Cursor, Read}; |
|
| 7 | 8 | use std::sync::Arc; |
|
| 9 | + | use zip::ZipArchive; |
|
| 8 | 10 | ||
| 9 | 11 | use super::super::*; |
|
| 10 | 12 | use crate::{auth, db}; |
|
| 612 | 614 | } |
|
| 613 | 615 | } |
|
| 614 | 616 | } |
|
| 617 | + | ||
| 618 | + | // --- Import handlers --- |
|
| 619 | + | ||
| 620 | + | const IMPORT_MAX_BYTES: usize = 50 * 1024 * 1024; |
|
| 621 | + | ||
| 622 | + | pub async fn admin_import_form( |
|
| 623 | + | _session: auth::AuthSession, |
|
| 624 | + | Query(q): Query<FlashQuery>, |
|
| 625 | + | ) -> Response { |
|
| 626 | + | WebTemplate(AdminImportTemplate { |
|
| 627 | + | error: q.error, |
|
| 628 | + | imported: q.imported, |
|
| 629 | + | skipped: q.skipped, |
|
| 630 | + | }) |
|
| 631 | + | .into_response() |
|
| 632 | + | } |
|
| 633 | + | ||
| 634 | + | pub async fn admin_import_posts( |
|
| 635 | + | _session: auth::AuthSession, |
|
| 636 | + | State(state): State<Arc<AppState>>, |
|
| 637 | + | mut multipart: Multipart, |
|
| 638 | + | ) -> Response { |
|
| 639 | + | let mut zip_data: Option<Vec<u8>> = None; |
|
| 640 | + | while let Ok(Some(field)) = multipart.next_field().await { |
|
| 641 | + | if field.name() == Some("zip") { |
|
| 642 | + | match field.bytes().await { |
|
| 643 | + | Ok(bytes) => zip_data = Some(bytes.to_vec()), |
|
| 644 | + | Err(e) => { |
|
| 645 | + | tracing::error!("Failed to read import upload: {}", e); |
|
| 646 | + | return Redirect::to("/admin/import?error=Failed+to+read+upload").into_response(); |
|
| 647 | + | } |
|
| 648 | + | } |
|
| 649 | + | } |
|
| 650 | + | } |
|
| 651 | + | ||
| 652 | + | let bytes = match zip_data { |
|
| 653 | + | Some(b) => b, |
|
| 654 | + | None => return Redirect::to("/admin/import?error=No+zip+provided").into_response(), |
|
| 655 | + | }; |
|
| 656 | + | if bytes.len() > IMPORT_MAX_BYTES { |
|
| 657 | + | return Redirect::to("/admin/import?error=Zip+exceeds+50MB+limit").into_response(); |
|
| 658 | + | } |
|
| 659 | + | ||
| 660 | + | let db = state.db.clone(); |
|
| 661 | + | let result = tokio::task::spawn_blocking(move || process_import_zip(&db, &bytes)).await; |
|
| 662 | + | ||
| 663 | + | match result { |
|
| 664 | + | Ok(Ok(summary)) => Redirect::to(&format!( |
|
| 665 | + | "/admin/import?imported={}&skipped={}", |
|
| 666 | + | summary.imported, summary.skipped |
|
| 667 | + | )) |
|
| 668 | + | .into_response(), |
|
| 669 | + | Ok(Err(e)) => { |
|
| 670 | + | tracing::error!("Import failed: {}", e); |
|
| 671 | + | Redirect::to("/admin/import?error=Invalid+zip+archive").into_response() |
|
| 672 | + | } |
|
| 673 | + | Err(e) => { |
|
| 674 | + | tracing::error!("Import join error: {}", e); |
|
| 675 | + | Redirect::to("/admin/import?error=Server+error").into_response() |
|
| 676 | + | } |
|
| 677 | + | } |
|
| 678 | + | } |
|
| 679 | + | ||
| 680 | + | struct ImportSummary { |
|
| 681 | + | imported: u32, |
|
| 682 | + | skipped: u32, |
|
| 683 | + | } |
|
| 684 | + | ||
| 685 | + | fn process_import_zip(db: &db::Db, bytes: &[u8]) -> Result<ImportSummary, String> { |
|
| 686 | + | let mut archive = ZipArchive::new(Cursor::new(bytes)) |
|
| 687 | + | .map_err(|e| format!("Bad zip: {}", e))?; |
|
| 688 | + | ||
| 689 | + | let mut imported = 0u32; |
|
| 690 | + | let mut skipped = 0u32; |
|
| 691 | + | ||
| 692 | + | for i in 0..archive.len() { |
|
| 693 | + | let mut file = match archive.by_index(i) { |
|
| 694 | + | Ok(f) => f, |
|
| 695 | + | Err(e) => { |
|
| 696 | + | tracing::warn!("Skipping zip entry {}: {}", i, e); |
|
| 697 | + | continue; |
|
| 698 | + | } |
|
| 699 | + | }; |
|
| 700 | + | if file.is_dir() { |
|
| 701 | + | continue; |
|
| 702 | + | } |
|
| 703 | + | let name = match file.enclosed_name() { |
|
| 704 | + | Some(p) => p.to_string_lossy().into_owned(), |
|
| 705 | + | None => continue, |
|
| 706 | + | }; |
|
| 707 | + | if name.starts_with("__MACOSX/") { |
|
| 708 | + | continue; |
|
| 709 | + | } |
|
| 710 | + | let basename = std::path::Path::new(&name) |
|
| 711 | + | .file_name() |
|
| 712 | + | .and_then(|s| s.to_str()) |
|
| 713 | + | .unwrap_or(""); |
|
| 714 | + | if basename.is_empty() || basename.starts_with('.') { |
|
| 715 | + | continue; |
|
| 716 | + | } |
|
| 717 | + | let lower = basename.to_lowercase(); |
|
| 718 | + | if !(lower.ends_with(".md") || lower.ends_with(".markdown")) { |
|
| 719 | + | continue; |
|
| 720 | + | } |
|
| 721 | + | ||
| 722 | + | let mut raw = String::new(); |
|
| 723 | + | if let Err(e) = file.read_to_string(&mut raw) { |
|
| 724 | + | tracing::warn!("Skipping {}: read error {}", name, e); |
|
| 725 | + | continue; |
|
| 726 | + | } |
|
| 727 | + | ||
| 728 | + | if !import_one(db, basename, &raw, &mut imported, &mut skipped) { |
|
| 729 | + | skipped += 1; |
|
| 730 | + | } |
|
| 731 | + | } |
|
| 732 | + | ||
| 733 | + | Ok(ImportSummary { imported, skipped }) |
|
| 734 | + | } |
|
| 735 | + | ||
| 736 | + | fn import_one( |
|
| 737 | + | db: &db::Db, |
|
| 738 | + | basename: &str, |
|
| 739 | + | raw: &str, |
|
| 740 | + | imported: &mut u32, |
|
| 741 | + | skipped: &mut u32, |
|
| 742 | + | ) -> bool { |
|
| 743 | + | let (frontmatter, body) = split_frontmatter(raw); |
|
| 744 | + | let attrs = parse_attributes(frontmatter.unwrap_or("")); |
|
| 745 | + | ||
| 746 | + | let title = if attrs.title.trim().is_empty() { |
|
| 747 | + | title_from_filename(basename) |
|
| 748 | + | } else { |
|
| 749 | + | attrs.title.trim().to_string() |
|
| 750 | + | }; |
|
| 751 | + | if title.is_empty() { |
|
| 752 | + | return false; |
|
| 753 | + | } |
|
| 754 | + | ||
| 755 | + | let slug = if attrs.slug.trim().is_empty() { |
|
| 756 | + | slugify(&title) |
|
| 757 | + | } else { |
|
| 758 | + | attrs.slug.trim().to_string() |
|
| 759 | + | }; |
|
| 760 | + | if slug.is_empty() { |
|
| 761 | + | return false; |
|
| 762 | + | } |
|
| 763 | + | ||
| 764 | + | match db::get_post_by_slug(db, &slug) { |
|
| 765 | + | Ok(Some(_)) => { |
|
| 766 | + | *skipped += 1; |
|
| 767 | + | return true; |
|
| 768 | + | } |
|
| 769 | + | Ok(None) => {} |
|
| 770 | + | Err(e) => { |
|
| 771 | + | tracing::warn!("DB error checking slug {}: {}", slug, e); |
|
| 772 | + | return false; |
|
| 773 | + | } |
|
| 774 | + | } |
|
| 775 | + | ||
| 776 | + | let status = if attrs.status.trim().eq_ignore_ascii_case("published") { |
|
| 777 | + | "published" |
|
| 778 | + | } else { |
|
| 779 | + | "draft" |
|
| 780 | + | }; |
|
| 781 | + | let lang = if attrs.lang.trim().is_empty() { |
|
| 782 | + | "en" |
|
| 783 | + | } else { |
|
| 784 | + | attrs.lang.trim() |
|
| 785 | + | }; |
|
| 786 | + | let published_date = if attrs.published_date.trim().is_empty() { |
|
| 787 | + | now_datetime() |
|
| 788 | + | } else { |
|
| 789 | + | attrs.published_date.trim().to_string() |
|
| 790 | + | }; |
|
| 791 | + | ||
| 792 | + | let input = db::PostInput { |
|
| 793 | + | title: &title, |
|
| 794 | + | slug: &slug, |
|
| 795 | + | content: body, |
|
| 796 | + | status, |
|
| 797 | + | alias: opt_str(&attrs.alias), |
|
| 798 | + | canonical_url: None, |
|
| 799 | + | published_date: Some(&published_date), |
|
| 800 | + | meta_description: opt_str(&attrs.meta_description), |
|
| 801 | + | meta_image: opt_str(&attrs.meta_image), |
|
| 802 | + | lang, |
|
| 803 | + | tags: opt_str(&attrs.tags), |
|
| 804 | + | }; |
|
| 805 | + | match db::create_post(db, &input) { |
|
| 806 | + | Ok(_) => { |
|
| 807 | + | *imported += 1; |
|
| 808 | + | true |
|
| 809 | + | } |
|
| 810 | + | Err(e) => { |
|
| 811 | + | tracing::warn!("Failed to insert {}: {}", slug, e); |
|
| 812 | + | false |
|
| 813 | + | } |
|
| 814 | + | } |
|
| 815 | + | } |
|
| 816 | + | ||
/// Splits a markdown document into its frontmatter block and body.
///
/// A document has frontmatter when its first line (after any leading UTF-8
/// byte-order marks) is exactly `---` and a later line is exactly `---`; both LF
/// and CRLF line endings are accepted, including a closing fence that is the
/// last line of the file without a trailing newline.
///
/// Returns `(Some(frontmatter), body)` where `frontmatter` is the text between
/// the fences without its final line ending (possibly empty) and `body` has its
/// leading blank lines removed. When no complete frontmatter block is present,
/// returns `(None, text)` where `text` is the whole document with leading
/// byte-order marks removed.
fn split_frontmatter(content: &str) -> (Option<&str>, &str) {
    /// Removes one trailing `\n` or `\r\n` from `s`, if present.
    fn strip_eol(s: &str) -> &str {
        let s = s.strip_suffix('\n').unwrap_or(s);
        s.strip_suffix('\r').unwrap_or(s)
    }

    // Everything below works on the BOM-free text so that a BOM never leaks
    // into the stored post body, with or without frontmatter.
    let text = content.trim_start_matches('\u{feff}');
    let after_open = match text
        .strip_prefix("---\n")
        .or_else(|| text.strip_prefix("---\r\n"))
    {
        Some(rest) => rest,
        None => return (None, text),
    };

    // Scan line by line so the *earliest* closing fence wins regardless of which
    // line-ending style it uses; `offset` is the byte index of the current line.
    let mut offset = 0;
    for line in after_open.split_inclusive('\n') {
        if strip_eol(line) == "---" {
            let frontmatter = strip_eol(&after_open[..offset]);
            let body = after_open[offset + line.len()..].trim_start_matches(['\r', '\n']);
            return (Some(frontmatter), body);
        }
        offset += line.len();
    }

    // Opening fence without a closing one: treat the whole file as body.
    (None, text)
}
| 837 | + | ||
/// Derives a human-readable title from a file name.
///
/// Everything from the last `.` onwards is dropped, `-` and `_` become spaces,
/// surrounding whitespace is trimmed and the first character is upper-cased.
/// Returns an empty string when nothing is left after trimming.
fn title_from_filename(name: &str) -> String {
    let stem = name.rfind('.').map_or(name, |dot| &name[..dot]);
    let spaced = stem.replace(['-', '_'], " ");
    let words = spaced.trim();

    let mut title = String::with_capacity(words.len());
    if let Some(initial) = words.chars().next() {
        // Upper-casing a single char may yield several chars (e.g. 'ß' -> "SS").
        title.extend(initial.to_uppercase());
        title.push_str(&words[initial.len_utf8()..]);
    }
    title
}
| 851 | + | ||
/// Unit tests for the frontmatter splitter, filename-to-title helper and the
/// `status` key of the attribute parser.
#[cfg(test)]
mod tests {
    use super::*;

    /// LF-delimited fences are removed and the body is returned untouched.
    #[test]
    fn split_frontmatter_basic() {
        let parsed = split_frontmatter("---\ntitle: Hello\nslug: hello\n---\n# Body\n");
        assert_eq!(parsed, (Some("title: Hello\nslug: hello"), "# Body\n"));
    }

    /// CRLF-delimited fences work the same way; the body keeps its own CRLF.
    #[test]
    fn split_frontmatter_crlf() {
        let parsed = split_frontmatter("---\r\ntitle: Hi\r\n---\r\nbody\r\n");
        assert_eq!(parsed, (Some("title: Hi"), "body\r\n"));
    }

    /// A document without an opening fence has no frontmatter.
    #[test]
    fn split_frontmatter_no_fence() {
        let source = "# Just markdown\n\ncontent";
        let (frontmatter, body) = split_frontmatter(source);
        assert!(frontmatter.is_none());
        assert_eq!(body, source);
    }

    /// A leading byte-order mark does not hide the opening fence.
    #[test]
    fn split_frontmatter_strips_bom() {
        let parsed = split_frontmatter("\u{feff}---\ntitle: Hi\n---\nbody");
        assert_eq!(parsed, (Some("title: Hi"), "body"));
    }

    /// Extensions are dropped and `-` / `_` become spaces.
    #[test]
    fn title_from_filename_replaces_separators() {
        let cases = [
            ("my-cool-post.md", "My cool post"),
            ("hello_world.markdown", "Hello world"),
            ("noext", "Noext"),
        ];
        for (file_name, expected) in cases {
            assert_eq!(title_from_filename(file_name), expected);
        }
    }

    /// The `status` key is parsed alongside the existing keys.
    #[test]
    fn parse_attributes_picks_up_status() {
        let attrs = parse_attributes("title: T\nstatus: published\n");
        assert_eq!(
            (attrs.title.as_str(), attrs.status.as_str()),
            ("T", "published")
        );
    }
}
| 162 | 162 | success: bool, |
|
| 163 | 163 | } |
|
| 164 | 164 | ||
| 165 | + | #[derive(Template)] |
|
| 166 | + | #[template(path = "admin_import.html")] |
|
| 167 | + | struct AdminImportTemplate { |
|
| 168 | + | error: Option<String>, |
|
| 169 | + | imported: Option<u32>, |
|
| 170 | + | skipped: Option<u32>, |
|
| 171 | + | } |
|
| 172 | + | ||
| 165 | 173 | // --- Query/Form structs --- |
|
| 166 | 174 | ||
| 167 | 175 | #[derive(serde::Deserialize, Default)] |
|
| 169 | 177 | pub error: Option<String>, |
|
| 170 | 178 | #[serde(default)] |
|
| 171 | 179 | pub success: bool, |
|
| 180 | + | pub imported: Option<u32>, |
|
| 181 | + | pub skipped: Option<u32>, |
|
| 172 | 182 | } |
|
| 173 | 183 | ||
| 174 | 184 | #[derive(serde::Deserialize)] |
|
| 193 | 203 | meta_image: String, |
|
| 194 | 204 | lang: String, |
|
| 195 | 205 | tags: String, |
|
| 206 | + | status: String, |
|
| 196 | 207 | } |
|
| 197 | 208 | ||
| 198 | 209 | fn parse_attributes(text: &str) -> ParsedAttributes { |
|
| 205 | 216 | meta_image: String::new(), |
|
| 206 | 217 | lang: String::new(), |
|
| 207 | 218 | tags: String::new(), |
|
| 219 | + | status: String::new(), |
|
| 208 | 220 | }; |
|
| 209 | 221 | for line in text.lines() { |
|
| 210 | 222 | if let Some((key, value)) = line.split_once(':') { |
|
| 219 | 231 | "meta_image" => attrs.meta_image = value, |
|
| 220 | 232 | "lang" => attrs.lang = value, |
|
| 221 | 233 | "tags" => attrs.tags = value, |
|
| 234 | + | "status" => attrs.status = value, |
|
| 222 | 235 | _ => {} |
|
| 223 | 236 | } |
|
| 224 | 237 | } |
|
| 505 | 518 | pub async fn run(host: String, port: u16) { |
|
| 506 | 519 | use handlers::{admin, api, public}; |
|
| 507 | 520 | ||
| 508 | - | dotenvy::dotenv().ok(); |
|
| 509 | - | ||
| 510 | 521 | let db = db::init_db(); |
|
| 511 | 522 | ||
| 512 | 523 | if let Err(e) = db::prune_expired_sessions(&db) { |
|
| 586 | 597 | // Admin downloads |
|
| 587 | 598 | .route("/admin/downloads/posts", get(admin::admin_download_posts)) |
|
| 588 | 599 | .route("/admin/downloads/uploads", get(admin::admin_download_uploads)) |
|
| 600 | + | // Admin import |
|
| 601 | + | .route( |
|
| 602 | + | "/admin/import", |
|
| 603 | + | get(admin::admin_import_form).post(admin::admin_import_posts), |
|
| 604 | + | ) |
|
| 589 | 605 | // Admin files |
|
| 590 | 606 | .route("/admin/files", get(admin::admin_files)) |
|
| 591 | 607 | .route("/admin/files/upload", post(admin::admin_upload_file)) |
|
| 599 | 615 | // Fallback |
|
| 600 | 616 | .fallback(get(public::fallback_handler)) |
|
| 601 | 617 | .with_state(state) |
|
| 602 | - | .layer(DefaultBodyLimit::max(11 * 1024 * 1024)); |
|
| 618 | + | .layer(DefaultBodyLimit::max(51 * 1024 * 1024)); |
|
| 603 | 619 | ||
| 604 | 620 | let addr = format!("{}:{}", host, port); |
|
| 605 | 621 | tracing::info!("Listening on http://{}", addr); |
|
| 16 | 16 | <a href="/admin">posts</a> |
|
| 17 | 17 | <a href="/admin/pages">pages</a> |
|
| 18 | 18 | <a href="/admin/files">files</a> |
|
| 19 | + | <a href="/admin/import">import</a> |
|
| 19 | 20 | <a href="/admin/settings">settings</a> |
|
| 20 | 21 | <a href="/" target="_blank">view site</a> |
|
| 21 | 22 | <a href="/admin/logout">logout</a> |
| 1 | + | {% extends "admin_base.html" %} |
|
| 2 | + | {% block title %}Admin — Import{% endblock %} |
|
| 3 | + | {% block content %} |
|
| 4 | + | <div class="admin-toolbar"> |
|
| 5 | + | <h2>Import posts</h2> |
|
| 6 | + | </div> |
|
| 7 | + | {% if let Some(err) = error %} |
|
| 8 | + | <p class="error">{{ err }}</p> |
|
| 9 | + | {% endif %} |
|
| 10 | + | {% if let Some(n) = imported %} |
|
| 11 | + | <p class="success">Imported {{ n }} posts, skipped {{ skipped.unwrap_or(0) }}.</p> |
|
| 12 | + | {% endif %} |
|
| 13 | + | <form method="POST" action="/admin/import" enctype="multipart/form-data" class="form"> |
|
| 14 | + | <label for="zip">upload zip of markdown files (max 50MB)</label> |
|
| 15 | + | <input type="file" id="zip" name="zip" accept=".zip" required> |
|
| 16 | + | <button type="submit">import</button> |
|
| 17 | + | </form> |
|
| 18 | + | <section> |
|
| 19 | + | <h3>Format</h3> |
|
| 20 | + | <p>The zip can contain any number of <code>.md</code> or <code>.markdown</code> files. Each file may begin with YAML-style frontmatter:</p> |
|
| 21 | + | <pre><code>--- |
|
| 22 | + | title: My Post |
|
| 23 | + | slug: my-post |
|
| 24 | + | status: published |
|
| 25 | + | published_date: 2025-01-15 10:00:00 |
|
| 26 | + | tags: rust, sqlite |
|
| 27 | + | description: A short summary |
|
| 28 | + | lang: en |
|
| 29 | + | --- |
|
| 30 | + | ||
| 31 | + | # Hello |
|
| 32 | + | ||
| 33 | + | Post body in markdown. |
|
| 34 | + | </code></pre> |
|
| 35 | + | <p>Supported keys: <code>title</code>, <code>slug</code>, <code>status</code> (<code>draft</code> or <code>published</code>), <code>published_date</code>, <code>tags</code>, <code>description</code>, <code>meta_image</code>, <code>alias</code>, <code>lang</code>. Files without frontmatter are imported with the title derived from the filename. Posts whose slug already exists are skipped.</p> |
|
| 36 | + | </section> |
|
| 37 | + | {% endblock %} |