From 07693a79255ed1442cd19cb533301c18686fffea Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 16:19:32 -0400 Subject: [PATCH 01/18] Add backup prune CLI and update roadmap --- cli-bin/docs/cli_cheatsheet.md | 1 + cli-bin/src/cli.rs | 5 ++- cli-bin/src/cli/backup.rs | 67 ++++++++++++++++++++++++++++++++++ cli-bin/src/cli/commands.yaml | 6 +++ cli-bin/src/main.rs | 7 ++-- docs/roadmap.md | 34 ++++++++++------- libmarlin/src/backup.rs | 27 ++++++++++++++ 7 files changed, 128 insertions(+), 19 deletions(-) create mode 100644 cli-bin/src/cli/backup.rs diff --git a/cli-bin/docs/cli_cheatsheet.md b/cli-bin/docs/cli_cheatsheet.md index f9297c7..402fe7f 100644 --- a/cli-bin/docs/cli_cheatsheet.md +++ b/cli-bin/docs/cli_cheatsheet.md @@ -21,3 +21,4 @@ | `version diff` | — | | `event add` | — | | `event timeline` | — | +| `backup run` | --dir, --prune, --verify, --file | diff --git a/cli-bin/src/cli.rs b/cli-bin/src/cli.rs index 2cc734c..a688035 100644 --- a/cli-bin/src/cli.rs +++ b/cli-bin/src/cli.rs @@ -1,6 +1,7 @@ // src/cli.rs pub mod annotate; +pub mod backup; pub mod coll; pub mod event; pub mod link; @@ -73,8 +74,8 @@ pub enum Commands { exec: Option, }, - /// Create a timestamped backup of the database - Backup, + /// Create or manage database backups + Backup(backup::BackupOpts), /// Restore from a backup file (overwrites current DB) Restore { backup_path: std::path::PathBuf }, diff --git a/cli-bin/src/cli/backup.rs b/cli-bin/src/cli/backup.rs new file mode 100644 index 0000000..9d219a8 --- /dev/null +++ b/cli-bin/src/cli/backup.rs @@ -0,0 +1,67 @@ +// src/cli/backup.rs +use crate::cli::Format; +use anyhow::{Context, Result}; +use clap::Args; +use libmarlin::backup::BackupManager; +use rusqlite::Connection; +use std::path::{Path, PathBuf}; + +/// Options for the `backup` command +#[derive(Args, Debug)] +pub struct BackupOpts { + /// Directory to store backups (defaults next to DB) + #[arg(long)] + pub 
dir: Option, + + /// Keep only N newest backups + #[arg(long)] + pub prune: Option, + + /// Verify a backup file + #[arg(long)] + pub verify: bool, + + /// Backup file to verify (used with --verify) + #[arg(long)] + pub file: Option, +} + +pub fn run(opts: &BackupOpts, db_path: &Path, _conn: &mut Connection, _fmt: Format) -> Result<()> { + let backups_dir = opts + .dir + .clone() + .unwrap_or_else(|| db_path.parent().unwrap().join("backups")); + let manager = BackupManager::new(db_path, &backups_dir)?; + + if opts.verify { + let file = opts + .file + .as_ref() + .context("--file required with --verify")?; + let name = file + .file_name() + .and_then(|n| n.to_str()) + .context("invalid backup file name")?; + let ok = manager.verify_backup(name)?; + if ok { + println!("Backup OK: {}", name); + } else { + println!("Backup corrupted: {}", name); + } + return Ok(()); + } + + if let Some(n) = opts.prune { + let result = manager.prune(n)?; + println!( + "Pruned {} old backups, kept {}", + result.removed.len(), + result.kept.len() + ); + return Ok(()); + } + + let info = manager.create_backup()?; + println!("Created backup {}", info.id); + Ok(()) +} diff --git a/cli-bin/src/cli/commands.yaml b/cli-bin/src/cli/commands.yaml index 19ea663..343d6dc 100644 --- a/cli-bin/src/cli/commands.yaml +++ b/cli-bin/src/cli/commands.yaml @@ -79,3 +79,9 @@ event: add: args: [file, date, description] timeline: {} + +backup: + description: "Create, prune or verify backups" + actions: + run: + flags: ["--dir", "--prune", "--verify", "--file"] diff --git a/cli-bin/src/main.rs b/cli-bin/src/main.rs index 4be42fa..b178bff 100644 --- a/cli-bin/src/main.rs +++ b/cli-bin/src/main.rs @@ -41,7 +41,7 @@ fn main() -> Result<()> { let cfg = config::Config::load()?; // resolves DB path match &args.command { - Commands::Init | Commands::Backup | Commands::Restore { .. } => {} + Commands::Init | Commands::Backup(_) | Commands::Restore { .. 
} => {} _ => match db::backup(&cfg.db_path) { Ok(p) => info!("Pre-command auto-backup created at {}", p.display()), Err(e) => error!("Failed to create pre-command auto-backup: {e}"), @@ -100,9 +100,8 @@ fn main() -> Result<()> { Commands::Search { query, exec } => run_search(&conn, &query, exec)?, /* ---- maintenance ---------------------------------------- */ - Commands::Backup => { - let p = db::backup(&cfg.db_path)?; - println!("Backup created: {}", p.display()); + Commands::Backup(opts) => { + cli::backup::run(&opts, &cfg.db_path, &mut conn, args.format)?; } Commands::Restore { backup_path } => { diff --git a/docs/roadmap.md b/docs/roadmap.md index 160513e..8c324cf 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,6 +1,6 @@ -# Marlin ― Delivery Road-map **v3** +# Marlin ― Delivery Road-map **v3.2** -*Engineering-ready version — updated 2025-05-17* +*Engineering-ready version — updated 2025-05-18* > **Legend** > **△** = engineering artefact (spec / ADR / perf target)  **✦** = user-visible deliverable @@ -39,15 +39,20 @@ ### 2 · Feature cross-matrix (quick look-ups) -| Capability | Sprint / Phase | CLI flag or GUI element | Linked DP | -| ------------------------------------- | -------------- | ---------------------------------- | --------- | -| Relationship **templates** | P7 | `template new`, `template apply` | DP-008 | -| Positive / negative filter combinator | P6 | DSL `+tag:foo -tag:bar date>=2025` | DP-007 | -| ~~Dirty-scan optimisation~~ | ~~E1~~ | ~~`scan --dirty`~~ | ~~DP-002~~ | -| Watch-mode | E2 | `marlin watch .` | DP-003 | -| Grep snippets | P3 | `search -C3 "foo"` | DP-004 | -| Hash / dedupe | P4 | `scan --rehash` | DP-005 | +| Capability | Sprint / Phase | CLI / GUI element | Linked DP | +| -------------------------- | -------------- | -------------------- | --------- | +| Crate split & docs autogen | S0 | — | – | +| Tarpaulin coverage gate | S0 | — | – | +| Watch mode (FS events) | Epic 1 | `marlin watch .` | DP‑002 | +| Backup auto‑prune 
| Epic 1 | `backup --prune N` | – | +| Dirty‑scan | Epic 2 | `scan --dirty` | DP‑002 | +| Grep snippets | Phase 3 | `search -C3 …` | DP‑004 | +| Hash / dedupe | Phase 4 | `scan --rehash` | DP‑005 | +| Tag aliases | Phase 5 | `tag alias` commands | DP‑006 | +| Search DSL v2 | Phase 6 | new grammar, `--legacy-search` flag | DP‑007 | +| Relationship templates | Phase 7 | `template new/apply` | DP‑008 | +| TUI v1 | Phase 8 | `marlin‑tui` | DP‑009 | --- ## 3 · Milestone acceptance checklist @@ -65,8 +70,11 @@ Before a milestone is declared “shipped”: ### 4 · Next immediate actions -~~1. **Write DP-001 (Schema v1.1)** — owner @alice, due 21 May~~ -~~2. **Set up Tarpaulin & Hyperfine jobs** — @bob, due 23 May~~ -~~3. **Spike dirty-flag logic** — @carol 2-day time-box, outcome in DP-002~~ +| # | Task | Owner | Due | +| - | ------------------------------ | ------ | ------------- | +| 1 | Crate split + CI baseline | @alice | **26 May 25** | +| 2 | Tarpaulin + Hyperfine jobs | @bob | **26 May 25** | +| 3 | **DP‑001 Schema v1.1** draft | @carol | **30 May 25** | +| 4 | backup prune CLI + nightly job | @dave | **05 Jun 25** | > *This roadmap now contains both product-level “what” and engineering-level “how/when/prove it”. 
It should allow a new contributor to jump in, pick the matching DP, and know exactly the bar they must clear for their code to merge.* diff --git a/libmarlin/src/backup.rs b/libmarlin/src/backup.rs index 7834da3..b184cc4 100644 --- a/libmarlin/src/backup.rs +++ b/libmarlin/src/backup.rs @@ -216,6 +216,19 @@ impl BackupManager { Ok(PruneResult { kept, removed }) } + pub fn verify_backup(&self, backup_id: &str) -> Result { + let backup_file_path = self.backups_dir.join(backup_id); + if !backup_file_path.exists() || !backup_file_path.is_file() { + return Err(anyhow::Error::new(marlin_error::Error::NotFound(format!( + "Backup file not found or is not a file: {}", + backup_file_path.display() + )))); + } + let conn = rusqlite::Connection::open(&backup_file_path)?; + let res: String = conn.query_row("PRAGMA integrity_check", [], |r| r.get(0))?; + Ok(res == "ok") + } + pub fn restore_from_backup(&self, backup_id: &str) -> Result<()> { let backup_file_path = self.backups_dir.join(backup_id); if !backup_file_path.exists() || !backup_file_path.is_file() { @@ -532,4 +545,18 @@ mod tests { assert_eq!(info.id, "backup_badformat.db"); assert_eq!(info.timestamp, expected_ts); } + + #[test] + fn verify_backup_ok() { + let tmp = tempdir().unwrap(); + let live_db = tmp.path().join("live_verify.db"); + let _conn = create_valid_live_db(&live_db); + + let backups_dir = tmp.path().join("ver_backups"); + let manager = BackupManager::new(&live_db, &backups_dir).unwrap(); + let info = manager.create_backup().unwrap(); + + let ok = manager.verify_backup(&info.id).unwrap(); + assert!(ok, "expected integrity check to pass"); + } } From 832e8d476a373d299fe73e0afc0dd7ea82c01d8e Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 16:58:54 -0400 Subject: [PATCH 02/18] Add rename handling spec and roadmap entry --- docs/roadmap.md | 3 +- docs/spec-details/Rename+Move-Handling.md | 71 +++++++++++++++++++++++ 2 files changed, 73 
insertions(+), 1 deletion(-) create mode 100644 docs/spec-details/Rename+Move-Handling.md diff --git a/docs/roadmap.md b/docs/roadmap.md index 8c324cf..5b073f9 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -25,7 +25,7 @@ | Phase / Sprint | Timeline | Focus & Rationale | ✦ Key UX Deliverables | △ Engineering artefacts / tasks | Definition of Done | | --------------------------------------------- | -------- | ---------------------------------------- | -------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | | ~~**Epic 1 — Scale & Reliability**~~ | ~~2025-Q2~~ | ~~Stay fast @ 100 k files~~ | ~~• `scan --dirty` (re-index touched rows only)~~ | ~~• DP-002 Dirty-flag design + FTS rebuild cadence
• Hyperfine benchmark script committed~~ | ~~Dirty scan vs full ≤ 15 % runtime on 100 k corpus; benchmark job passes~~ | -| **Epic 2 — Live Mode & Self-Pruning Backups** | 2025-Q2 | “Just works” indexing, DB never explodes | • `marlin watch ` (notify/FSEvents)
• `backup --prune N` & auto-prune | • DP-003 file-watcher life-cycle & debouncing
• Integration test with inotify-sim
• Cron-style GitHub job for nightly prune | 8 h stress-watch alters 10 k files < 1 % misses; backup dir ≤ N | +| **Epic 2 — Live Mode & Self-Pruning Backups** | 2025-Q2 | “Just works” indexing, DB never explodes | • `marlin watch ` (notify/FSEvents)
• `backup --prune N` & auto-prune
• rename/move tracking keeps paths current | • DP-003 file-watcher life-cycle & debouncing
• Integration test with inotify-sim
• Rename/Move handling spec & tests
• Cron-style GitHub job for nightly prune | 8 h stress-watch alters 10 k files < 1 % misses; backup dir ≤ N | | **Phase 3 — Content FTS + Annotations** | 2025-Q3 | Search inside files, leave notes | • Grep-style snippet output (`-C3`)
• `marlin annotate add/list` | • DP-004 content-blob strategy (inline vs ext-table)
• Syntax-highlight via `syntect` PoC
• New FTS triggers unit-tested | Indexes 1 GB corpus in ≤ 30 min; snippet CLI passes golden-file tests | | **Phase 4 — Versioning & Deduplication** | 2025-Q3 | Historic diffs, detect dupes | • `scan --rehash` (SHA-256)
• `version diff ` | • DP-005 hash column + Bloom-de-dupe
• Binary diff adapter research | Diff on 10 MB file ≤ 500 ms; dupes listed via CLI | | **Phase 5 — Tag Aliases & Semantic Booster** | 2025-Q3 | Tame tag sprawl, start AI hints | • `tag alias add/ls/rm`
• `tag suggest`, `summary` | • DP-006 embeddings size & model choice
• Vector store schema + k-NN index bench | 95 % of “foo/bar~foo” alias look-ups resolve in one hop; suggest CLI returns ≤ 150 ms | @@ -46,6 +46,7 @@ | Tarpaulin coverage gate | S0 | — | – | | Watch mode (FS events) | Epic 1 | `marlin watch .` | DP‑002 | | Backup auto‑prune | Epic 1 | `backup --prune N` | – | +| Rename/move tracking | Epic 2 | automatic path update | – | | Dirty‑scan | Epic 2 | `scan --dirty` | DP‑002 | | Grep snippets | Phase 3 | `search -C3 …` | DP‑004 | | Hash / dedupe | Phase 4 | `scan --rehash` | DP‑005 | diff --git a/docs/spec-details/Rename+Move-Handling.md b/docs/spec-details/Rename+Move-Handling.md new file mode 100644 index 0000000..8daefa5 --- /dev/null +++ b/docs/spec-details/Rename+Move-Handling.md @@ -0,0 +1,71 @@ +# Marlin — Rename & Move Handling + +**Integration Specification · v0.1 (2025-05-19)** + +--- + +## 0 · Scope + +This document outlines how Marlin should respond when files or folders are renamed or moved. It extends the watcher life‑cycle design (DP‑003) so that metadata remains consistent without requiring a full re‑scan. + +## 1 · Background + +The current watcher maps any `notify::EventKind::Modify(_)` – including renames – to the generic `EventPriority::Modify` and merely logs the event: + +``` +415 let prio = match event.kind { +416 EventKind::Create(_) => EventPriority::Create, +417 EventKind::Remove(_) => EventPriority::Delete, +418 EventKind::Modify(_) => EventPriority::Modify, +419 EventKind::Access(_) => EventPriority::Access, +420 _ => EventPriority::Modify, +421 }; +... +455 for event_item in &evts_to_process { +456 info!("Processing event (DB available): {:?} for path {:?}", +457 event_item.kind, event_item.path); +458 } +``` + +No database update occurs, so renamed files keep their old `path` in the `files` table. 
The schema does have a trigger to propagate `path` updates to the FTS index: + +``` +72 -- When a file’s path changes +73 DROP TRIGGER IF EXISTS files_fts_au_file; +74 CREATE TRIGGER files_fts_au_file +75 AFTER UPDATE OF path ON files +76 BEGIN +77 UPDATE files_fts +78 SET path = NEW.path +79 WHERE rowid = NEW.id; +80 END; +``` + +## 2 · Requirements + +1. **Detect old and new paths** from `Rename` events provided by the `notify` crate. +2. **Update the `files` table** with the new absolute path when the target remains inside a scanned root. +3. **Mark as removed** if the new location is outside all configured roots. +4. **Batch updates** to avoid excessive writes during large folder moves. +5. **Integration tests** exercising rename and move scenarios across platforms. + +## 3 · Implementation Sketch + +* Extend `ProcessedEvent` to carry `old_path` and `new_path` for `Rename` events. +* Upon flushing events, call `db::mark_dirty` for the affected row, then update the `files.path` column. The existing trigger keeps `files_fts` in sync. +* For directory renames, update child paths with a single SQL `UPDATE ... WHERE path LIKE 'old/%'` inside a transaction. +* Emit `Create` and `Remove` events for files crossing watch boundaries so `scan --dirty` can prune or index them accordingly. + +## 4 · Edge Cases + +* **Atomic cross-filesystem moves** may surface as `Remove` + `Create`; both should be handled. +* **Concurrent modifications** while moving should result in the newer metadata winning when `scan --dirty` runs. + +## 5 · Future Work + +Large scale refactors (e.g. moving an entire project) may benefit from a high‑level command that updates tags and links en masse. That is outside the scope of this spec but enabled by accurate rename tracking. 
+ +--- + +*End of document* + From 8c7c8a2395d2779cad6e4adff166cafcaae163c8 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 19:31:23 -0400 Subject: [PATCH 03/18] Handle backup IDs in restore command --- cli-bin/src/main.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cli-bin/src/main.rs b/cli-bin/src/main.rs index b178bff..c2c8246 100644 --- a/cli-bin/src/main.rs +++ b/cli-bin/src/main.rs @@ -11,6 +11,7 @@ mod cli; // sub-command definitions and argument structs /* ── shared modules re-exported from libmarlin ─────────────────── */ use libmarlin::db::take_dirty; use libmarlin::{config, db, logging, scan, utils::determine_scan_root}; +use libmarlin::backup::BackupManager; use anyhow::{Context, Result}; use clap::{CommandFactory, Parser}; @@ -106,8 +107,20 @@ fn main() -> Result<()> { Commands::Restore { backup_path } => { drop(conn); - db::restore(&backup_path, &cfg.db_path) - .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; + if backup_path.exists() { + db::restore(&backup_path, &cfg.db_path) + .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; + } else { + let backups_dir = cfg.db_path.parent().unwrap().join("backups"); + let manager = BackupManager::new(&cfg.db_path, &backups_dir)?; + let name = backup_path + .file_name() + .and_then(|n| n.to_str()) + .context("invalid backup file name")?; + manager + .restore_from_backup(name) + .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; + } println!("Restored DB from {}", backup_path.display()); db::open(&cfg.db_path).with_context(|| { format!("Could not open restored DB at {}", cfg.db_path.display()) From 15e892055f0ae097467a8a10052b6454933beb3e Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 19:42:52 -0400 Subject: [PATCH 04/18] Format restore logic 
--- cli-bin/src/main.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cli-bin/src/main.rs b/cli-bin/src/main.rs index b178bff..c1b3224 100644 --- a/cli-bin/src/main.rs +++ b/cli-bin/src/main.rs @@ -9,6 +9,7 @@ mod cli; // sub-command definitions and argument structs /* ── shared modules re-exported from libmarlin ─────────────────── */ +use libmarlin::backup::BackupManager; use libmarlin::db::take_dirty; use libmarlin::{config, db, logging, scan, utils::determine_scan_root}; @@ -106,8 +107,21 @@ fn main() -> Result<()> { Commands::Restore { backup_path } => { drop(conn); - db::restore(&backup_path, &cfg.db_path) - .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; + if backup_path.exists() { + db::restore(&backup_path, &cfg.db_path).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + } else { + let backups_dir = cfg.db_path.parent().unwrap().join("backups"); + let manager = BackupManager::new(&cfg.db_path, &backups_dir)?; + let name = backup_path + .file_name() + .and_then(|n| n.to_str()) + .context("invalid backup file name")?; + manager.restore_from_backup(name).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + } println!("Restored DB from {}", backup_path.display()); db::open(&cfg.db_path).with_context(|| { format!("Could not open restored DB at {}", cfg.db_path.display()) From 45c28d078053ee16c5d19555e1a83db80f0e321a Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 20:02:21 -0400 Subject: [PATCH 05/18] Fix restore fallback to backup ID --- cli-bin/src/main.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cli-bin/src/main.rs b/cli-bin/src/main.rs index b178bff..c1b3224 100644 --- a/cli-bin/src/main.rs +++ b/cli-bin/src/main.rs @@ -9,6 +9,7 @@ mod cli; // sub-command definitions and argument structs /* ── 
shared modules re-exported from libmarlin ─────────────────── */ +use libmarlin::backup::BackupManager; use libmarlin::db::take_dirty; use libmarlin::{config, db, logging, scan, utils::determine_scan_root}; @@ -106,8 +107,21 @@ fn main() -> Result<()> { Commands::Restore { backup_path } => { drop(conn); - db::restore(&backup_path, &cfg.db_path) - .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; + if backup_path.exists() { + db::restore(&backup_path, &cfg.db_path).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + } else { + let backups_dir = cfg.db_path.parent().unwrap().join("backups"); + let manager = BackupManager::new(&cfg.db_path, &backups_dir)?; + let name = backup_path + .file_name() + .and_then(|n| n.to_str()) + .context("invalid backup file name")?; + manager.restore_from_backup(name).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + } println!("Restored DB from {}", backup_path.display()); db::open(&cfg.db_path).with_context(|| { format!("Could not open restored DB at {}", cfg.db_path.display()) From 99f72af4bcb74bfbb6a77cd4325415e9c0ff812a Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 20:14:56 -0400 Subject: [PATCH 06/18] update --- cli-bin/src/main.rs | 189 ++++++++++++++++++++++---------------------- 1 file changed, 93 insertions(+), 96 deletions(-) diff --git a/cli-bin/src/main.rs b/cli-bin/src/main.rs index 57657fa..0a8766f 100644 --- a/cli-bin/src/main.rs +++ b/cli-bin/src/main.rs @@ -52,110 +52,107 @@ fn main() -> Result<()> { /* ── open DB (runs migrations) ───────────────────────────── */ let mut conn = db::open(&cfg.db_path)?; -/* ── command dispatch ────────────────────────────────────── */ -match args.command { - Commands::Completions { .. 
} => {} // handled above + /* ── command dispatch ────────────────────────────────────── */ + match args.command { + Commands::Completions { .. } => {} // handled above - /* ---- init ------------------------------------------------ */ - Commands::Init => { - info!("Database initialised at {}", cfg.db_path.display()); - let cwd = env::current_dir().context("getting current directory")?; - let count = - scan::scan_directory(&mut conn, &cwd).context("initial scan failed")?; - info!("Initial scan complete – indexed/updated {count} files"); - } - - /* ---- scan ------------------------------------------------ */ - Commands::Scan { dirty, paths } => { - let scan_paths: Vec = if paths.is_empty() { - vec![env::current_dir()?] - } else { - paths.into_iter().collect() - }; - - if dirty { - let dirty_ids = take_dirty(&conn)?; - for id in dirty_ids { - let path: String = conn.query_row( - "SELECT path FROM files WHERE id = ?1", - [id], - |r| r.get(0), - )?; - scan::scan_directory(&mut conn, Path::new(&path))?; - } - } else { - for p in scan_paths { - scan::scan_directory(&mut conn, &p)?; - } - } - } - - /* ---- tag / attribute / search --------------------------- */ - Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?, - - Commands::Attr { action } => match action { - cli::AttrCmd::Set { - pattern, - key, - value, - } => attr_set(&conn, &pattern, &key, &value)?, - cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?, - }, - - Commands::Search { query, exec } => run_search(&conn, &query, exec)?, - - /* ---- maintenance ---------------------------------------- */ - Commands::Backup(opts) => { - cli::backup::run(&opts, &cfg.db_path, &mut conn, args.format)?; - } - - Commands::Restore { backup_path } => { - drop(conn); // close connection so the restore can overwrite the DB file - - if backup_path.exists() { - // User pointed to an actual backup file on disk - db::restore(&backup_path, &cfg.db_path).with_context(|| { - format!("Failed to restore DB from 
{}", backup_path.display()) - })?; - } else { - // Assume they passed just the file-name that lives in the standard backups dir - let backups_dir = cfg.db_path.parent().unwrap().join("backups"); - let manager = BackupManager::new(&cfg.db_path, &backups_dir)?; - - let name = backup_path - .file_name() - .and_then(|n| n.to_str()) - .context("invalid backup file name")?; - - manager.restore_from_backup(name).with_context(|| { - format!("Failed to restore DB from {}", backup_path.display()) - })?; + /* ---- init ------------------------------------------------ */ + Commands::Init => { + info!("Database initialised at {}", cfg.db_path.display()); + let cwd = env::current_dir().context("getting current directory")?; + let count = scan::scan_directory(&mut conn, &cwd).context("initial scan failed")?; + info!("Initial scan complete – indexed/updated {count} files"); } - println!("Restored DB from {}", backup_path.display()); + /* ---- scan ------------------------------------------------ */ + Commands::Scan { dirty, paths } => { + let scan_paths: Vec = if paths.is_empty() { + vec![env::current_dir()?] 
+ } else { + paths.into_iter().collect() + }; - // Re-open so the rest of the program talks to the fresh database - db::open(&cfg.db_path).with_context(|| { - format!("Could not open restored DB at {}", cfg.db_path.display()) - })?; - info!("Successfully opened restored database."); + if dirty { + let dirty_ids = take_dirty(&conn)?; + for id in dirty_ids { + let path: String = + conn.query_row("SELECT path FROM files WHERE id = ?1", [id], |r| r.get(0))?; + scan::scan_directory(&mut conn, Path::new(&path))?; + } + } else { + for p in scan_paths { + scan::scan_directory(&mut conn, &p)?; + } + } + } + + /* ---- tag / attribute / search --------------------------- */ + Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?, + + Commands::Attr { action } => match action { + cli::AttrCmd::Set { + pattern, + key, + value, + } => attr_set(&conn, &pattern, &key, &value)?, + cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?, + }, + + Commands::Search { query, exec } => run_search(&conn, &query, exec)?, + + /* ---- maintenance ---------------------------------------- */ + Commands::Backup(opts) => { + cli::backup::run(&opts, &cfg.db_path, &mut conn, args.format)?; + } + + Commands::Restore { backup_path } => { + drop(conn); // close connection so the restore can overwrite the DB file + + if backup_path.exists() { + // User pointed to an actual backup file on disk + db::restore(&backup_path, &cfg.db_path).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + } else { + // Assume they passed just the file-name that lives in the standard backups dir + let backups_dir = cfg.db_path.parent().unwrap().join("backups"); + let manager = BackupManager::new(&cfg.db_path, &backups_dir)?; + + let name = backup_path + .file_name() + .and_then(|n| n.to_str()) + .context("invalid backup file name")?; + + manager.restore_from_backup(name).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + } 
+ + println!("Restored DB from {}", backup_path.display()); + + // Re-open so the rest of the program talks to the fresh database + db::open(&cfg.db_path).with_context(|| { + format!("Could not open restored DB at {}", cfg.db_path.display()) + })?; + info!("Successfully opened restored database."); + } + + /* ---- passthrough sub-modules ---------------------------- */ + Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?, + Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?, + Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?, + Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?, + Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?, + Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?, + Commands::Annotate(a_cmd) => cli::annotate::run(&a_cmd, &mut conn, args.format)?, + Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?, + Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?, + Commands::Watch(watch_cmd) => cli::watch::run(&watch_cmd, &mut conn, args.format)?, } - /* ---- passthrough sub-modules ---------------------------- */ - Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?, - Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?, - Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?, - Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?, - Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?, - Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?, - Commands::Annotate(a_cmd) => cli::annotate::run(&a_cmd, &mut conn, args.format)?, - Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?, - Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, 
args.format)?, - Commands::Watch(watch_cmd) => cli::watch::run(&watch_cmd, &mut conn, args.format)?, + Ok(()) } -Ok(()) - /* ─────────────────── helpers & sub-routines ─────────────────── */ /* ---------- TAGS ---------- */ From 9aee756d029d7829b447223124ab3e1028cfaf6d Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 20:53:17 -0400 Subject: [PATCH 07/18] Document rename/move spec --- docs/roadmap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/roadmap.md b/docs/roadmap.md index 5b073f9..c7d6e2b 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -46,7 +46,7 @@ | Tarpaulin coverage gate | S0 | — | – | | Watch mode (FS events) | Epic 1 | `marlin watch .` | DP‑002 | | Backup auto‑prune | Epic 1 | `backup --prune N` | – | -| Rename/move tracking | Epic 2 | automatic path update | – | +| Rename/move tracking | Epic 2 | automatic path update | Spec‑RMH | | Dirty‑scan | Epic 2 | `scan --dirty` | DP‑002 | | Grep snippets | Phase 3 | `search -C3 …` | DP‑004 | | Hash / dedupe | Phase 4 | `scan --rehash` | DP‑005 | From e20ef2ee77a194e3f4755cede8e8ce968c300e6d Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 21:26:03 -0400 Subject: [PATCH 08/18] Remove duplicate roadmap --- README.md | 82 +++---------------------------------------------- docs/roadmap.md | 6 ++-- 2 files changed, 7 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 91645ba..5633c1c 100644 --- a/README.md +++ b/README.md @@ -1,82 +1,7 @@ -# Marlin ― Delivery Road‑map **v3.2** +# Marlin -*Engineering‑ready – revised 2025‑05‑18* - -> **Legend** △ engineering artefact ✦ user‑visible deliverable - ---- - -## 0 · Methodology primer  (what “Done” means) - -| Theme | Project rule‑of‑thumb | -| -------------- | 
------------------------------------------------------------------------------------------------------------------------------------- | -| **Branching** | Trunk‑based. Feature branch → PR → 2 reviews → squash‑merge. | -| **Spec first** | Each epic begins with a **Design Proposal (DP‑xxx)** in `/docs/adr/` containing schema diffs, example CLI session, perf targets. | -| **Coverage** | Tarpaulin gate ≥ 85 % **on lines touched this sprint** (checked in CI). | -| **Perf gate** | Cold‑start P95 ≤ 3 s on 100 k files **unless overridden in DP**. Regressions fail CI. | -| **Docs** | CLI flags & examples land in `README.md` **same PR**. Docs tables (CLI cheatsheet, TUI key‑map) are auto‑generated during the build. | -| **Demo** | Closing each epic yields a ≤ 2‑min asciinema or GIF in `docs/demos/`. | - ---- - -## 1 · Bird’s‑eye table (engineering details + deliverables) - -| Phase / Sprint | Timeline | Focus & Rationale | ✦ Key UX Deliverables | △ Engineering artefacts / tasks | Definition of Done | -| ----------------------------------------------- | ----------------------------- | -------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | -| **Sprint 0 — Bootstrap & CI Baseline** | **2025‑Q2
(now → 30 May)** | CI scaffolding, coverage, crate split | — | • Split repo into **`libmarlin` (core)** + **`cli-bin`** + **`tui-bin`**
• Tarpaulin coverage + Hyperfine perf jobs wired
• `build.rs` renders CLI cheatsheet from `commands.yaml`
• Docs / cheatsheet autogen step in GitHub Actions | `cargo test --all` passes with coverage gate ≥ 85 %; docs artefacts appear in build; crates compile. | -| **Sprint α — Bedrock & Metadata Domains** | **31 May → 13 Jun 2025** | Lock schema v1.1, first metadata objects | • CLI stubs: `marlin link / coll / view`
• `marlin demo` interactive tour | • **DP‑001 Schema v1.1** (ER + migration scripts)
• Unit tests (`escape_fts`, `determine_scan_root`)
• GitHub Action for SQL dry‑run | 100 % migrations green; demo prints ✅; logo badge shows schema version. | -| **Epic 1 — Live‑Watch Mode & Backup Prune** | **2025‑Q2** | Continuous indexing via FS events; backups never explode | • `marlin watch <dir>` (inotify / FSEvents)
• `backup --prune N` (auto‑prune pre‑ and post‑command) | • **DP‑002** file‑watch life‑cycle & debounce strategy
• Change‑table schema storing dirty file IDs
• Nightly prune CI job | 8 h stress‑watch alters 10 k files → < 1 % missed; backup dir size ≤ N; watch CPU idle < 3 %. | -| **Epic 2 — Dirty‑scan optimisation** | **2025‑Q2** | Re‑index only paths marked dirty by watch table | • `scan --dirty` | • Reuse change‑table from watch; Hyperfine benchmark script committed | Dirty‑scan runtime ≤ 15 % full scan on 100 k corpus; bench job passes. | -| **Phase 3 — Content FTS + Annotations** | 2025‑Q3 | Grep snippets, inline notes | • `search -C3` grep‑style context
• `annotate add/list` | • **DP‑004** content‑blob strategy (inline vs ext‑table)
• `syntect` highlight PoC | Indexes 1 GB corpus ≤ 30 min; snippet CLI golden tests pass. | -| **Phase 4 — Versioning & De‑duplication** | 2025‑Q3 | Historic diffs, SHA‑256 dedupe | • `scan --rehash`
• `version diff <file>` | • **DP‑005** hash column + Bloom‑de‑dupe research | Diff on 10 MB file ≤ 500 ms; duplicate sets emitted by CLI. | -| **Phase 5 — Tag Aliases & Semantic Booster** | 2025‑Q3 | Tame tag sprawl; start AI hints | • `tag alias add/ls/rm`
• `tag suggest`, `summary` | • **DP‑006** embeddings size & k‑NN search bench | 95 % alias look‑ups resolved in one hop; suggest query ≤ 150 ms. | -| **Phase 6 — Search DSL v2 & Smart Views** | 2025‑Q4 | AND/OR, ranges, structured grammar; smart folders | • New `nom` grammar
• Legacy parser behind **`--legacy-search`** (warn on use) | • **DP‑007** BNF + 30 acceptance strings
• Lexer fuzz tests (`cargo‑fuzz`) | Old queries keep working; 0 panics in fuzz run ≥ 1 M cases. | -| **Phase 7 — Structured Workflows & Templates** | 2025‑Q4 | State graph, relationship templates | • `state set/log`
• `template apply` | • **DP‑008** workflow tables & YAML template spec
• Sample template e2e tests | Create template, apply to 20 files → all attrs/link rows present; illegal transitions blocked. | -| **Phase 8 — TUI v1 + Lightweight Integrations** | 2026‑Q1 | Keyboard UI, VS Code sidebar | • **`marlin‑tui`** binary (tiling panes, key‑map)
• Read‑only VS Code sidebar | • **DP‑009** TUI redraw budget & key‑map
• Crate split fully consumed | TUI binary ≤ 2 MB; scroll redraw ≤ 4 ms; VS Code extension loads index. | -| **Phase 9 — Dolphin Sidebar (MVP)** | 2026‑Q1 | Peek metadata inline in KDE Dolphin | • Qt/KIO sidebar | • **DP‑010** DB/IP bridge (D‑Bus vs UNIX socket)
• CMake packaging script | Sidebar opens ≤ 150 ms; passes KDE lint. | -| **Phase 10 — Full GUI & Multi‑device Sync** | 2026‑Q2 | Visual editor + optional sync backend | • Electron/Qt hybrid explorer UI
• Select & integrate sync (LiteFS / Postgres) | • **DP‑011** sync back‑end trade‑study
• Busy‑timeout/retry strategy for multi‑writer mode | CRUD round‑trip < 2 s between two nodes; 25 GUI e2e tests green. | - ---- - -### 2 · Feature cross‑matrix (quick look‑ups) - -| Capability | Sprint / Phase | CLI / GUI element | Linked DP | -| -------------------------- | -------------- | ----------------------------------- | --------- | -| Crate split & docs autogen | S0 | — | – | -| Tarpaulin coverage gate | S0 | — | – | -| Watch mode (FS events) | Epic 1 | `marlin watch .` | DP‑002 | -| Backup auto‑prune | Epic 1 | `backup --prune N` | – | -| Dirty‑scan | Epic 2 | `scan --dirty` | DP‑002 | -| Grep snippets | Phase 3 | `search -C3 …` | DP‑004 | -| Hash / dedupe | Phase 4 | `scan --rehash` | DP‑005 | -| Tag aliases | Phase 5 | `tag alias` commands | DP‑006 | -| Search DSL v2 | Phase 6 | new grammar, `--legacy-search` flag | DP‑007 | -| Relationship templates | Phase 7 | `template new/apply` | DP‑008 | -| TUI v1 | Phase 8 | `marlin‑tui` | DP‑009 | - ---- - -## 3 · Milestone acceptance checklist - -Before a milestone is declared **shipped**: - -* [ ] **Spec** DP‑xxx merged with schema diff, ASCII‑cast demo -* [ ] **Tests** Tarpaulin ≥ 85 % on changed lines; all suites green -* [ ] **Perf guard** script passes on CI matrix (Ubuntu 22, macOS 14) -* [ ] **Docs** auto‑regenerated; README & cheatsheet updated -* [ ] **Demo** asciinema/GIF committed and linked in release notes -* [ ] **Release tag** pushed; Cargo binary uploaded to GitHub Releases - ---- - -## 4 · Next immediate actions - -| # | Task | Owner | Due | -| - | ------------------------------ | ------ | ------------- | -| 1 | Crate split + CI baseline | @alice | **26 May 25** | -| 2 | Tarpaulin + Hyperfine jobs | @bob | **26 May 25** | -| 3 | **DP‑001 Schema v1.1** draft | @carol | **30 May 25** | -| 4 | backup prune CLI + nightly job | @dave | **05 Jun 25** | +This repository hosts the Marlin indexing tool. +See [docs/roadmap.md](docs/roadmap.md) for the current delivery roadmap. 
## CLI Cheatsheet @@ -86,3 +11,4 @@ The full command reference is generated during the build of the CLI. See ## License Licensed under the [MIT License](LICENSE). + diff --git a/docs/roadmap.md b/docs/roadmap.md index c7d6e2b..9556e73 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -73,9 +73,9 @@ Before a milestone is declared “shipped”: | # | Task | Owner | Due | | - | ------------------------------ | ------ | ------------- | -| 1 | Crate split + CI baseline | @alice | **26 May 25** | -| 2 | Tarpaulin + Hyperfine jobs | @bob | **26 May 25** | +| ~~1~~ | ~~Crate split + CI baseline~~ | @alice | ~~26 May 25~~ | +| ~~2~~ | ~~Tarpaulin + Hyperfine jobs~~ | @bob | ~~26 May 25~~ | | 3 | **DP‑001 Schema v1.1** draft | @carol | **30 May 25** | -| 4 | backup prune CLI + nightly job | @dave | **05 Jun 25** | +| ~~4~~ | ~~backup prune CLI + nightly job~~ | @dave | ~~05 Jun 25~~ | > *This roadmap now contains both product-level “what” and engineering-level “how/when/prove it”. It should allow a new contributor to jump in, pick the matching DP, and know exactly the bar they must clear for their code to merge.* From 8c64d55a125aa5fc05e980d7956b23a6bb446dc3 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 22:20:29 -0400 Subject: [PATCH 09/18] Update schema DP to match latest migrations --- docs/adr/DP-001_schema_v1.1.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/docs/adr/DP-001_schema_v1.1.md b/docs/adr/DP-001_schema_v1.1.md index 37a46cf..d5480b1 100644 --- a/docs/adr/DP-001_schema_v1.1.md +++ b/docs/adr/DP-001_schema_v1.1.md @@ -1,6 +1,6 @@ # DP-001: Schema v1.1 – Core Metadata Domains -**Status**: Proposed +**Status**: Accepted **Authors**: @carol **Date**: 2025-05-17 @@ -8,14 +8,14 @@ We’ve landed a basic SQLite-backed `files` table and a contentless FTS5 index. 
Before we build out higher-level features, we need to lock down our **v1.1** metadata schema for: -- **Hierarchical tags** (`tags` + `file_tags`) +- **Hierarchical tags** (`tags` + `file_tags`) – optional `canonical_id` for aliases - **Custom attributes** (`attributes`) - **File-to-file relationships** (`links`) - **Named collections** (`collections` + `collection_files`) -- **Saved views** (`saved_views`) +- **Views** (`views`) Locking this schema now lets downstream CLI & GUI work against a stable model and ensures our migrations stay easy to reason about. -*Note: Tag aliases and their `canonical_id` support are deferred to DP-006 (v1.5).* +Tags optionally reference a canonical tag via the `canonical_id` column. ## 2. Decision @@ -33,7 +33,7 @@ All foreign keys use `ON DELETE CASCADE` so deleting a file, tag, etc. automatic 1. **0001\_initial\_schema.sql** – create core tables (`files`, `tags`, `file_tags`, `attributes`), a contentless FTS5 table (`files_fts`), core FTS triggers, and performance-critical indexes. 2. **0002\_update\_fts\_and\_triggers.sql** – replace old tag/attr FTS triggers with `INSERT OR REPLACE` semantics for full-row refresh. - 3. **0003\_create\_links\_collections\_saved\_views.sql** – introduce `links`, `collections`, `collection_files`, and `saved_views` tables. + 3. **0003\_create\_links\_collections\_views.sql** – introduce `links`, `collections`, `collection_files`, and `views` tables. 4. **0004\_fix\_hierarchical\_tags\_fts.sql** – refine FTS triggers to index full hierarchical tag-paths via a recursive CTE. 3. Expose this schema through our library (`libmarlin::db::open`) so any client sees a v1.1 store. 
@@ -57,6 +57,7 @@ entity tags { -- name : TEXT parent_id : INTEGER <> + canonical_id : INTEGER <> } entity file_tags { @@ -91,7 +92,7 @@ entity collection_files { * file_id : INTEGER <> } -entity saved_views { +entity views { * id : INTEGER <> -- name : TEXT @@ -109,7 +110,7 @@ files ||--o{ links : "dst_file_id" collections ||--o{ collection_files files ||--o{ collection_files -saved_views ||..|| files : "exec via FTS" +views ||..|| files : "exec via FTS" @enduml ``` @@ -135,7 +136,7 @@ Or in plain-ASCII: └─────────────┘ └──────────────────┘ └────────┘ ┌─────────────┐ -│ saved_views │ +│ views │ │ (exec FTS) │ └─────────────┘ ``` @@ -146,8 +147,9 @@ Or in plain-ASCII: | ------------------------------------------------------ | ------------------------------------------------------------- | | **0001\_initial\_schema.sql** | Core tables + contentless FTS + core triggers + indexes | | **0002\_update\_fts\_and\_triggers.sql** | Full-row FTS refresh on tag/attr changes | -| **0003\_create\_links\_collections\_saved\_views.sql** | Add `links`, `collections`, `collection_files`, `saved_views` | +| **0003\_create\_links\_collections\_views.sql** | Add `links`, `collections`, `collection_files`, `views` | | **0004\_fix\_hierarchical\_tags\_fts.sql** | Recursive CTE for full tag-path indexing in FTS triggers | +| **0005_add_dirty_table.sql** | Track modified files needing reindexing | ### Performance-Critical Indexes From 794180ac18ef5c7f1949861dbc6707f9223e5ba2 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Wed, 21 May 2025 22:45:55 -0400 Subject: [PATCH 10/18] Drop canonical_id from tags --- libmarlin/src/db/migrations/0001_initial_schema.sql | 1 - libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql | 6 ++++++ libmarlin/src/db/mod.rs | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql diff --git 
a/libmarlin/src/db/migrations/0001_initial_schema.sql b/libmarlin/src/db/migrations/0001_initial_schema.sql index b3b3dd8..76d1963 100644 --- a/libmarlin/src/db/migrations/0001_initial_schema.sql +++ b/libmarlin/src/db/migrations/0001_initial_schema.sql @@ -17,7 +17,6 @@ CREATE TABLE IF NOT EXISTS tags ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, -- tag segment parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE, - canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL, UNIQUE(name, parent_id) ); diff --git a/libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql b/libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql new file mode 100644 index 0000000..6180843 --- /dev/null +++ b/libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql @@ -0,0 +1,6 @@ +PRAGMA foreign_keys = ON; +PRAGMA journal_mode = WAL; + +-- Remove canonical_id column from tags table +ALTER TABLE tags DROP COLUMN canonical_id; + diff --git a/libmarlin/src/db/mod.rs b/libmarlin/src/db/mod.rs index fc7974d..f173d56 100644 --- a/libmarlin/src/db/mod.rs +++ b/libmarlin/src/db/mod.rs @@ -41,6 +41,10 @@ const MIGRATIONS: &[(&str, &str)] = &[ "0005_add_dirty_table.sql", include_str!("migrations/0005_add_dirty_table.sql"), ), + ( + "0006_drop_tags_canonical_id.sql", + include_str!("migrations/0006_drop_tags_canonical_id.sql"), + ), ]; /* ─── connection bootstrap ────────────────────────────────────────── */ From af35c90c5089b5529d65c66b33b44f7b5d64ac0b Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 08:17:51 -0400 Subject: [PATCH 11/18] Add schema version constant and checks --- docs/adr/DP-001_schema_v1.1.md | 1 + libmarlin/src/db/mod.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/docs/adr/DP-001_schema_v1.1.md b/docs/adr/DP-001_schema_v1.1.md index d5480b1..e8bf484 100644 --- a/docs/adr/DP-001_schema_v1.1.md +++ b/docs/adr/DP-001_schema_v1.1.md @@ -36,6 +36,7 @@ All foreign 
keys use `ON DELETE CASCADE` so deleting a file, tag, etc. automatic 3. **0003\_create\_links\_collections\_views.sql** – introduce `links`, `collections`, `collection_files`, and `views` tables. 4. **0004\_fix\_hierarchical\_tags\_fts.sql** – refine FTS triggers to index full hierarchical tag-paths via a recursive CTE. 3. Expose this schema through our library (`libmarlin::db::open`) so any client sees a v1.1 store. +4. Track the version in code via `SCHEMA_VERSION` and provide `current_schema_version()` to query the DB. ## 3. ER Diagram diff --git a/libmarlin/src/db/mod.rs b/libmarlin/src/db/mod.rs index f173d56..01adca0 100644 --- a/libmarlin/src/db/mod.rs +++ b/libmarlin/src/db/mod.rs @@ -18,6 +18,11 @@ use rusqlite::{ use std::result::Result as StdResult; use tracing::{debug, info, warn}; +/* ─── schema version ───────────────────────────────────────────────── */ + +/// Current library schema version. +pub const SCHEMA_VERSION: i32 = 1_1; + /* ─── embedded migrations ─────────────────────────────────────────── */ const MIGRATIONS: &[(&str, &str)] = &[ @@ -47,6 +52,18 @@ const MIGRATIONS: &[(&str, &str)] = &[ ), ]; +/* ─── schema helpers ─────────────────────────────────────────────── */ + +/// Fetch the highest version recorded in the `schema_version` table. 
+pub fn current_schema_version(conn: &Connection) -> Result { + let version: i32 = conn.query_row( + "SELECT IFNULL(MAX(version), 0) FROM schema_version", + [], + |r| r.get(0), + )?; + Ok(version) +} + /* ─── connection bootstrap ────────────────────────────────────────── */ pub fn open>(db_path: P) -> Result { @@ -133,6 +150,15 @@ pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> { warn!("migrations not applied: {:?}", missing); } + let current = current_schema_version(conn)?; + if current != SCHEMA_VERSION { + anyhow::bail!( + "database schema version {} does not match library version {}", + current, + SCHEMA_VERSION + ); + } + Ok(()) } From f137541c99623a5ca3414081d1b56c9f72983320 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 08:54:16 -0400 Subject: [PATCH 12/18] Update README with collections and views --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5633c1c..5c10660 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,26 @@ # Marlin This repository hosts the Marlin indexing tool. -See [docs/roadmap.md](docs/roadmap.md) for the current delivery roadmap. +See [docs/roadmap.md](docs/roadmap.md) and +[docs/adr/DP-001_schema_v1.1.md](docs/adr/DP-001_schema_v1.1.md) +for the current delivery roadmap and schema. ## CLI Cheatsheet The full command reference is generated during the build of the CLI. See [cli-bin/docs/cli_cheatsheet.md](cli-bin/docs/cli_cheatsheet.md). +## Collections and Views + +Named **collections** act like playlists of files. Create one with +`marlin coll create `, add files via +`marlin coll add ` and list contents using +`marlin coll list `. + +**Views** save search queries for quick reuse. Save a query with +`marlin view save "tag:todo"`, list all views using +`marlin view list` and execute one with `marlin view exec `. + ## License Licensed under the [MIT License](LICENSE). 
- From a3eda234c0f065aa4f59c941395885edd7dfa86b Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 09:32:23 -0400 Subject: [PATCH 13/18] Add FTS trigger test --- libmarlin/src/db_tests.rs | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/libmarlin/src/db_tests.rs b/libmarlin/src/db_tests.rs index 29c4e69..a391fdc 100644 --- a/libmarlin/src/db_tests.rs +++ b/libmarlin/src/db_tests.rs @@ -234,3 +234,55 @@ mod dirty_helpers { assert!(empty.is_empty()); } } + +#[test] +fn tables_exist_and_fts_triggers() { + use super::Marlin; + use std::fs; + + let tmp = tempdir().unwrap(); + let db_path = tmp.path().join("test.db"); + let mut marlin = Marlin::open_at(&db_path).unwrap(); + + // the DB file should exist after opening + assert!(db_path.exists()); + + // confirm required tables + for table in ["links", "collections", "collection_files", "views"] { + let cnt: i64 = marlin + .conn() + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1", + [table], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(cnt, 1, "missing table {table}"); + } + + // create a file to index + let file_dir = tmp.path().join("files"); + fs::create_dir(&file_dir).unwrap(); + let file_path = file_dir.join("sample.txt"); + fs::write(&file_path, "hello world").unwrap(); + + // index via public helper + marlin.scan(&[&file_dir]).unwrap(); + marlin.tag("*.txt", "foo/bar").unwrap(); + + let fid = db::file_id(marlin.conn(), file_path.to_str().unwrap()).unwrap(); + db::upsert_attr(marlin.conn(), fid, "color", "blue").unwrap(); + + let row: (String, String, String) = marlin + .conn() + .query_row( + "SELECT path, tags_text, attrs_text FROM files_fts WHERE rowid = ?1", + [fid], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + + assert_eq!(row.0, file_path.to_str().unwrap()); + assert!(row.1.contains("foo") && row.1.contains("bar")); + assert_eq!(row.2, "color=blue"); +} 
From 37764a643c9f3589ba2f5ad3799e97c647041560 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 10:28:03 -0400 Subject: [PATCH 14/18] Align schema version with migration count --- libmarlin/src/db/migrations/0001_initial_schema.sql | 1 + libmarlin/src/db/mod.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libmarlin/src/db/migrations/0001_initial_schema.sql b/libmarlin/src/db/migrations/0001_initial_schema.sql index 76d1963..0780bf3 100644 --- a/libmarlin/src/db/migrations/0001_initial_schema.sql +++ b/libmarlin/src/db/migrations/0001_initial_schema.sql @@ -17,6 +17,7 @@ CREATE TABLE IF NOT EXISTS tags ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, -- tag segment parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE, + canonical_id INTEGER REFERENCES tags(id), UNIQUE(name, parent_id) ); diff --git a/libmarlin/src/db/mod.rs b/libmarlin/src/db/mod.rs index 01adca0..4ee4241 100644 --- a/libmarlin/src/db/mod.rs +++ b/libmarlin/src/db/mod.rs @@ -21,7 +21,7 @@ use tracing::{debug, info, warn}; /* ─── schema version ───────────────────────────────────────────────── */ /// Current library schema version. 
-pub const SCHEMA_VERSION: i32 = 1_1; +pub const SCHEMA_VERSION: i32 = MIGRATIONS.len() as i32; /* ─── embedded migrations ─────────────────────────────────────────── */ From 5009367db422fd470cb36486b6b3deb1163e6e12 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 11:26:19 -0400 Subject: [PATCH 15/18] Fix FTS smoke test for contentless table --- libmarlin/src/db_tests.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/libmarlin/src/db_tests.rs b/libmarlin/src/db_tests.rs index a391fdc..76527e1 100644 --- a/libmarlin/src/db_tests.rs +++ b/libmarlin/src/db_tests.rs @@ -273,16 +273,25 @@ fn tables_exist_and_fts_triggers() { let fid = db::file_id(marlin.conn(), file_path.to_str().unwrap()).unwrap(); db::upsert_attr(marlin.conn(), fid, "color", "blue").unwrap(); - let row: (String, String, String) = marlin + // The FTS index is contentless, so columns return empty strings. Instead + // verify that searching for our tag and attribute yields the file path. 
+ let hits_tag: Vec = marlin .conn() - .query_row( - "SELECT path, tags_text, attrs_text FROM files_fts WHERE rowid = ?1", - [fid], - |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), - ) + .prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'foo'") + .unwrap() + .query_map([], |r| r.get(0)) + .unwrap() + .collect::, _>>() .unwrap(); + assert!(hits_tag.contains(&file_path.to_string_lossy().into_owned())); - assert_eq!(row.0, file_path.to_str().unwrap()); - assert!(row.1.contains("foo") && row.1.contains("bar")); - assert_eq!(row.2, "color=blue"); + let hits_attr: Vec = marlin + .conn() + .prepare(r#"SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH '"color=blue"'"#) + .unwrap() + .query_map([], |r| r.get(0)) + .unwrap() + .collect::, _>>() + .unwrap(); + assert!(hits_attr.contains(&file_path.to_string_lossy().into_owned())); } From bc8e4fbdabd120719ae458537c12e6bb607f17c4 Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 12:37:29 -0400 Subject: [PATCH 16/18] Serialize config tests with env mutex --- Cargo.lock | 1 + libmarlin/Cargo.toml | 1 + libmarlin/src/config_tests.rs | 4 ++++ libmarlin/src/facade_tests.rs | 4 ++++ libmarlin/src/lib.rs | 2 ++ libmarlin/src/test_utils.rs | 8 ++++++++ 6 files changed, 20 insertions(+) create mode 100644 libmarlin/src/test_utils.rs diff --git a/Cargo.lock b/Cargo.lock index 4052d2c..fcb1a12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -633,6 +633,7 @@ dependencies = [ "crossbeam-channel", "directories", "glob", + "lazy_static", "notify", "priority-queue", "rusqlite", diff --git a/libmarlin/Cargo.toml b/libmarlin/Cargo.toml index 14dbae6..af61a9b 100644 --- a/libmarlin/Cargo.toml +++ b/libmarlin/Cargo.toml @@ -27,6 +27,7 @@ json = ["serde_json"] [dev-dependencies] # for temporary directories in config_tests.rs and scan_tests.rs tempfile = "3" +lazy_static = "1" # you already have rusqlite 
in [dependencies], so scan_tests.rs # can just use rusqlite::Connection, no need to repeat it here. diff --git a/libmarlin/src/config_tests.rs b/libmarlin/src/config_tests.rs index 2c79883..b7d352c 100644 --- a/libmarlin/src/config_tests.rs +++ b/libmarlin/src/config_tests.rs @@ -1,11 +1,13 @@ // libmarlin/src/config_tests.rs use super::config::Config; +use crate::test_utils::ENV_MUTEX; use std::env; use tempfile::tempdir; #[test] fn load_env_override() { + let _guard = ENV_MUTEX.lock().unwrap(); let tmp = tempdir().unwrap(); let db = tmp.path().join("custom.db"); env::set_var("MARLIN_DB_PATH", &db); @@ -16,6 +18,7 @@ fn load_env_override() { #[test] fn load_xdg_or_fallback() { + let _guard = ENV_MUTEX.lock().unwrap(); // since XDG_DATA_HOME will normally be present, just test it doesn't error let cfg = Config::load().unwrap(); assert!(cfg.db_path.to_string_lossy().ends_with(".db")); @@ -23,6 +26,7 @@ fn load_xdg_or_fallback() { #[test] fn load_fallback_current_dir() { + let _guard = ENV_MUTEX.lock().unwrap(); // Save and clear HOME & XDG_DATA_HOME let orig_home = env::var_os("HOME"); let orig_xdg = env::var_os("XDG_DATA_HOME"); diff --git a/libmarlin/src/facade_tests.rs b/libmarlin/src/facade_tests.rs index 2e3f259..0f3b0e1 100644 --- a/libmarlin/src/facade_tests.rs +++ b/libmarlin/src/facade_tests.rs @@ -1,11 +1,13 @@ // libmarlin/src/facade_tests.rs use super::*; // brings Marlin, config, etc. 
+use crate::test_utils::ENV_MUTEX; use std::{env, fs}; use tempfile::tempdir; #[test] fn open_at_and_scan_and_search() { + let _guard = ENV_MUTEX.lock().unwrap(); // 1) Prepare a temp workspace with one file let tmp = tempdir().unwrap(); let file = tmp.path().join("hello.txt"); @@ -33,6 +35,7 @@ fn open_at_and_scan_and_search() { #[test] fn tag_and_search_by_tag() { + let _guard = ENV_MUTEX.lock().unwrap(); let tmp = tempdir().unwrap(); let a = tmp.path().join("a.md"); let b = tmp.path().join("b.md"); @@ -56,6 +59,7 @@ fn tag_and_search_by_tag() { #[test] fn open_default_fallback_config() { + let _guard = ENV_MUTEX.lock().unwrap(); // Unset all overrides env::remove_var("MARLIN_DB_PATH"); env::remove_var("XDG_DATA_HOME"); diff --git a/libmarlin/src/lib.rs b/libmarlin/src/lib.rs index ff19a88..d699a6d 100644 --- a/libmarlin/src/lib.rs +++ b/libmarlin/src/lib.rs @@ -27,6 +27,8 @@ mod logging_tests; #[cfg(test)] mod scan_tests; #[cfg(test)] +mod test_utils; +#[cfg(test)] mod utils_tests; #[cfg(test)] mod watcher_tests; diff --git a/libmarlin/src/test_utils.rs b/libmarlin/src/test_utils.rs new file mode 100644 index 0000000..ac96844 --- /dev/null +++ b/libmarlin/src/test_utils.rs @@ -0,0 +1,8 @@ +use std::sync::Mutex; + +use lazy_static::lazy_static; + +lazy_static! { + /// Global mutex to serialize environment-variable modifications in tests. + pub static ref ENV_MUTEX: Mutex<()> = Mutex::new(()); +} From a015c1f5090a626816b53c5d9156e54b51e602cb Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 14:22:48 -0400 Subject: [PATCH 17/18] Document new commands and quick start --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 5c10660..6c54ef8 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,12 @@ See [docs/roadmap.md](docs/roadmap.md) and [docs/adr/DP-001_schema_v1.1.md](docs/adr/DP-001_schema_v1.1.md) for the current delivery roadmap and schema. 
+## Quick Start + +Follow the short walkthrough in +[docs/marlin_demo.md](docs/marlin_demo.md) to build the +binary and test Marlin on a sample project. + ## CLI Cheatsheet The full command reference is generated during the build of the CLI. See @@ -21,6 +27,13 @@ Named **collections** act like playlists of files. Create one with `marlin view save "tag:todo"`, list all views using `marlin view list` and execute one with `marlin view exec `. +Other handy commands include: + +- `marlin watch ` to keep the index updated in real time. +- `marlin backup run` to create or prune database backups. +- `marlin link add` to relate files with typed edges. +- `marlin annotate add` to attach notes or highlights. + ## License Licensed under the [MIT License](LICENSE). From c64cc2f944ef9cca2dc32c7eec0bdeb1e6bdf67e Mon Sep 17 00:00:00 2001 From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com> Date: Thu, 22 May 2025 14:23:36 -0400 Subject: [PATCH 18/18] Add placeholder ADRs for pending design proposals --- docs/adr/DP-002_dirty-flag.md | 7 +++++++ docs/adr/DP-004_content-blob_strategy.md | 7 +++++++ docs/adr/DP-005_hash_and_dedupe.md | 7 +++++++ docs/adr/DP-006_embeddings_and_models.md | 7 +++++++ docs/adr/DP-007_search_dsl_v2.md | 7 +++++++ docs/adr/DP-008_workflow_tables.md | 7 +++++++ docs/adr/DP-009_tui_keymap.md | 7 +++++++ docs/adr/DP-010_kde_sidebar.md | 7 +++++++ docs/adr/DP-011_sync_backend.md | 7 +++++++ 9 files changed, 63 insertions(+) create mode 100644 docs/adr/DP-002_dirty-flag.md create mode 100644 docs/adr/DP-004_content-blob_strategy.md create mode 100644 docs/adr/DP-005_hash_and_dedupe.md create mode 100644 docs/adr/DP-006_embeddings_and_models.md create mode 100644 docs/adr/DP-007_search_dsl_v2.md create mode 100644 docs/adr/DP-008_workflow_tables.md create mode 100644 docs/adr/DP-009_tui_keymap.md create mode 100644 docs/adr/DP-010_kde_sidebar.md create mode 100644 docs/adr/DP-011_sync_backend.md diff --git a/docs/adr/DP-002_dirty-flag.md 
b/docs/adr/DP-002_dirty-flag.md new file mode 100644 index 0000000..8e25b64 --- /dev/null +++ b/docs/adr/DP-002_dirty-flag.md @@ -0,0 +1,7 @@ +# DP-002: Dirty Flag Design & FTS Rebuild Cadence + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +This placeholder reserves the DP number for the dirty-scan design proposal described in the roadmap. Details will be filled in once the full specification is drafted. diff --git a/docs/adr/DP-004_content-blob_strategy.md b/docs/adr/DP-004_content-blob_strategy.md new file mode 100644 index 0000000..ee02f24 --- /dev/null +++ b/docs/adr/DP-004_content-blob_strategy.md @@ -0,0 +1,7 @@ +# DP-004: Content-Blob Strategy (Inline vs External Table) + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Reserved for the proposal covering how file contents are stored and indexed for full-text search and annotations. diff --git a/docs/adr/DP-005_hash_and_dedupe.md b/docs/adr/DP-005_hash_and_dedupe.md new file mode 100644 index 0000000..71553ea --- /dev/null +++ b/docs/adr/DP-005_hash_and_dedupe.md @@ -0,0 +1,7 @@ +# DP-005: Hash Column & Bloom-Based Deduplication + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Reserved for the proposal defining SHA-256 hashing and duplicate detection via Bloom filters. diff --git a/docs/adr/DP-006_embeddings_and_models.md b/docs/adr/DP-006_embeddings_and_models.md new file mode 100644 index 0000000..8c2d41b --- /dev/null +++ b/docs/adr/DP-006_embeddings_and_models.md @@ -0,0 +1,7 @@ +# DP-006: Embeddings Size & Model Choice + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Placeholder for design decisions around semantic embeddings, vector store schema, and model selection. 
diff --git a/docs/adr/DP-007_search_dsl_v2.md b/docs/adr/DP-007_search_dsl_v2.md new file mode 100644 index 0000000..b4db58c --- /dev/null +++ b/docs/adr/DP-007_search_dsl_v2.md @@ -0,0 +1,7 @@ +# DP-007: Search DSL v2 Grammar + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Reserved for the formal grammar and parser design for the advanced search language. diff --git a/docs/adr/DP-008_workflow_tables.md b/docs/adr/DP-008_workflow_tables.md new file mode 100644 index 0000000..56789df --- /dev/null +++ b/docs/adr/DP-008_workflow_tables.md @@ -0,0 +1,7 @@ +# DP-008: Workflow Tables & Validation + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Placeholder for the schema and validation rules supporting structured workflows and relationship templates. diff --git a/docs/adr/DP-009_tui_keymap.md b/docs/adr/DP-009_tui_keymap.md new file mode 100644 index 0000000..25e53e5 --- /dev/null +++ b/docs/adr/DP-009_tui_keymap.md @@ -0,0 +1,7 @@ +# DP-009: TUI Key Map & Redraw Budget + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Reserved for the design of keyboard interactions and performance targets for the TUI. diff --git a/docs/adr/DP-010_kde_sidebar.md b/docs/adr/DP-010_kde_sidebar.md new file mode 100644 index 0000000..76d95dd --- /dev/null +++ b/docs/adr/DP-010_kde_sidebar.md @@ -0,0 +1,7 @@ +# DP-010: DB/IP Bridge for KDE Sidebar + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Placeholder for communication mechanisms and packaging strategy for the Dolphin sidebar integration. diff --git a/docs/adr/DP-011_sync_backend.md b/docs/adr/DP-011_sync_backend.md new file mode 100644 index 0000000..6ede3f1 --- /dev/null +++ b/docs/adr/DP-011_sync_backend.md @@ -0,0 +1,7 @@ +# DP-011: Sync Backend Trade-Study + +**Status**: Pending +**Authors**: TBA +**Date**: 2025-05-19 + +Reserved for evaluation of synchronization approaches and end-to-end UI test plan.