diff --git a/Cargo.lock b/Cargo.lock
index 4052d2c..fcb1a12 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -633,6 +633,7 @@ dependencies = [
"crossbeam-channel",
"directories",
"glob",
+ "lazy_static",
"notify",
"priority-queue",
"rusqlite",
diff --git a/README.md b/README.md
index 91645ba..6c54ef8 100644
--- a/README.md
+++ b/README.md
@@ -1,88 +1,39 @@
-# Marlin ― Delivery Road‑map **v3.2**
+# Marlin
-*Engineering‑ready – revised 2025‑05‑18*
+This repository hosts the Marlin indexing tool.
+See [docs/roadmap.md](docs/roadmap.md) and
+[docs/adr/DP-001_schema_v1.1.md](docs/adr/DP-001_schema_v1.1.md)
+for the current delivery roadmap and schema.
-> **Legend** △ engineering artefact ✦ user‑visible deliverable
+## Quick Start
----
-
-## 0 · Methodology primer (what “Done” means)
-
-| Theme | Project rule‑of‑thumb |
-| -------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
-| **Branching** | Trunk‑based. Feature branch → PR → 2 reviews → squash‑merge. |
-| **Spec first** | Each epic begins with a **Design Proposal (DP‑xxx)** in `/docs/adr/` containing schema diffs, example CLI session, perf targets. |
-| **Coverage** | Tarpaulin gate ≥ 85 % **on lines touched this sprint** (checked in CI). |
-| **Perf gate** | Cold‑start P95 ≤ 3 s on 100 k files **unless overridden in DP**. Regressions fail CI. |
-| **Docs** | CLI flags & examples land in `README.md` **same PR**. Docs tables (CLI cheatsheet, TUI key‑map) are auto‑generated during the build. |
-| **Demo** | Closing each epic yields a ≤ 2‑min asciinema or GIF in `docs/demos/`. |
-
----
-
-## 1 · Bird’s‑eye table (engineering details + deliverables)
-
-| Phase / Sprint | Timeline | Focus & Rationale | ✦ Key UX Deliverables | △ Engineering artefacts / tasks | Definition of Done |
-| ----------------------------------------------- | ----------------------------- | -------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
-| **Sprint 0 — Bootstrap & CI Baseline** | **2025‑Q2
(now → 30 May)** | CI scaffolding, coverage, crate split | — | • Split repo into **`libmarlin` (core)** + **`cli-bin`** + **`tui-bin`**
• Tarpaulin coverage + Hyperfine perf jobs wired
• `build.rs` renders CLI cheatsheet from `commands.yaml`
• Docs / cheatsheet autogen step in GitHub Actions | `cargo test --all` passes with coverage gate ≥ 85 %; docs artefacts appear in build; crates compile. |
-| **Sprint α — Bedrock & Metadata Domains** | **31 May → 13 Jun 2025** | Lock schema v1.1, first metadata objects | • CLI stubs: `marlin link / coll / view`
• `marlin demo` interactive tour | • **DP‑001 Schema v1.1** (ER + migration scripts)
• Unit tests (`escape_fts`, `determine_scan_root`)
• GitHub Action for SQL dry‑run | 100 % migrations green; demo prints ✅; logo badge shows schema version. |
-| **Epic 1 — Live‑Watch Mode & Backup Prune** | **2025‑Q2** | Continuous indexing via FS events; backups never explode | • `marlin watch
` (inotify / FSEvents)
• `backup --prune N` (auto‑prune pre‑ and post‑command) | • **DP‑002** file‑watch life‑cycle & debounce strategy
• Change‑table schema storing dirty file IDs
• Nightly prune CI job | 8 h stress‑watch alters 10 k files → < 1 % missed; backup dir size ≤ N; watch CPU idle < 3 %. |
-| **Epic 2 — Dirty‑scan optimisation** | **2025‑Q2** | Re‑index only paths marked dirty by watch table | • `scan --dirty` | • Reuse change‑table from watch; Hyperfine benchmark script committed | Dirty‑scan runtime ≤ 15 % full scan on 100 k corpus; bench job passes. |
-| **Phase 3 — Content FTS + Annotations** | 2025‑Q3 | Grep snippets, inline notes | • `search -C3` grep‑style context
• `annotate add/list` | • **DP‑004** content‑blob strategy (inline vs ext‑table)
• `syntect` highlight PoC | Indexes 1 GB corpus ≤ 30 min; snippet CLI golden tests pass. |
-| **Phase 4 — Versioning & De‑duplication** | 2025‑Q3 | Historic diffs, SHA‑256 dedupe | • `scan --rehash`
• `version diff ` | • **DP‑005** hash column + Bloom‑de‑dupe research | Diff on 10 MB file ≤ 500 ms; duplicate sets emitted by CLI. |
-| **Phase 5 — Tag Aliases & Semantic Booster** | 2025‑Q3 | Tame tag sprawl; start AI hints | • `tag alias add/ls/rm`
• `tag suggest`, `summary` | • **DP‑006** embeddings size & k‑NN search bench | 95 % alias look‑ups resolved in one hop; suggest query ≤ 150 ms. |
-| **Phase 6 — Search DSL v2 & Smart Views** | 2025‑Q4 | AND/OR, ranges, structured grammar; smart folders | • New `nom` grammar
• Legacy parser behind **`--legacy-search`** (warn on use) | • **DP‑007** BNF + 30 acceptance strings
• Lexer fuzz tests (`cargo‑fuzz`) | Old queries keep working; 0 panics in fuzz run ≥ 1 M cases. |
-| **Phase 7 — Structured Workflows & Templates** | 2025‑Q4 | State graph, relationship templates | • `state set/log`
• `template apply` | • **DP‑008** workflow tables & YAML template spec
• Sample template e2e tests | Create template, apply to 20 files → all attrs/link rows present; illegal transitions blocked. |
-| **Phase 8 — TUI v1 + Lightweight Integrations** | 2026‑Q1 | Keyboard UI, VS Code sidebar | • **`marlin‑tui`** binary (tiling panes, key‑map)
• Read‑only VS Code sidebar | • **DP‑009** TUI redraw budget & key‑map
• Crate split fully consumed | TUI binary ≤ 2 MB; scroll redraw ≤ 4 ms; VS Code extension loads index. |
-| **Phase 9 — Dolphin Sidebar (MVP)** | 2026‑Q1 | Peek metadata inline in KDE Dolphin | • Qt/KIO sidebar | • **DP‑010** DB/IP bridge (D‑Bus vs UNIX socket)
• CMake packaging script | Sidebar opens ≤ 150 ms; passes KDE lint. |
-| **Phase 10 — Full GUI & Multi‑device Sync** | 2026‑Q2 | Visual editor + optional sync backend | • Electron/Qt hybrid explorer UI
• Select & integrate sync (LiteFS / Postgres) | • **DP‑011** sync back‑end trade‑study
• Busy‑timeout/retry strategy for multi‑writer mode | CRUD round‑trip < 2 s between two nodes; 25 GUI e2e tests green. |
-
----
-
-### 2 · Feature cross‑matrix (quick look‑ups)
-
-| Capability | Sprint / Phase | CLI / GUI element | Linked DP |
-| -------------------------- | -------------- | ----------------------------------- | --------- |
-| Crate split & docs autogen | S0 | — | – |
-| Tarpaulin coverage gate | S0 | — | – |
-| Watch mode (FS events) | Epic 1 | `marlin watch .` | DP‑002 |
-| Backup auto‑prune | Epic 1 | `backup --prune N` | – |
-| Dirty‑scan | Epic 2 | `scan --dirty` | DP‑002 |
-| Grep snippets | Phase 3 | `search -C3 …` | DP‑004 |
-| Hash / dedupe | Phase 4 | `scan --rehash` | DP‑005 |
-| Tag aliases | Phase 5 | `tag alias` commands | DP‑006 |
-| Search DSL v2 | Phase 6 | new grammar, `--legacy-search` flag | DP‑007 |
-| Relationship templates | Phase 7 | `template new/apply` | DP‑008 |
-| TUI v1 | Phase 8 | `marlin‑tui` | DP‑009 |
-
----
-
-## 3 · Milestone acceptance checklist
-
-Before a milestone is declared **shipped**:
-
-* [ ] **Spec** DP‑xxx merged with schema diff, ASCII‑cast demo
-* [ ] **Tests** Tarpaulin ≥ 85 % on changed lines; all suites green
-* [ ] **Perf guard** script passes on CI matrix (Ubuntu 22, macOS 14)
-* [ ] **Docs** auto‑regenerated; README & cheatsheet updated
-* [ ] **Demo** asciinema/GIF committed and linked in release notes
-* [ ] **Release tag** pushed; Cargo binary uploaded to GitHub Releases
-
----
-
-## 4 · Next immediate actions
-
-| # | Task | Owner | Due |
-| - | ------------------------------ | ------ | ------------- |
-| 1 | Crate split + CI baseline | @alice | **26 May 25** |
-| 2 | Tarpaulin + Hyperfine jobs | @bob | **26 May 25** |
-| 3 | **DP‑001 Schema v1.1** draft | @carol | **30 May 25** |
-| 4 | backup prune CLI + nightly job | @dave | **05 Jun 25** |
+Follow the short walkthrough in
+[docs/marlin_demo.md](docs/marlin_demo.md) to build the
+binary and test Marlin on a sample project.
## CLI Cheatsheet
The full command reference is generated during the build of the CLI. See
[cli-bin/docs/cli_cheatsheet.md](cli-bin/docs/cli_cheatsheet.md).
+## Collections and Views
+
+Named **collections** act like playlists of files. Create one with
+`marlin coll create <name>`, add files via
+`marlin coll add <name> <file>` and list contents using
+`marlin coll list <name>`.
+
+**Views** save search queries for quick reuse. Save a query with
+`marlin view save "tag:todo"`, list all views using
+`marlin view list` and execute one with `marlin view exec <name>`.
+
+Other handy commands include:
+
+- `marlin watch <dir>` to keep the index updated in real time.
+- `marlin backup run` to create or prune database backups.
+- `marlin link add` to relate files with typed edges.
+- `marlin annotate add` to attach notes or highlights.
+
## License
Licensed under the [MIT License](LICENSE).
diff --git a/cli-bin/docs/cli_cheatsheet.md b/cli-bin/docs/cli_cheatsheet.md
index f9297c7..402fe7f 100644
--- a/cli-bin/docs/cli_cheatsheet.md
+++ b/cli-bin/docs/cli_cheatsheet.md
@@ -21,3 +21,4 @@
| `version diff` | — |
| `event add` | — |
| `event timeline` | — |
+| `backup run` | --dir, --prune, --verify, --file |
diff --git a/cli-bin/src/cli.rs b/cli-bin/src/cli.rs
index 2cc734c..a688035 100644
--- a/cli-bin/src/cli.rs
+++ b/cli-bin/src/cli.rs
@@ -1,6 +1,7 @@
// src/cli.rs
pub mod annotate;
+pub mod backup;
pub mod coll;
pub mod event;
pub mod link;
@@ -73,8 +74,8 @@ pub enum Commands {
exec: Option<String>,
},
- /// Create a timestamped backup of the database
- Backup,
+ /// Create or manage database backups
+ Backup(backup::BackupOpts),
/// Restore from a backup file (overwrites current DB)
Restore { backup_path: std::path::PathBuf },
diff --git a/cli-bin/src/cli/backup.rs b/cli-bin/src/cli/backup.rs
new file mode 100644
index 0000000..9d219a8
--- /dev/null
+++ b/cli-bin/src/cli/backup.rs
@@ -0,0 +1,67 @@
+// src/cli/backup.rs
+use crate::cli::Format;
+use anyhow::{Context, Result};
+use clap::Args;
+use libmarlin::backup::BackupManager;
+use rusqlite::Connection;
+use std::path::{Path, PathBuf};
+
+/// Options for the `backup` command
+#[derive(Args, Debug)]
+pub struct BackupOpts {
+ /// Directory to store backups (defaults next to DB)
+ #[arg(long)]
+ pub dir: Option<PathBuf>,
+
+ /// Keep only N newest backups
+ #[arg(long)]
+ pub prune: Option<usize>,
+
+ /// Verify a backup file
+ #[arg(long)]
+ pub verify: bool,
+
+ /// Backup file to verify (used with --verify)
+ #[arg(long)]
+ pub file: Option<PathBuf>,
+}
+
+pub fn run(opts: &BackupOpts, db_path: &Path, _conn: &mut Connection, _fmt: Format) -> Result<()> {
+ let backups_dir = opts
+ .dir
+ .clone()
+ .unwrap_or_else(|| db_path.parent().unwrap().join("backups"));
+ let manager = BackupManager::new(db_path, &backups_dir)?;
+
+ if opts.verify {
+ let file = opts
+ .file
+ .as_ref()
+ .context("--file required with --verify")?;
+ let name = file
+ .file_name()
+ .and_then(|n| n.to_str())
+ .context("invalid backup file name")?;
+ let ok = manager.verify_backup(name)?;
+ if ok {
+ println!("Backup OK: {}", name);
+ } else {
+ println!("Backup corrupted: {}", name);
+ }
+ return Ok(());
+ }
+
+ if let Some(n) = opts.prune {
+ let result = manager.prune(n)?;
+ println!(
+ "Pruned {} old backups, kept {}",
+ result.removed.len(),
+ result.kept.len()
+ );
+ return Ok(());
+ }
+
+ let info = manager.create_backup()?;
+ println!("Created backup {}", info.id);
+ Ok(())
+}
diff --git a/cli-bin/src/cli/commands.yaml b/cli-bin/src/cli/commands.yaml
index 19ea663..343d6dc 100644
--- a/cli-bin/src/cli/commands.yaml
+++ b/cli-bin/src/cli/commands.yaml
@@ -79,3 +79,9 @@ event:
add:
args: [file, date, description]
timeline: {}
+
+backup:
+ description: "Create, prune or verify backups"
+ actions:
+ run:
+ flags: ["--dir", "--prune", "--verify", "--file"]
diff --git a/cli-bin/src/main.rs b/cli-bin/src/main.rs
index 4be42fa..0a8766f 100644
--- a/cli-bin/src/main.rs
+++ b/cli-bin/src/main.rs
@@ -9,6 +9,7 @@
mod cli; // sub-command definitions and argument structs
/* ── shared modules re-exported from libmarlin ─────────────────── */
+use libmarlin::backup::BackupManager;
use libmarlin::db::take_dirty;
use libmarlin::{config, db, logging, scan, utils::determine_scan_root};
@@ -41,7 +42,7 @@ fn main() -> Result<()> {
let cfg = config::Config::load()?; // resolves DB path
match &args.command {
- Commands::Init | Commands::Backup | Commands::Restore { .. } => {}
+ Commands::Init | Commands::Backup(_) | Commands::Restore { .. } => {}
_ => match db::backup(&cfg.db_path) {
Ok(p) => info!("Pre-command auto-backup created at {}", p.display()),
Err(e) => error!("Failed to create pre-command auto-backup: {e}"),
@@ -100,23 +101,43 @@ fn main() -> Result<()> {
Commands::Search { query, exec } => run_search(&conn, &query, exec)?,
/* ---- maintenance ---------------------------------------- */
- Commands::Backup => {
- let p = db::backup(&cfg.db_path)?;
- println!("Backup created: {}", p.display());
+ Commands::Backup(opts) => {
+ cli::backup::run(&opts, &cfg.db_path, &mut conn, args.format)?;
}
Commands::Restore { backup_path } => {
- drop(conn);
- db::restore(&backup_path, &cfg.db_path)
- .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?;
+ drop(conn); // close connection so the restore can overwrite the DB file
+
+ if backup_path.exists() {
+ // User pointed to an actual backup file on disk
+ db::restore(&backup_path, &cfg.db_path).with_context(|| {
+ format!("Failed to restore DB from {}", backup_path.display())
+ })?;
+ } else {
+ // Assume they passed just the file-name that lives in the standard backups dir
+ let backups_dir = cfg.db_path.parent().unwrap().join("backups");
+ let manager = BackupManager::new(&cfg.db_path, &backups_dir)?;
+
+ let name = backup_path
+ .file_name()
+ .and_then(|n| n.to_str())
+ .context("invalid backup file name")?;
+
+ manager.restore_from_backup(name).with_context(|| {
+ format!("Failed to restore DB from {}", backup_path.display())
+ })?;
+ }
+
println!("Restored DB from {}", backup_path.display());
+
+ // Re-open so the rest of the program talks to the fresh database
db::open(&cfg.db_path).with_context(|| {
format!("Could not open restored DB at {}", cfg.db_path.display())
})?;
info!("Successfully opened restored database.");
}
- /* ---- passthrough sub-modules (some still stubs) ---------- */
+ /* ---- passthrough sub-modules ---------------------------- */
Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?,
Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?,
Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?,
diff --git a/docs/adr/DP-001_schema_v1.1.md b/docs/adr/DP-001_schema_v1.1.md
index 37a46cf..e8bf484 100644
--- a/docs/adr/DP-001_schema_v1.1.md
+++ b/docs/adr/DP-001_schema_v1.1.md
@@ -1,6 +1,6 @@
# DP-001: Schema v1.1 – Core Metadata Domains
-**Status**: Proposed
+**Status**: Accepted
**Authors**: @carol
**Date**: 2025-05-17
@@ -8,14 +8,14 @@
We’ve landed a basic SQLite-backed `files` table and a contentless FTS5 index. Before we build out higher-level features, we need to lock down our **v1.1** metadata schema for:
-- **Hierarchical tags** (`tags` + `file_tags`)
+- **Hierarchical tags** (`tags` + `file_tags`) – optional `canonical_id` for aliases
- **Custom attributes** (`attributes`)
- **File-to-file relationships** (`links`)
- **Named collections** (`collections` + `collection_files`)
-- **Saved views** (`saved_views`)
+- **Views** (`views`)
Locking this schema now lets downstream CLI & GUI work against a stable model and ensures our migrations stay easy to reason about.
-*Note: Tag aliases and their `canonical_id` support are deferred to DP-006 (v1.5).*
+Tags optionally reference a canonical tag via the `canonical_id` column.
## 2. Decision
@@ -33,9 +33,10 @@ All foreign keys use `ON DELETE CASCADE` so deleting a file, tag, etc. automatic
1. **0001\_initial\_schema.sql** – create core tables (`files`, `tags`, `file_tags`, `attributes`), a contentless FTS5 table (`files_fts`), core FTS triggers, and performance-critical indexes.
2. **0002\_update\_fts\_and\_triggers.sql** – replace old tag/attr FTS triggers with `INSERT OR REPLACE` semantics for full-row refresh.
- 3. **0003\_create\_links\_collections\_saved\_views.sql** – introduce `links`, `collections`, `collection_files`, and `saved_views` tables.
+ 3. **0003\_create\_links\_collections\_views.sql** – introduce `links`, `collections`, `collection_files`, and `views` tables.
4. **0004\_fix\_hierarchical\_tags\_fts.sql** – refine FTS triggers to index full hierarchical tag-paths via a recursive CTE.
3. Expose this schema through our library (`libmarlin::db::open`) so any client sees a v1.1 store.
+4. Track the version in code via `SCHEMA_VERSION` and provide `current_schema_version()` to query the DB.
## 3. ER Diagram
@@ -57,6 +58,7 @@ entity tags {
--
name : TEXT
parent_id : INTEGER <>
+ canonical_id : INTEGER <>
}
entity file_tags {
@@ -91,7 +93,7 @@ entity collection_files {
* file_id : INTEGER <>
}
-entity saved_views {
+entity views {
* id : INTEGER <>
--
name : TEXT
@@ -109,7 +111,7 @@ files ||--o{ links : "dst_file_id"
collections ||--o{ collection_files
files ||--o{ collection_files
-saved_views ||..|| files : "exec via FTS"
+views ||..|| files : "exec via FTS"
@enduml
```
@@ -135,7 +137,7 @@ Or in plain-ASCII:
└─────────────┘ └──────────────────┘ └────────┘
┌─────────────┐
-│ saved_views │
+│ views │
│ (exec FTS) │
└─────────────┘
```
@@ -146,8 +148,9 @@ Or in plain-ASCII:
| ------------------------------------------------------ | ------------------------------------------------------------- |
| **0001\_initial\_schema.sql** | Core tables + contentless FTS + core triggers + indexes |
| **0002\_update\_fts\_and\_triggers.sql** | Full-row FTS refresh on tag/attr changes |
-| **0003\_create\_links\_collections\_saved\_views.sql** | Add `links`, `collections`, `collection_files`, `saved_views` |
+| **0003\_create\_links\_collections\_views.sql** | Add `links`, `collections`, `collection_files`, `views` |
| **0004\_fix\_hierarchical\_tags\_fts.sql** | Recursive CTE for full tag-path indexing in FTS triggers |
+| **0005_add_dirty_table.sql** | Track modified files needing reindexing |
### Performance-Critical Indexes
diff --git a/docs/adr/DP-002_dirty-flag.md b/docs/adr/DP-002_dirty-flag.md
new file mode 100644
index 0000000..8e25b64
--- /dev/null
+++ b/docs/adr/DP-002_dirty-flag.md
@@ -0,0 +1,7 @@
+# DP-002: Dirty Flag Design & FTS Rebuild Cadence
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+This placeholder reserves the DP number for the dirty-scan design proposal described in the roadmap. Details will be filled in once the full specification is drafted.
diff --git a/docs/adr/DP-004_content-blob_strategy.md b/docs/adr/DP-004_content-blob_strategy.md
new file mode 100644
index 0000000..ee02f24
--- /dev/null
+++ b/docs/adr/DP-004_content-blob_strategy.md
@@ -0,0 +1,7 @@
+# DP-004: Content-Blob Strategy (Inline vs External Table)
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Reserved for the proposal covering how file contents are stored and indexed for full-text search and annotations.
diff --git a/docs/adr/DP-005_hash_and_dedupe.md b/docs/adr/DP-005_hash_and_dedupe.md
new file mode 100644
index 0000000..71553ea
--- /dev/null
+++ b/docs/adr/DP-005_hash_and_dedupe.md
@@ -0,0 +1,7 @@
+# DP-005: Hash Column & Bloom-Based Deduplication
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Reserved for the proposal defining SHA-256 hashing and duplicate detection via Bloom filters.
diff --git a/docs/adr/DP-006_embeddings_and_models.md b/docs/adr/DP-006_embeddings_and_models.md
new file mode 100644
index 0000000..8c2d41b
--- /dev/null
+++ b/docs/adr/DP-006_embeddings_and_models.md
@@ -0,0 +1,7 @@
+# DP-006: Embeddings Size & Model Choice
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Placeholder for design decisions around semantic embeddings, vector store schema, and model selection.
diff --git a/docs/adr/DP-007_search_dsl_v2.md b/docs/adr/DP-007_search_dsl_v2.md
new file mode 100644
index 0000000..b4db58c
--- /dev/null
+++ b/docs/adr/DP-007_search_dsl_v2.md
@@ -0,0 +1,7 @@
+# DP-007: Search DSL v2 Grammar
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Reserved for the formal grammar and parser design for the advanced search language.
diff --git a/docs/adr/DP-008_workflow_tables.md b/docs/adr/DP-008_workflow_tables.md
new file mode 100644
index 0000000..56789df
--- /dev/null
+++ b/docs/adr/DP-008_workflow_tables.md
@@ -0,0 +1,7 @@
+# DP-008: Workflow Tables & Validation
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Placeholder for the schema and validation rules supporting structured workflows and relationship templates.
diff --git a/docs/adr/DP-009_tui_keymap.md b/docs/adr/DP-009_tui_keymap.md
new file mode 100644
index 0000000..25e53e5
--- /dev/null
+++ b/docs/adr/DP-009_tui_keymap.md
@@ -0,0 +1,7 @@
+# DP-009: TUI Key Map & Redraw Budget
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Reserved for the design of keyboard interactions and performance targets for the TUI.
diff --git a/docs/adr/DP-010_kde_sidebar.md b/docs/adr/DP-010_kde_sidebar.md
new file mode 100644
index 0000000..76d95dd
--- /dev/null
+++ b/docs/adr/DP-010_kde_sidebar.md
@@ -0,0 +1,7 @@
+# DP-010: DB/IP Bridge for KDE Sidebar
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Placeholder for communication mechanisms and packaging strategy for the Dolphin sidebar integration.
diff --git a/docs/adr/DP-011_sync_backend.md b/docs/adr/DP-011_sync_backend.md
new file mode 100644
index 0000000..6ede3f1
--- /dev/null
+++ b/docs/adr/DP-011_sync_backend.md
@@ -0,0 +1,7 @@
+# DP-011: Sync Backend Trade-Study
+
+**Status**: Pending
+**Authors**: TBA
+**Date**: 2025-05-19
+
+Reserved for evaluation of synchronization approaches and end-to-end UI test plan.
diff --git a/docs/roadmap.md b/docs/roadmap.md
index 160513e..9556e73 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -1,6 +1,6 @@
-# Marlin ― Delivery Road-map **v3**
+# Marlin ― Delivery Road-map **v3.2**
-*Engineering-ready version — updated 2025-05-17*
+*Engineering-ready version — updated 2025-05-18*
> **Legend**
> **△** = engineering artefact (spec / ADR / perf target) **✦** = user-visible deliverable
@@ -25,7 +25,7 @@
| Phase / Sprint | Timeline | Focus & Rationale | ✦ Key UX Deliverables | △ Engineering artefacts / tasks | Definition of Done |
| --------------------------------------------- | -------- | ---------------------------------------- | -------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
| ~~**Epic 1 — Scale & Reliability**~~ | ~~2025-Q2~~ | ~~Stay fast @ 100 k files~~ | ~~• `scan --dirty` (re-index touched rows only)~~ | ~~• DP-002 Dirty-flag design + FTS rebuild cadence
• Hyperfine benchmark script committed~~ | ~~Dirty scan vs full ≤ 15 % runtime on 100 k corpus; benchmark job passes~~ |
-| **Epic 2 — Live Mode & Self-Pruning Backups** | 2025-Q2 | “Just works” indexing, DB never explodes | • `marlin watch ` (notify/FSEvents)
• `backup --prune N` & auto-prune | • DP-003 file-watcher life-cycle & debouncing
• Integration test with inotify-sim
• Cron-style GitHub job for nightly prune | 8 h stress-watch alters 10 k files < 1 % misses; backup dir ≤ N |
+| **Epic 2 — Live Mode & Self-Pruning Backups** | 2025-Q2 | “Just works” indexing, DB never explodes | • `marlin watch <dir>` (notify/FSEvents)
• `backup --prune N` & auto-prune
• rename/move tracking keeps paths current | • DP-003 file-watcher life-cycle & debouncing
• Integration test with inotify-sim
• Rename/Move handling spec & tests
• Cron-style GitHub job for nightly prune | 8 h stress-watch alters 10 k files < 1 % misses; backup dir ≤ N |
| **Phase 3 — Content FTS + Annotations** | 2025-Q3 | Search inside files, leave notes | • Grep-style snippet output (`-C3`)
• `marlin annotate add/list` | • DP-004 content-blob strategy (inline vs ext-table)
• Syntax-highlight via `syntect` PoC
• New FTS triggers unit-tested | Indexes 1 GB corpus in ≤ 30 min; snippet CLI passes golden-file tests |
| **Phase 4 — Versioning & Deduplication** | 2025-Q3 | Historic diffs, detect dupes | • `scan --rehash` (SHA-256)
• `version diff ` | • DP-005 hash column + Bloom-de-dupe
• Binary diff adapter research | Diff on 10 MB file ≤ 500 ms; dupes listed via CLI |
| **Phase 5 — Tag Aliases & Semantic Booster** | 2025-Q3 | Tame tag sprawl, start AI hints | • `tag alias add/ls/rm`
• `tag suggest`, `summary` | • DP-006 embeddings size & model choice
• Vector store schema + k-NN index bench | 95 % of “foo/bar~foo” alias look-ups resolve in one hop; suggest CLI returns ≤ 150 ms |
@@ -39,15 +39,21 @@
### 2 · Feature cross-matrix (quick look-ups)
-| Capability | Sprint / Phase | CLI flag or GUI element | Linked DP |
-| ------------------------------------- | -------------- | ---------------------------------- | --------- |
-| Relationship **templates** | P7 | `template new`, `template apply` | DP-008 |
-| Positive / negative filter combinator | P6 | DSL `+tag:foo -tag:bar date>=2025` | DP-007 |
-| ~~Dirty-scan optimisation~~ | ~~E1~~ | ~~`scan --dirty`~~ | ~~DP-002~~ |
-| Watch-mode | E2 | `marlin watch .` | DP-003 |
-| Grep snippets | P3 | `search -C3 "foo"` | DP-004 |
-| Hash / dedupe | P4 | `scan --rehash` | DP-005 |
+| Capability | Sprint / Phase | CLI / GUI element | Linked DP |
+| -------------------------- | -------------- | -------------------- | --------- |
+| Crate split & docs autogen | S0 | — | – |
+| Tarpaulin coverage gate | S0 | — | – |
+| Watch mode (FS events) | Epic 1 | `marlin watch .` | DP‑002 |
+| Backup auto‑prune | Epic 1 | `backup --prune N` | – |
+| Rename/move tracking | Epic 2 | automatic path update | Spec‑RMH |
+| Dirty‑scan | Epic 2 | `scan --dirty` | DP‑002 |
+| Grep snippets | Phase 3 | `search -C3 …` | DP‑004 |
+| Hash / dedupe | Phase 4 | `scan --rehash` | DP‑005 |
+| Tag aliases | Phase 5 | `tag alias` commands | DP‑006 |
+| Search DSL v2 | Phase 6 | new grammar, `--legacy-search` flag | DP‑007 |
+| Relationship templates | Phase 7 | `template new/apply` | DP‑008 |
+| TUI v1 | Phase 8 | `marlin‑tui` | DP‑009 |
---
## 3 · Milestone acceptance checklist
@@ -65,8 +71,11 @@ Before a milestone is declared “shipped”:
### 4 · Next immediate actions
-~~1. **Write DP-001 (Schema v1.1)** — owner @alice, due 21 May~~
-~~2. **Set up Tarpaulin & Hyperfine jobs** — @bob, due 23 May~~
-~~3. **Spike dirty-flag logic** — @carol 2-day time-box, outcome in DP-002~~
+| # | Task | Owner | Due |
+| - | ------------------------------ | ------ | ------------- |
+| ~~1~~ | ~~Crate split + CI baseline~~ | @alice | ~~26 May 25~~ |
+| ~~2~~ | ~~Tarpaulin + Hyperfine jobs~~ | @bob | ~~26 May 25~~ |
+| 3 | **DP‑001 Schema v1.1** draft | @carol | **30 May 25** |
+| ~~4~~ | ~~backup prune CLI + nightly job~~ | @dave | ~~05 Jun 25~~ |
> *This roadmap now contains both product-level “what” and engineering-level “how/when/prove it”. It should allow a new contributor to jump in, pick the matching DP, and know exactly the bar they must clear for their code to merge.*
diff --git a/docs/spec-details/Rename+Move-Handling.md b/docs/spec-details/Rename+Move-Handling.md
new file mode 100644
index 0000000..8daefa5
--- /dev/null
+++ b/docs/spec-details/Rename+Move-Handling.md
@@ -0,0 +1,71 @@
+# Marlin — Rename & Move Handling
+
+**Integration Specification · v0.1 (2025-05-19)**
+
+---
+
+## 0 · Scope
+
+This document outlines how Marlin should respond when files or folders are renamed or moved. It extends the watcher life‑cycle design (DP‑003) so that metadata remains consistent without requiring a full re‑scan.
+
+## 1 · Background
+
+The current watcher maps any `notify::EventKind::Modify(_)` – including renames – to the generic `EventPriority::Modify` and merely logs the event:
+
+```
+415 let prio = match event.kind {
+416 EventKind::Create(_) => EventPriority::Create,
+417 EventKind::Remove(_) => EventPriority::Delete,
+418 EventKind::Modify(_) => EventPriority::Modify,
+419 EventKind::Access(_) => EventPriority::Access,
+420 _ => EventPriority::Modify,
+421 };
+...
+455 for event_item in &evts_to_process {
+456 info!("Processing event (DB available): {:?} for path {:?}",
+457 event_item.kind, event_item.path);
+458 }
+```
+
+No database update occurs, so renamed files keep their old `path` in the `files` table. The schema does have a trigger to propagate `path` updates to the FTS index:
+
+```
+72 -- When a file’s path changes
+73 DROP TRIGGER IF EXISTS files_fts_au_file;
+74 CREATE TRIGGER files_fts_au_file
+75 AFTER UPDATE OF path ON files
+76 BEGIN
+77 UPDATE files_fts
+78 SET path = NEW.path
+79 WHERE rowid = NEW.id;
+80 END;
+```
+
+## 2 · Requirements
+
+1. **Detect old and new paths** from `Rename` events provided by the `notify` crate.
+2. **Update the `files` table** with the new absolute path when the target remains inside a scanned root.
+3. **Mark as removed** if the new location is outside all configured roots.
+4. **Batch updates** to avoid excessive writes during large folder moves.
+5. **Integration tests** exercising rename and move scenarios across platforms.
+
+## 3 · Implementation Sketch
+
+* Extend `ProcessedEvent` to carry `old_path` and `new_path` for `Rename` events.
+* Upon flushing events, call `db::mark_dirty` for the affected row, then update the `files.path` column. The existing trigger keeps `files_fts` in sync.
+* For directory renames, update child paths with a single SQL `UPDATE ... WHERE path LIKE 'old/%'` inside a transaction.
+* Emit `Create` and `Remove` events for files crossing watch boundaries so `scan --dirty` can prune or index them accordingly.
+
+## 4 · Edge Cases
+
+* **Atomic cross-filesystem moves** may surface as `Remove` + `Create`; both should be handled.
+* **Concurrent modifications** while moving should result in the newer metadata winning when `scan --dirty` runs.
+
+## 5 · Future Work
+
+Large scale refactors (e.g. moving an entire project) may benefit from a high‑level command that updates tags and links en masse. That is outside the scope of this spec but enabled by accurate rename tracking.
+
+---
+
+*End of document*
+
diff --git a/libmarlin/Cargo.toml b/libmarlin/Cargo.toml
index 14dbae6..af61a9b 100644
--- a/libmarlin/Cargo.toml
+++ b/libmarlin/Cargo.toml
@@ -27,6 +27,7 @@ json = ["serde_json"]
[dev-dependencies]
# for temporary directories in config_tests.rs and scan_tests.rs
tempfile = "3"
+lazy_static = "1"
# you already have rusqlite in [dependencies], so scan_tests.rs
# can just use rusqlite::Connection, no need to repeat it here.
diff --git a/libmarlin/src/backup.rs b/libmarlin/src/backup.rs
index 7834da3..b184cc4 100644
--- a/libmarlin/src/backup.rs
+++ b/libmarlin/src/backup.rs
@@ -216,6 +216,19 @@ impl BackupManager {
Ok(PruneResult { kept, removed })
}
+ pub fn verify_backup(&self, backup_id: &str) -> Result<bool> {
+ let backup_file_path = self.backups_dir.join(backup_id);
+ if !backup_file_path.exists() || !backup_file_path.is_file() {
+ return Err(anyhow::Error::new(marlin_error::Error::NotFound(format!(
+ "Backup file not found or is not a file: {}",
+ backup_file_path.display()
+ ))));
+ }
+ let conn = rusqlite::Connection::open(&backup_file_path)?;
+ let res: String = conn.query_row("PRAGMA integrity_check", [], |r| r.get(0))?;
+ Ok(res == "ok")
+ }
+
pub fn restore_from_backup(&self, backup_id: &str) -> Result<()> {
let backup_file_path = self.backups_dir.join(backup_id);
if !backup_file_path.exists() || !backup_file_path.is_file() {
@@ -532,4 +545,18 @@ mod tests {
assert_eq!(info.id, "backup_badformat.db");
assert_eq!(info.timestamp, expected_ts);
}
+
+ #[test]
+ fn verify_backup_ok() {
+ let tmp = tempdir().unwrap();
+ let live_db = tmp.path().join("live_verify.db");
+ let _conn = create_valid_live_db(&live_db);
+
+ let backups_dir = tmp.path().join("ver_backups");
+ let manager = BackupManager::new(&live_db, &backups_dir).unwrap();
+ let info = manager.create_backup().unwrap();
+
+ let ok = manager.verify_backup(&info.id).unwrap();
+ assert!(ok, "expected integrity check to pass");
+ }
}
diff --git a/libmarlin/src/config_tests.rs b/libmarlin/src/config_tests.rs
index 2c79883..b7d352c 100644
--- a/libmarlin/src/config_tests.rs
+++ b/libmarlin/src/config_tests.rs
@@ -1,11 +1,13 @@
// libmarlin/src/config_tests.rs
use super::config::Config;
+use crate::test_utils::ENV_MUTEX;
use std::env;
use tempfile::tempdir;
#[test]
fn load_env_override() {
+ let _guard = ENV_MUTEX.lock().unwrap();
let tmp = tempdir().unwrap();
let db = tmp.path().join("custom.db");
env::set_var("MARLIN_DB_PATH", &db);
@@ -16,6 +18,7 @@ fn load_env_override() {
#[test]
fn load_xdg_or_fallback() {
+ let _guard = ENV_MUTEX.lock().unwrap();
// since XDG_DATA_HOME will normally be present, just test it doesn't error
let cfg = Config::load().unwrap();
assert!(cfg.db_path.to_string_lossy().ends_with(".db"));
@@ -23,6 +26,7 @@ fn load_xdg_or_fallback() {
#[test]
fn load_fallback_current_dir() {
+ let _guard = ENV_MUTEX.lock().unwrap();
// Save and clear HOME & XDG_DATA_HOME
let orig_home = env::var_os("HOME");
let orig_xdg = env::var_os("XDG_DATA_HOME");
diff --git a/libmarlin/src/db/migrations/0001_initial_schema.sql b/libmarlin/src/db/migrations/0001_initial_schema.sql
index b3b3dd8..0780bf3 100644
--- a/libmarlin/src/db/migrations/0001_initial_schema.sql
+++ b/libmarlin/src/db/migrations/0001_initial_schema.sql
@@ -17,7 +17,7 @@ CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL, -- tag segment
parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
- canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
+ canonical_id INTEGER REFERENCES tags(id),
UNIQUE(name, parent_id)
);
diff --git a/libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql b/libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql
new file mode 100644
index 0000000..6180843
--- /dev/null
+++ b/libmarlin/src/db/migrations/0006_drop_tags_canonical_id.sql
@@ -0,0 +1,6 @@
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+
+-- Remove canonical_id column from tags table
+ALTER TABLE tags DROP COLUMN canonical_id;
+
diff --git a/libmarlin/src/db/mod.rs b/libmarlin/src/db/mod.rs
index fc7974d..4ee4241 100644
--- a/libmarlin/src/db/mod.rs
+++ b/libmarlin/src/db/mod.rs
@@ -18,6 +18,11 @@ use rusqlite::{
use std::result::Result as StdResult;
use tracing::{debug, info, warn};
+/* ─── schema version ───────────────────────────────────────────────── */
+
+/// Current library schema version.
+pub const SCHEMA_VERSION: i32 = MIGRATIONS.len() as i32;
+
/* ─── embedded migrations ─────────────────────────────────────────── */
const MIGRATIONS: &[(&str, &str)] = &[
@@ -41,8 +46,24 @@ const MIGRATIONS: &[(&str, &str)] = &[
"0005_add_dirty_table.sql",
include_str!("migrations/0005_add_dirty_table.sql"),
),
+ (
+ "0006_drop_tags_canonical_id.sql",
+ include_str!("migrations/0006_drop_tags_canonical_id.sql"),
+ ),
];
+/* ─── schema helpers ─────────────────────────────────────────────── */
+
+/// Fetch the highest version recorded in the `schema_version` table.
+pub fn current_schema_version(conn: &Connection) -> Result<i32> {
+ let version: i32 = conn.query_row(
+ "SELECT IFNULL(MAX(version), 0) FROM schema_version",
+ [],
+ |r| r.get(0),
+ )?;
+ Ok(version)
+}
+
/* ─── connection bootstrap ────────────────────────────────────────── */
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
@@ -129,6 +150,15 @@ pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> {
warn!("migrations not applied: {:?}", missing);
}
+ let current = current_schema_version(conn)?;
+ if current != SCHEMA_VERSION {
+ anyhow::bail!(
+ "database schema version {} does not match library version {}",
+ current,
+ SCHEMA_VERSION
+ );
+ }
+
Ok(())
}
diff --git a/libmarlin/src/db_tests.rs b/libmarlin/src/db_tests.rs
index 29c4e69..76527e1 100644
--- a/libmarlin/src/db_tests.rs
+++ b/libmarlin/src/db_tests.rs
@@ -234,3 +234,64 @@ mod dirty_helpers {
assert!(empty.is_empty());
}
}
+
+#[test]
+fn tables_exist_and_fts_triggers() {
+ use super::Marlin;
+ use std::fs;
+
+ let tmp = tempdir().unwrap();
+ let db_path = tmp.path().join("test.db");
+ let mut marlin = Marlin::open_at(&db_path).unwrap();
+
+ // the DB file should exist after opening
+ assert!(db_path.exists());
+
+ // confirm required tables
+ for table in ["links", "collections", "collection_files", "views"] {
+ let cnt: i64 = marlin
+ .conn()
+ .query_row(
+ "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
+ [table],
+ |r| r.get(0),
+ )
+ .unwrap();
+ assert_eq!(cnt, 1, "missing table {table}");
+ }
+
+ // create a file to index
+ let file_dir = tmp.path().join("files");
+ fs::create_dir(&file_dir).unwrap();
+ let file_path = file_dir.join("sample.txt");
+ fs::write(&file_path, "hello world").unwrap();
+
+ // index via public helper
+ marlin.scan(&[&file_dir]).unwrap();
+ marlin.tag("*.txt", "foo/bar").unwrap();
+
+ let fid = db::file_id(marlin.conn(), file_path.to_str().unwrap()).unwrap();
+ db::upsert_attr(marlin.conn(), fid, "color", "blue").unwrap();
+
+ // The FTS index is contentless, so columns return empty strings. Instead
+ // verify that searching for our tag and attribute yields the file path.
+ let hits_tag: Vec<String> = marlin
+ .conn()
+ .prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'foo'")
+ .unwrap()
+ .query_map([], |r| r.get(0))
+ .unwrap()
+ .collect::<Result<Vec<_>, _>>()
+ .unwrap();
+ assert!(hits_tag.contains(&file_path.to_string_lossy().into_owned()));
+
+ let hits_attr: Vec<String> = marlin
+ .conn()
+ .prepare(r#"SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH '"color=blue"'"#)
+ .unwrap()
+ .query_map([], |r| r.get(0))
+ .unwrap()
+ .collect::<Result<Vec<_>, _>>()
+ .unwrap();
+ assert!(hits_attr.contains(&file_path.to_string_lossy().into_owned()));
+}
diff --git a/libmarlin/src/facade_tests.rs b/libmarlin/src/facade_tests.rs
index 2e3f259..0f3b0e1 100644
--- a/libmarlin/src/facade_tests.rs
+++ b/libmarlin/src/facade_tests.rs
@@ -1,11 +1,13 @@
// libmarlin/src/facade_tests.rs
use super::*; // brings Marlin, config, etc.
+use crate::test_utils::ENV_MUTEX;
use std::{env, fs};
use tempfile::tempdir;
#[test]
fn open_at_and_scan_and_search() {
+ let _guard = ENV_MUTEX.lock().unwrap();
// 1) Prepare a temp workspace with one file
let tmp = tempdir().unwrap();
let file = tmp.path().join("hello.txt");
@@ -33,6 +35,7 @@ fn open_at_and_scan_and_search() {
#[test]
fn tag_and_search_by_tag() {
+ let _guard = ENV_MUTEX.lock().unwrap();
let tmp = tempdir().unwrap();
let a = tmp.path().join("a.md");
let b = tmp.path().join("b.md");
@@ -56,6 +59,7 @@ fn tag_and_search_by_tag() {
#[test]
fn open_default_fallback_config() {
+ let _guard = ENV_MUTEX.lock().unwrap();
// Unset all overrides
env::remove_var("MARLIN_DB_PATH");
env::remove_var("XDG_DATA_HOME");
diff --git a/libmarlin/src/lib.rs b/libmarlin/src/lib.rs
index ff19a88..d699a6d 100644
--- a/libmarlin/src/lib.rs
+++ b/libmarlin/src/lib.rs
@@ -27,6 +27,8 @@ mod logging_tests;
#[cfg(test)]
mod scan_tests;
#[cfg(test)]
+mod test_utils;
+#[cfg(test)]
mod utils_tests;
#[cfg(test)]
mod watcher_tests;
diff --git a/libmarlin/src/test_utils.rs b/libmarlin/src/test_utils.rs
new file mode 100644
index 0000000..ac96844
--- /dev/null
+++ b/libmarlin/src/test_utils.rs
@@ -0,0 +1,8 @@
+use std::sync::Mutex;
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+ /// Global mutex to serialize environment-variable modifications in tests.
+ pub static ref ENV_MUTEX: Mutex<()> = Mutex::new(());
+}