diff --git a/Cargo.lock b/Cargo.lock
index 1426346..edbd8f9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -94,6 +94,22 @@ version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
+[[package]]
+name = "assert_cmd"
+version = "2.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66"
+dependencies = [
+ "anstyle",
+ "bstr",
+ "doc-comment",
+ "libc",
+ "predicates",
+ "predicates-core",
+ "predicates-tree",
+ "wait-timeout",
+]
+
[[package]]
name = "autocfg"
version = "1.4.0"
@@ -106,6 +122,17 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
+[[package]]
+name = "bstr"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
+dependencies = [
+ "memchr",
+ "regex-automata 0.4.9",
+ "serde",
+]
+
[[package]]
name = "bumpalo"
version = "3.17.0"
@@ -163,6 +190,15 @@ dependencies = [
"strsim",
]
+[[package]]
+name = "clap_complete"
+version = "4.5.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c91d3baa3bcd889d60e6ef28874126a0b384fd225ab83aa6d8a801c519194ce1"
+dependencies = [
+ "clap",
+]
+
[[package]]
name = "clap_derive"
version = "4.5.32"
@@ -193,6 +229,12 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+[[package]]
+name = "difflib"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
+
[[package]]
name = "directories"
version = "5.0.1"
@@ -202,6 +244,15 @@ dependencies = [
"dirs-sys 0.4.1",
]
+[[package]]
+name = "dirs"
+version = "5.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
+dependencies = [
+ "dirs-sys 0.4.1",
+]
+
[[package]]
name = "dirs"
version = "6.0.0"
@@ -235,6 +286,22 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "doc-comment"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
+
+[[package]]
+name = "errno"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18"
+dependencies = [
+ "libc",
+ "windows-sys 0.59.0",
+]
+
[[package]]
name = "fallible-iterator"
version = "0.3.0"
@@ -247,6 +314,21 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
+[[package]]
+name = "float-cmp"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
+dependencies = [
+ "num-traits",
+]
+
[[package]]
name = "getrandom"
version = "0.2.16"
@@ -255,7 +337,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasi 0.14.2+wasi-0.2.4",
]
[[package]]
@@ -318,6 +412,12 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
[[package]]
name = "js-sys"
version = "0.3.77"
@@ -361,6 +461,12 @@ dependencies = [
"vcpkg",
]
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+
[[package]]
name = "log"
version = "0.4.27"
@@ -372,13 +478,19 @@ name = "marlin"
version = "0.1.0"
dependencies = [
"anyhow",
+ "assert_cmd",
"chrono",
"clap",
+ "clap_complete",
"directories",
+ "dirs 5.0.1",
"glob",
+ "predicates",
"rusqlite",
+ "serde_json",
"shellexpand",
"shlex",
+ "tempfile",
"tracing",
"tracing-subscriber",
"walkdir",
@@ -399,6 +511,12 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+[[package]]
+name = "normalize-line-endings"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
+
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@@ -448,6 +566,36 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+[[package]]
+name = "predicates"
+version = "3.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573"
+dependencies = [
+ "anstyle",
+ "difflib",
+ "float-cmp",
+ "normalize-line-endings",
+ "predicates-core",
+ "regex",
+]
+
+[[package]]
+name = "predicates-core"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa"
+
+[[package]]
+name = "predicates-tree"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c"
+dependencies = [
+ "predicates-core",
+ "termtree",
+]
+
[[package]]
name = "proc-macro2"
version = "1.0.95"
@@ -466,13 +614,19 @@ dependencies = [
"proc-macro2",
]
+[[package]]
+name = "r-efi"
+version = "5.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
+
[[package]]
name = "redox_users"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
dependencies = [
- "getrandom",
+ "getrandom 0.2.16",
"libredox",
"thiserror 1.0.69",
]
@@ -483,7 +637,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
dependencies = [
- "getrandom",
+ "getrandom 0.2.16",
"libredox",
"thiserror 2.0.12",
]
@@ -546,12 +700,31 @@ dependencies = [
"smallvec",
]
+[[package]]
+name = "rustix"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.59.0",
+]
+
[[package]]
name = "rustversion"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
[[package]]
name = "same-file"
version = "1.0.6"
@@ -561,6 +734,38 @@ dependencies = [
"winapi-util",
]
+[[package]]
+name = "serde"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.140"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+]
+
[[package]]
name = "sharded-slab"
version = "0.1.7"
@@ -576,7 +781,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb"
dependencies = [
- "dirs",
+ "dirs 6.0.0",
]
[[package]]
@@ -608,6 +813,25 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "tempfile"
+version = "3.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
+dependencies = [
+ "fastrand",
+ "getrandom 0.3.3",
+ "once_cell",
+ "rustix",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "termtree"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
+
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -749,6 +973,15 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+[[package]]
+name = "wait-timeout"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "walkdir"
version = "2.5.0"
@@ -765,6 +998,15 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+[[package]]
+name = "wasi"
+version = "0.14.2+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
@@ -1052,6 +1294,15 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
+dependencies = [
+ "bitflags",
+]
+
[[package]]
name = "zerocopy"
version = "0.8.25"
diff --git a/Cargo.toml b/Cargo.toml
index 438a4a3..6e3c586 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,4 +15,17 @@ walkdir = "2.5"
shlex = "1.3"
chrono = "0.4"
shellexpand = "3.1"
+clap_complete = "4.1"
+serde_json = { version = "1", optional = true } # <-- NEW
+[dev-dependencies]
+assert_cmd = "2"
+predicates = "3"
+tempfile = "3"
+dirs = "5" # cross-platform data dir helper
+
+[features]
+# The CLI prints JSON only when this feature is enabled.
+# Having the feature listed silences the `unexpected cfg` lint even
+# when you don’t turn it on.
+json = ["serde_json"]
\ No newline at end of file
diff --git a/README.md b/README.md
index ffc35ff..55b467c 100644
--- a/README.md
+++ b/README.md
@@ -2,21 +2,24 @@
# Marlin
-**Marlin** is a lightweight, metadata-driven file indexer that runs 100 % on your computer. It scans folders, stores paths and file stats in SQLite, lets you attach hierarchical **tags** and **custom attributes**, takes automatic snapshots, and offers instant full-text search via FTS5.
-*No cloud, no telemetry – your data never leaves the machine.*
+**Marlin** is a lightweight, metadata-driven file indexer that runs **100 % on your computer**.
+It scans folders, stores paths and file stats in SQLite, lets you attach hierarchical **tags** and **custom attributes**, keeps timestamped **snapshots**, and offers instant full-text search via FTS5.
+_No cloud, no telemetry – your data never leaves the machine._
---
## Feature highlights
-| Area | What you get |
-| -------------- | --------------------------------------------------------------------------------- |
-| **Safety** | Timestamped backups (`marlin backup`) and one-command restore (`marlin restore`) |
-| **Resilience** | Versioned, idempotent schema migrations – zero-downtime upgrades |
-| **Indexing** | Fast multi-path scanner with SQLite WAL concurrency |
-| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) |
-| **Search** | Prefix-aware FTS5 across paths, tags, and attributes; optional `--exec` per match |
-| **DX / Logs** | Structured tracing (`RUST_LOG=debug`) for every operation |
+| Area | What you get |
+| ------------------- | ----------------------------------------------------------------------------------------------------- |
+| **Safety** | Timestamped backups (`marlin backup`) and one-command restore (`marlin restore`) |
+| **Resilience** | Versioned, idempotent schema migrations – zero-downtime upgrades |
+| **Indexing** | Fast multi-path scanner with SQLite WAL concurrency |
+| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) |
+| **Relations** | Typed file ↔ file links (`marlin link`) with backlinks viewer |
+| **Collections / Views** | Named playlists (`marlin coll`) & saved searches (`marlin view`) for instant recall |
+| **Search**          | Prefix-aware FTS5 across paths, tags, attrs & links; optional `--exec` per match (grep-style context snippets coming Q3) |
+| **DX / Logs** | Structured tracing (`RUST_LOG=debug`) for every operation |
---
@@ -26,11 +29,11 @@
┌──────────────┐ marlin scan ┌─────────────┐
│ your files │ ─────────────────────▶│ SQLite │
│ (any folder) │ │ files/tags │
-└──────────────┘ tag / attr │ attrs / FTS │
- ▲ search / exec └──────┬──────┘
+└──────────────┘ tag / attr / link │ attrs / FTS │
+ ▲ search / exec └──────┬──────┘
└────────── backup / restore ▼
timestamped snapshots
-```
+```
---
@@ -38,7 +41,7 @@
| Requirement | Why |
| ------------------ | ----------------------------- |
-| **Rust** ≥ 1.77 | Build toolchain (`rustup.rs`) |
+| **Rust ≥ 1.77** | Build toolchain (`rustup.rs`) |
| C build essentials | Builds bundled SQLite (Linux) |
macOS & Windows users: let the Rust installer pull the matching build tools.
@@ -48,32 +51,83 @@ macOS & Windows users: let the Rust installer pull the matching build tools.
## Build & install
```bash
-git clone https://github.com/yourname/marlin.git
-cd marlin
+git clone https://github.com/PR0M3TH3AN/Marlin.git
+cd Marlin
cargo build --release
-sudo install -Dm755 target/release/marlin /usr/local/bin/marlin # optional
+
+# (Optional) install into your PATH
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
---
## Quick start
+For a concise walkthrough—including **links, collections and views**—see
+[**Quick start & Demo**](marlin_demo.md).
+
+---
+
+## Testing
+
+Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
+
+### 0 Prepare once
+
```bash
-marlin init # create DB (idempotent)
-marlin scan ~/Pictures ~/Documents # index files
-marlin tag "~/Pictures/**/*.jpg" photos/trip-2024 # add tag
-marlin attr set "~/Documents/**/*.pdf" reviewed yes
-marlin search reviewed --exec "xdg-open {}" # open matches
-marlin backup # snapshot DB
+# Put build artefacts in one place (faster incremental builds)
+export CARGO_TARGET_DIR=target
```
+### 1 Build the new binary
+
+```bash
+git pull
+cargo build --release
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
+```
+
+### 2 Run the smoke-test suite
+
+```bash
+cargo test --test e2e -- --nocapture
+```
+
+*Streams CLI output live; exit-code 0 = all good.*
+
+### 3 (Optionally) run **all** tests
+
+```bash
+cargo test --all -- --nocapture
+```
+
+This now covers:
+
+* unit tests in `src/**`
+* positive & negative integration suites (`tests/pos.rs`, `tests/neg.rs`)
+* doc-tests
+
+#### One-liner helper
+
+```bash
+git pull && cargo build --release &&
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
+cargo test --test e2e -- --nocapture
+```
+
+Alias it as `marlin-ci` for a 5-second upgrade-and-verify loop.
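+
+One way to define that alias (bash syntax; adjust paths to taste):
+
+```bash
+# ~/.bashrc — rebuild, reinstall and smoke-test in one go
+alias marlin-ci='git pull && cargo build --release && sudo install -Dm755 target/release/marlin /usr/local/bin/marlin && cargo test --test e2e -- --nocapture'
+```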
+
+---
+
### Database location
-* **Linux** `~/.local/share/marlin/index.db`
-* **macOS** `~/Library/Application Support/marlin/index.db`
-* **Windows** `%APPDATA%\marlin\index.db`
+| OS | Default path |
+| ----------- | ----------------------------------------------- |
+| **Linux** | `~/.local/share/marlin/index.db` |
+| **macOS** | `~/Library/Application Support/marlin/index.db` |
+| **Windows** | `%APPDATA%\marlin\index.db` |
-Override with:
+Override:
```bash
export MARLIN_DB_PATH=/path/to/custom.db
@@ -86,190 +140,57 @@ export MARLIN_DB_PATH=/path/to/custom.db
```text
marlin <COMMAND> [ARGS]

-init                            create / migrate database
-scan <paths>...                 walk directories & index files
-tag "<pattern>" <tag>           add hierarchical tag
-attr set|ls …                   manage custom attributes
-search <query> [--exec CMD]     FTS query, optionally run CMD on each hit
-backup                          create timestamped snapshot in backups/
-restore <backup_path>           replace DB with snapshot
+init                            create / migrate DB and perform an initial scan of the cwd
+scan <paths>...                 walk directories & (re)index files
+tag "<pattern>" <tag_path>      add hierarchical tag
+attr set <pattern> <key> <value>   set or update custom attribute
+attr ls <path>                  list attributes
+link add|rm|list|backlinks      manage typed file-to-file relations
+coll create|add|list            manage named collections (“playlists”)
+view save|list|exec             save and run smart views (saved queries)
+search <query> [--exec CMD]     FTS5 query; optionally run CMD per hit
+backup                          create timestamped snapshot in backups/
+restore <backup_path>           replace DB with snapshot
+completions <shell>             generate shell completions
```
-### Attribute subcommands
+### Attribute sub-commands
-| Command | Example |
-| ---------- | ------------------------------------------------ |
-| `attr set` | `marlin attr set "~/Docs/**/*.pdf" reviewed yes` |
-| `attr ls` | `marlin attr ls ~/Docs/report.pdf` |
+| Command | Example |
+| ----------- | ------------------------------------------------ |
+| `attr set`  | `marlin attr set "~/Docs/**/*.pdf" reviewed yes` |
+| `attr ls` | `marlin attr ls ~/Docs/report.pdf` |
+| JSON output | `marlin --format=json attr ls ~/Docs/report.pdf` |
---
## Backups & restore
-*Create snapshot*
-
```bash
marlin backup
# → ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```
-*Restore snapshot*
-
```bash
marlin restore ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```
-Marlin also takes an **automatic safety backup before every schema migration**.
+> Marlin also creates an **automatic safety backup before every non-`init` command.**
+> *Auto-prune (`backup --prune <N>`) lands in Q2.*
---
## Upgrading
```bash
-cargo install --path . --force # rebuild & replace installed binary
+cargo install --path . --force # rebuild & replace installed binary
```
-The versioned migration system preserves your data across upgrades.
-
----
-
-## Roadmap
-
-See [`ROADMAP.md`](./ROADMAP.md) for the full development plan.
-
----
-
-## Five-Minute Quickstart
-
-Paste & run each block in your terminal.
-
----
-
-### 0 Prepare & build
-
-```bash
-# Clone or cd into your Marlin repo
-cd ~/Documents/GitHub/Marlin
-
-# Build the release binary
-cargo build --release
-```
-
----
-
-### 1 Install on your PATH
-
-```bash
-sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
-```
-
-> Now `marlin` is available everywhere.
-
----
-
-### 2 Prepare a clean demo directory
-
-```bash
-rm -rf ~/marlin_demo
-mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
-
-printf "Alpha draft\n" > ~/marlin_demo/Projects/Alpha/draft.txt
-printf "Beta notes\n" > ~/marlin_demo/Projects/Beta/notes.md
-printf "Receipt PDF\n" > ~/marlin_demo/Docs/receipt.pdf
-printf "fake jpg\n" > ~/marlin_demo/Media/Photos/vacation.jpg
-```
-
----
-
-### 3 Initialize & index files
-
-```bash
-# Use --verbose if you want full debug traces:
-marlin init
-marlin scan ~/marlin_demo
-
-# or, to see every path tested:
-marlin --verbose init
-marlin --verbose scan ~/marlin_demo
-```
-
-> **Tip:** Rerun `marlin scan` after you add/remove/modify files; only changed files get re-indexed.
-
----
-
-### 4 Attach tags & attributes
-
-```bash
-# Tag everything under “Alpha”
-marlin tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha
-
-# Mark all PDFs as reviewed
-marlin attr set "~/marlin_demo/**/*.pdf" reviewed yes
-
-# (or with debug)
-marlin --verbose tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha
-marlin --verbose attr set "~/marlin_demo/**/*.pdf" reviewed yes
-```
-
----
-
-### 5 Search your index
-
-```bash
-# By tag or filename
-marlin search alpha
-
-# Combined terms (AND across path+attrs)
-marlin search "reviewed AND pdf"
-
-# Run a command on each hit
-marlin search reviewed --exec "echo HIT → {}"
-
-# If things aren’t matching, add --verbose to see the underlying FTS query:
-marlin --verbose search "reviewed AND pdf"
-```
-
-> `{}` in `--exec` is replaced with each file’s path.
-
----
-
-### 6 Backup & restore
-
-```bash
-# Snapshot and store its name
-snap=$(marlin backup | awk '{print $NF}')
-
-# Simulate data loss
-rm ~/.local/share/marlin/index.db
-
-# Restore instantly
-marlin restore "$snap"
-
-# Verify your files still show up
-marlin search reviewed
-```
-
-> Backups live under `~/.local/share/marlin/backups` by default.
-
-##### What you just exercised
-
-| Command | Purpose |
-| ----------------- | ----------------------------------------- |
-| `marlin init` | Create / upgrade the SQLite database |
-| `marlin scan` | Walk directories and (re)index files |
-| `marlin tag` | Attach hierarchical tags |
-| `marlin attr set` | Add/overwrite custom key-value attributes |
-| `marlin search` | FTS5 search across path / tags / attrs |
-| `--exec` | Pipe hits into any shell command |
-| `marlin backup` | Timestamped snapshot of the DB |
-| `marlin restore` | Replace live DB with a chosen snapshot |
-
-That’s the complete surface area of Marlin today—feel free to play around or
-point the scanner at real folders.
-
+Versioned migrations preserve your data across upgrades.
---
## License
-MIT – see `LICENSE`
+MIT – see [`LICENSE`](LICENSE).
+
diff --git a/bar.txt b/bar.txt
new file mode 100644
index 0000000..e69de29
diff --git a/foo.txt b/foo.txt
new file mode 100644
index 0000000..e69de29
diff --git a/marlin_demo.md b/marlin_demo.md
new file mode 100644
index 0000000..28c2b14
--- /dev/null
+++ b/marlin_demo.md
@@ -0,0 +1,183 @@
+# Marlin Demo 🚀
+
+Below is a **“hello-world” walk-through** that matches the current `main`
+branch (auto-scan on `marlin init`, no more forced-migration chatter, cleaner
+build). Everything runs offline on a throw-away directory under `~/marlin_demo`.
+
+---
+
+## 0 Build & install Marlin
+
+```bash
+# inside the repo
+export CARGO_TARGET_DIR=target # <-- speeds up future builds (once)
+cargo build --release # build the new binary
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
+# (cargo install --path . --locked --force works too)
+```
+
+---
+
+## 1 Create the demo tree
+
+```bash
+rm -rf ~/marlin_demo
+mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta,Gamma},Logs,Reports,Scripts,Media/Photos}
+# (zsh users: quote the pattern or enable braceexpand first)
+
+# ── Projects ───────────────────────────────────────────────────
+cat <<EOF > ~/marlin_demo/Projects/Alpha/draft1.md
+# Alpha draft 1
+- [ ] TODO: outline architecture
+- [ ] TODO: write tests
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Alpha/draft2.md
+# Alpha draft 2
+- [x] TODO: outline architecture
+- [ ] TODO: implement feature X
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Beta/notes.md
+Beta meeting notes:
+
+- decided on roadmap
+- ACTION: follow-up with design team
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Beta/final.md
+# Beta Final
+All tasks complete. Ready to ship!
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Gamma/TODO.txt
+Gamma tasks:
+TODO: refactor module Y
+EOF
+
+# ── Logs & Reports ─────────────────────────────────────────────
+echo "2025-05-15 12:00:00 INFO Starting app" > ~/marlin_demo/Logs/app.log
+echo "2025-05-15 12:01:00 ERROR Oops, crash" >> ~/marlin_demo/Logs/app.log
+echo "2025-05-15 00:00:00 INFO System check OK" > ~/marlin_demo/Logs/system.log
+printf "Q1 financials\n" > ~/marlin_demo/Reports/Q1_report.pdf
+
+# ── Scripts & Media ────────────────────────────────────────────
+cat <<'EOF' > ~/marlin_demo/Scripts/deploy.sh
+#!/usr/bin/env bash
+echo "Deploying version $1…"
+EOF
+chmod +x ~/marlin_demo/Scripts/deploy.sh
+echo "JPEGDATA" > ~/marlin_demo/Media/Photos/event.jpg
+```
+
+---
+
+## 2 Initialise **and** index (one step)
+
+```bash
+cd ~/marlin_demo # run init from the folder you want indexed
+marlin init # • creates or migrates DB
+ # • runs *first* full scan of this directory
+```
+
+Add more directories later with `marlin scan <dir>`.
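+
+For example, to pull two more trees into the same index (paths illustrative):
+
+```bash
+marlin scan ~/Pictures ~/Documents
+```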
+
+---
+
+## 3 Tagging examples
+
+```bash
+# Tag all project markdown as ‘project/md’
+marlin tag '~/marlin_demo/Projects/**/*.md' project/md
+
+# Tag your logs
+marlin tag '~/marlin_demo/Logs/**/*.log' logs/app
+
+# Tag everything under Beta as ‘project/beta’
+marlin tag '~/marlin_demo/Projects/Beta/**/*' project/beta
+```
+
+---
+
+## 4 Set custom attributes
+
+```bash
+marlin attr set '~/marlin_demo/Projects/Beta/final.md' status complete
+marlin attr set '~/marlin_demo/Reports/*.pdf' reviewed yes
+```
+
+---
+
+## 5 Play with search / exec hooks
+
+```bash
+marlin search TODO
+marlin search tag:project/md
+marlin search 'tag:logs/app AND ERROR'
+marlin search 'attr:status=complete'
+marlin search 'attr:reviewed=yes AND pdf'
+marlin search 'attr:reviewed=yes' --exec 'xdg-open {}'
+marlin --format=json search 'attr:status=complete' # machine-readable output
+```
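+
+To post-process that output, piping through `jq` should work, assuming the `json` feature was compiled in and the command emits well-formed JSON (the exact shape isn't documented here):
+
+```bash
+# illustrative: pretty-print whatever JSON the search emits
+marlin --format=json search 'attr:status=complete' | jq .
+```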
+
+---
+
+## 6 Verbose mode
+
+```bash
+marlin --verbose scan ~/marlin_demo # watch debug logs stream by
+```
+
+---
+
+## 7 Snapshot & restore
+
+```bash
+snap=$(marlin backup | awk '{print $NF}')
+rm ~/.local/share/marlin/index.db # simulate disaster
+marlin restore "$snap"
+marlin search TODO # still works
+```
+
+*(Reminder: Marlin also makes an **auto-backup** before every non-`init`
+command, so manual snapshots are extra insurance.)*
+
+---
+
+## 8 Linking demo
+
+```bash
+touch ~/marlin_demo/foo.txt ~/marlin_demo/bar.txt
+marlin scan ~/marlin_demo # index the new files
+
+foo=~/marlin_demo/foo.txt
+bar=~/marlin_demo/bar.txt
+
+marlin link add "$foo" "$bar" --type references # create typed link
+marlin link list "$foo" # outgoing links from foo
+marlin link backlinks "$bar" # incoming links to bar
+```
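+
+Expected shapes, based on the CLI's text output (paths abbreviated for readability):
+
+```bash
+marlin link list "$foo"        # → …/marlin_demo/foo.txt → …/marlin_demo/bar.txt [type='references']
+marlin link backlinks "$bar"   # → …/marlin_demo/foo.txt [type='references']
+```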
+
+---
+
+## 9 Collections & smart views
+
+```bash
+# Collection
+marlin coll create SetA
+marlin coll add SetA '~/marlin_demo/Projects/**/*.md'
+marlin coll list SetA
+
+# Saved view (smart folder)
+marlin view save tasks 'attr:status=complete OR TODO'
+marlin view exec tasks
+```
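+
+Both `coll list` and `view exec` print one matching path per line, so they compose with ordinary shell tools:
+
+```bash
+marlin view exec tasks | wc -l           # count files the view matches
+marlin coll list SetA | xargs -r ls -l   # inspect the collection's files
+```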
+
+---
+
+### Recap
+
+* `cargo build --release` + `sudo install …` is still the build path.
+* **`marlin init`** scans the **current working directory** on first run.
+* Scan again only when you add *new* directories (`marlin scan …`).
+* Auto-backups happen before every non-`init` command; manual `marlin backup` gives you extra restore points.
+
+Happy organising!
+
\ No newline at end of file
diff --git a/roadmap.md b/roadmap.md
index 9032721..c45e256 100644
--- a/roadmap.md
+++ b/roadmap.md
@@ -1,40 +1,59 @@
-Here’s the updated roadmap with each new feature slotted in where its dependencies are best met:
+# Marlin Roadmap 2025 → 2026 📜
-| Phase | Focus | Why now? | Key deliverables |
-| -------------------------- | -------------------------------------- | --------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
-| **1. 2025-Q2 – “Bedrock”** | Migrations + CI baseline + core schema | We’ve stabilized migrations; now add foundational tables for links, groups, views | • CI: `cargo test` + `cargo sqlx migrate run --dry-run`<br>• New migrations: `links(src_file,dst_file,link_type)`, `collections(name)` + `collection_files`, `views(name,query)`<br>• CLI stubs for `marlin link` / `unlink` / `list-links` / `backlinks`, `marlin coll` and `marlin view` |
+This document outlines the **official delivery plan** for Marlin over the next four quarters.
+Every work-item below is *time-boxed, testable,* and traceable back to an end-user benefit.
- \| **2. 2025-Q2** | Leaner FTS maintenance | Per-row triggers don’t scale past \~100 k files | • Replace per-row triggers with a “dirty” flag + periodic rebuild
• Benchmark end-to-end on 100 k files |
- \| **2.1 2025-Q2** | Dirty-row FTS + CI | Prep for both scale and live-watcher—avoid full rebuilds on every change | • `scan --dirty` reindexes only changed files
• CI coverage for dirty-scan edge cases |
- \| **2.2 2025-Q2** | Live file watching | Offer true “working-dir” mode—auto-scan on FS events | • `marlin watch [dir]` via `notify` crate
• Incremental scan on create/modify/delete/rename |
- \| **2.3 2025-Q2** | Self-pruning backups | Instant protection and bounded storage—no manual snapshot cleanup | • `marlin backup --prune ` flag
• Post-scan hook to prune to latest 10
• Daily prune automation (cron or CI) |
- \| **3. 2025-Q3** | FTS5 content indexing & annotations | Full-text search over file bodies + per-file notes/highlights | • Add `files.content` column + migration
• Extend `files_fts` to include `content`
• New `annotations` table + FTS triggers
• CLI: `marlin annotate add|list` |
- \| **4. 2025-Q3** | Content hashing, dedup & versioning | Detect duplicates, track history, enable diffs | • Populate `files.hash` with SHA-256
• `scan --rehash` option
• CLI: `marlin version diff ` |
- \| **5. 2025-Q3** | Tag aliases/canonicals & semantic/AI enhancements | Control tag sprawl and lay groundwork for AI-driven suggestions | • Enforce `canonical_id` on `tags` + `tag alias add|ls|rm` CLI
• Create `embeddings` table
• `scan --embed` to generate vectors
• CLI: `marlin tag suggest`, `marlin summary `, `marlin similarity scan` |
- \| **6. 2025-Q4** | Search DSL v2 & Smart Views | More powerful query grammar + reusable “virtual folders” | • Replace ad-hoc parser with a `nom`-based grammar
• CLI: `marlin view save|list|exec` |
- \| **7. 2025-Q4** | Attribute templates, states, tasks & timeline | Structured metadata unlocks workflows, reminders & temporal context | • `templates` + `template_fields` tables + validation
• CLI:
-* `marlin state set|transitions add|state log`
-* `marlin task scan|task list`
-* `marlin remind set ""`
-* `marlin event add ""`, `marlin timeline` |
- \| **8. 2026-Q1** | Dolphin read-only plugin | Surface metadata, links, annotations in native file manager | • Qt sidebar showing tags, attributes, links, annotations |
- \| **9. 2026-Q1** | Full edit UI | After proving read-only stable, add in-place editing | • Tag editor, collection & view managers, state/task/event dialogs |
- \| **10. 2026-Q2** | Multi-device sync | Final frontier: optional sync/replication layer | • Choose sync backend (rqlite / Litestream / bespoke)
• Support read-only mounts for remote indexes |
+> **Legend**
+> ✅ = item added/clarified in the latest planning round
+> Δ = new sub-deliverable (wasn’t in the previous version)
---
-### Current sprint (ends **2025-06-01**)
+## 1 Bird’s-eye Table
-1. FTS rebuild prototype (dirty-rows) – measure on 50 k files
-2. `backup --prune` implementation + auto-prune hook
-3. Integration tests for tag/attr workflows on Windows via GitHub Actions
-4. **New:** basic `links`, `collections`, `views` migrations + CLI stubs
+| Phase / Sprint | Timeline | Focus & Rationale | Key Deliverables (Δ = new) |
+| -------------- | -------- | ----------------- | -------------------------- |
+| **Sprint α – Bedrock & Metadata Domains** | **2025-Q2 (now → 6 Jun)** | Stabilise schema & CI; land first metadata domains with discoverability. | Δ CI: `cargo test` + SQL dry-run<br>Δ Unit tests (`determine_scan_root`, `escape_fts`)<br>Δ Coverage: e2e `attr --format=json`<br>Δ Refactor: move `naive_substring_search` to shared util<br>Migrations: `links`, `collections`, `views`<br>CLI stubs: `link`, `coll`, `view`<br>`marlin demo` walkthrough |
+| **Epic 1 – Scale & Reliability** | 2025-Q2 | Keep scans fast; bullet-proof CI at 100 k files. | Δ Dirty-flag column + `scan --dirty`<br>Benchmarks: full vs dirty scan (100 k)<br>Replace per-row triggers with periodic rebuild<br>CI edge-case tests |
+| **Epic 2 – Live Mode & Self-Pruning Backups** | 2025-Q2 | Continuous indexing & hygiene—Marlin “just works”. | Δ `marlin watch [dir]` (notify/FSEvents)<br>Δ `backup --prune <N>` + auto-prune post-scan<br>Daily / PR-merge prune in CI |
+| **Phase 3 – Content FTS & Annotations** | 2025-Q3 | Index file bodies, grep-style context, inline notes. | `files.content` + migration<br>Extend `files_fts` (context snippets `-C`)<br>`annotations` table + triggers<br>CLI `annotate add|list` |
+| **Phase 4 – Versioning & Deduplication** | 2025-Q3 | History, diffs & duplicate detection. | `files.hash` (SHA-256)<br>`scan --rehash` refresh<br>CLI `version diff <file>` |
+| **Phase 5 – Tag Aliases & Semantic Booster** | 2025-Q3 | Tame tag sprawl; seed AI-powered suggestions. | `canonical_id` on `tags`; CLI `tag alias …`<br>`embeddings` table + `scan --embed`<br>CLI `tag suggest`, `similarity scan`, `summary <file>` |
+| **Phase 6 – Search DSL v2 & Smart Views** | 2025-Q4 | Robust grammar + virtual folders. | Replace parser with **`nom`** grammar (`AND`, `OR`, `()` …)<br>CLI `view save|list|exec` with aliases & paging |
+| **Phase 7 – Structured Workflows** | 2025-Q4 | First-class task / state / reminder / event life-cycles. | ✅ State engine (`files.state`, `state_changes`)<br>CLI `state set|transitions add|log`<br>✅ Task extractor (`tasks` table) + CLI<br>`templates` + validation<br>CLI `remind …`, `event …`, `timeline` |
+| **Phase 8 – Lightweight Integrations** | 2026-Q1 | Surface Marlin in editors / terminal. | VS Code & TUI extension (tags / attrs / links / notes) |
+| **Phase 9 – Dolphin Sidebar Plugin (MVP)** | 2026-Q1 | Read-only Qt sidebar for Linux file managers. | Qt plug-in: tags, attrs, links, annotations |
+| **Phase 10 – Full Edit UI & Multi-Device Sync** | 2026-Q2 | In-place metadata editor & optional sync layer. | GUI editors (tags, views, tasks, reminders, events)<br>Pick/implement sync backend (rqlite, Litestream, …) |
-**Development principles remain**:
+---
-* Local-first, offline-capable
-* Ship code = ship migrations
-* Instrumentation first (trace spans & timings on all new commands)
+## 2 Narrative & Dependencies
+
+1. **Lock down core schema & demo** *(Sprint α).*
+ Developers get immediate feedback via the `marlin demo` command while CI ensures migrations never regress.
+
+2. **Scale & Live Mode** *(Epics 1-2).*
+ Dirty scanning, file-watching and auto-pruned backups guarantee snappy, hands-off operation even on six-figure corpora.
+
+3. **Richer Search** *(Phases 3-6).*
+ Body-content FTS + grep-style snippets lay the groundwork; `nom` grammar then elevates power-user queries and smart views.
+
+4. **Workflow Layers** *(Phase 7).*
+ State transitions, tasks and reminders turn Marlin from a passive index into an active workflow engine.
+
+5. **UX Expansions** *(Phases 8-10).*
+ Start lightweight (VS Code / TUI), graduate to a read-only Dolphin plug-in, then ship full editing & sync for multi-device teams.
+
+Every later milestone depends only on the completion of the rows above it, **so shipping discipline in early sprints de-risks the headline features down the line.**
+
+---
+
+## 3 Next Steps
+
+* **Sprint α kickoff:** break deliverables into stories, estimate, assign.
+* **Add roadmap as `docs/ROADMAP.md`** (this file).
+* Wire a **Checklist issue** on GitHub: one task per Δ bullet for instant tracking.
+
+---
+
+*Last updated · 2025-05-16*
diff --git a/src/cli.rs b/src/cli.rs
index b444222..bcb2bde 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -1,15 +1,36 @@
// src/cli.rs
-use std::path::PathBuf;
-use clap::{Parser, Subcommand};
+pub mod link;
+pub mod coll;
+pub mod view;
+pub mod state;
+pub mod task;
+pub mod remind;
+pub mod annotate;
+pub mod version;
+pub mod event;
+
+use clap::{Parser, Subcommand, ValueEnum};
+use clap_complete::Shell;
+
+/// Output format for commands.
+#[derive(ValueEnum, Clone, Copy, Debug)]
+pub enum Format {
+ Text,
+ Json,
+}
/// Marlin – metadata-driven file explorer (CLI utilities)
#[derive(Parser, Debug)]
-#[command(author, version, about)]
+#[command(author, version, about, propagate_version = true)]
pub struct Cli {
/// Enable debug logging and extra output
#[arg(long)]
pub verbose: bool,
+ /// Output format (text or JSON)
+ #[arg(long, default_value = "text", value_enum, global = true)]
+ pub format: Format,
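+    // Because the flag is `global`, clap also accepts it after the subcommand,
+    // e.g. `marlin attr ls <path> --format=json`.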
+
#[command(subcommand)]
pub command: Commands,
}
@@ -21,12 +42,15 @@ pub enum Commands {
/// Scan one or more directories and populate the file index
Scan {
- paths: Vec<PathBuf>,
+ /// Directories to scan (defaults to cwd)
+ paths: Vec<std::path::PathBuf>,
},
/// Tag files matching a glob pattern (hierarchical tags use `/`)
Tag {
+ /// Glob or path pattern
pattern: String,
+ /// Hierarchical tag name (`foo/bar`)
tag_path: String,
},
@@ -46,14 +70,58 @@ pub enum Commands {
/// Create a timestamped backup of the database
Backup,
- /// Restore from a backup file (over-writes current DB)
+ /// Restore from a backup file (overwrites current DB)
Restore {
- backup_path: PathBuf,
+ backup_path: std::path::PathBuf,
},
+
+ /// Generate shell completions (hidden)
+ #[command(hide = true)]
+ Completions {
+ /// Which shell to generate for
+ #[arg(value_enum)]
+ shell: Shell,
+ },
+
+ /// File-to-file links
+ #[command(subcommand)]
+ Link(link::LinkCmd),
+
+ /// Collections (groups) of files
+ #[command(subcommand)]
+ Coll(coll::CollCmd),
+
+ /// Smart views (saved queries)
+ #[command(subcommand)]
+ View(view::ViewCmd),
+
+ /// Workflow states on files
+ #[command(subcommand)]
+ State(state::StateCmd),
+
+ /// TODO/tasks management
+ #[command(subcommand)]
+ Task(task::TaskCmd),
+
+ /// Reminders on files
+ #[command(subcommand)]
+ Remind(remind::RemindCmd),
+
+ /// File annotations and highlights
+ #[command(subcommand)]
+ Annotate(annotate::AnnotateCmd),
+
+ /// Version diffs
+ #[command(subcommand)]
+ Version(version::VersionCmd),
+
+ /// Calendar events & timelines
+ #[command(subcommand)]
+ Event(event::EventCmd),
}
#[derive(Subcommand, Debug)]
pub enum AttrCmd {
Set { pattern: String, key: String, value: String },
- Ls { path: PathBuf },
+ Ls { path: std::path::PathBuf },
}
diff --git a/src/cli/annotate.rs b/src/cli/annotate.rs
new file mode 100644
index 0000000..50db9d5
--- /dev/null
+++ b/src/cli/annotate.rs
@@ -0,0 +1,28 @@
+// src/cli/annotate.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum AnnotateCmd {
+ Add (ArgsAdd),
+ List(ArgsList),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsAdd {
+ pub file: String,
+ pub note: String,
+ #[arg(long)] pub range: Option<String>,
+ #[arg(long)] pub highlight: bool,
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsList { pub file_pattern: String }
+
+pub fn run(cmd: &AnnotateCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+ match cmd {
+ AnnotateCmd::Add(a) => todo!("annotate add {:?}", a),
+ AnnotateCmd::List(a) => todo!("annotate list {:?}", a),
+ }
+}
diff --git a/src/cli/coll.rs b/src/cli/coll.rs
new file mode 100644
index 0000000..76a40f7
--- /dev/null
+++ b/src/cli/coll.rs
@@ -0,0 +1,108 @@
+//! `marlin coll …` – named collections of files (simple “playlists”).
+
+use clap::{Args, Subcommand};
+use rusqlite::Connection;
+
+use crate::{
+ cli::Format,
+ db,
+};
+
+#[derive(Subcommand, Debug)]
+pub enum CollCmd {
+ /// Create an empty collection
+ Create(CreateArgs),
+ /// Add files (glob) to a collection
+ Add(AddArgs),
+ /// List files inside a collection
+ List(ListArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct CreateArgs {
+ pub name: String,
+}
+
+#[derive(Args, Debug)]
+pub struct AddArgs {
+ pub name: String,
+ pub file_pattern: String,
+}
+
+#[derive(Args, Debug)]
+pub struct ListArgs {
+ pub name: String,
+}
+
+/// Look-up an existing collection **without** implicitly creating it.
+///
+/// Returns the collection ID or an error if it doesn’t exist.
+fn lookup_collection_id(conn: &Connection, name: &str) -> anyhow::Result<i64> {
+ conn.query_row(
+ "SELECT id FROM collections WHERE name = ?1",
+ [name],
+ |r| r.get(0),
+ )
+ .map_err(|_| anyhow::anyhow!("collection not found: {}", name))
+}
+
+pub fn run(cmd: &CollCmd, conn: &mut Connection, fmt: Format) -> anyhow::Result<()> {
+ match cmd {
+ /* ── coll create ──────────────────────────────────────────── */
+ CollCmd::Create(a) => {
+ db::ensure_collection(conn, &a.name)?;
+ if matches!(fmt, Format::Text) {
+ println!("Created collection '{}'", a.name);
+ }
+ }
+
+ /* ── coll add ─────────────────────────────────────────────── */
+ CollCmd::Add(a) => {
+ // Fail if the target collection does not yet exist
+ let coll_id = lookup_collection_id(conn, &a.name)?;
+
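+            // Translate the shell-style glob into a SQL LIKE pattern ('*' → '%');
+            // note that '?' wildcards are not handled here.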
+ let like = a.file_pattern.replace('*', "%");
+ let mut stmt = conn.prepare("SELECT id FROM files WHERE path LIKE ?1")?;
+ let ids: Vec<i64> = stmt
+ .query_map([&like], |r| r.get::<_, i64>(0))?
+ .collect::<Result<Vec<_>, _>>()?;
+
+ for fid in &ids {
+ db::add_file_to_collection(conn, coll_id, *fid)?;
+ }
+
+ match fmt {
+ Format::Text => println!("Added {} file(s) → '{}'", ids.len(), a.name),
+ Format::Json => {
+ #[cfg(feature = "json")]
+ {
+ println!(
+ "{{\"collection\":\"{}\",\"added\":{}}}",
+ a.name,
+ ids.len()
+ );
+ }
+ }
+ }
+ }
+
+ /* ── coll list ────────────────────────────────────────────── */
+ CollCmd::List(a) => {
+ let files = db::list_collection(conn, &a.name)?;
+ match fmt {
+ Format::Text => {
+ for f in files {
+ println!("{f}");
+ }
+ }
+ Format::Json => {
+ #[cfg(feature = "json")]
+ {
+ println!("{}", serde_json::to_string(&files)?);
+ }
+ }
+ }
+ }
+ }
+ Ok(())
+}
diff --git a/src/cli/commands.yaml b/src/cli/commands.yaml
new file mode 100644
index 0000000..19ea663
--- /dev/null
+++ b/src/cli/commands.yaml
@@ -0,0 +1,81 @@
+# cli/commands.yaml
+# Philosophy: one canonical spec stops drift between docs & code.
+link:
+ description: "Manage typed relationships between files"
+ actions:
+ add:
+ args: [from, to]
+ flags: ["--type"]
+ rm:
+ args: [from, to]
+ flags: ["--type"]
+ list:
+ args: [pattern]
+ flags: ["--direction", "--type"]
+ backlinks:
+ args: [pattern]
+
+coll:
+ description: "Manage named collections of files"
+ actions:
+ create:
+ args: [name]
+ add:
+ args: [name, file_pattern]
+ list:
+ args: [name]
+
+view:
+ description: "Save and use smart views (saved queries)"
+ actions:
+ save:
+ args: [view_name, query]
+ list: {}
+ exec:
+ args: [view_name]
+
+state:
+ description: "Track workflow states on files"
+ actions:
+ set:
+ args: [file_pattern, new_state]
+ transitions-add:
+ args: [from_state, to_state]
+ log:
+ args: [file_pattern]
+
+task:
+ description: "Extract TODOs and manage tasks"
+ actions:
+ scan:
+ args: [directory]
+ list:
+ flags: ["--due-today"]
+
+remind:
+ description: "Attach reminders to files"
+ actions:
+ set:
+ args: [file_pattern, timestamp, message]
+
+annotate:
+ description: "Add notes or highlights to files"
+ actions:
+ add:
+ args: [file, note]
+ flags: ["--range", "--highlight"]
+ list:
+ args: [file_pattern]
+
+version:
+ description: "Versioning and diffs"
+ actions:
+ diff:
+ args: [file]
+
+event:
+ description: "Link files to dates/events"
+ actions:
+ add:
+ args: [file, date, description]
+ timeline: {}
diff --git a/src/cli/event.rs b/src/cli/event.rs
new file mode 100644
index 0000000..6988be6
--- /dev/null
+++ b/src/cli/event.rs
@@ -0,0 +1,24 @@
+// src/cli/event.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum EventCmd {
+ Add (ArgsAdd),
+ Timeline,
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsAdd {
+ pub file: String,
+ pub date: String,
+ pub description: String,
+}
+
+pub fn run(cmd: &EventCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+ match cmd {
+ EventCmd::Add(a) => todo!("event add {:?}", a),
+ EventCmd::Timeline => todo!("event timeline"),
+ }
+}
diff --git a/src/cli/link.rs b/src/cli/link.rs
new file mode 100644
index 0000000..16c23c1
--- /dev/null
+++ b/src/cli/link.rs
@@ -0,0 +1,155 @@
+// src/cli/link.rs
+
+use crate::db;
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum LinkCmd {
+ Add(LinkArgs),
+ Rm (LinkArgs),
+ List(ListArgs),
+ Backlinks(BacklinksArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct LinkArgs {
+ pub from: String,
+ pub to: String,
+ #[arg(long)]
+ pub r#type: Option<String>,
+}
+
+#[derive(Args, Debug)]
+pub struct ListArgs {
+ pub pattern: String,
+ #[arg(long)]
+ pub direction: Option<String>,
+ #[arg(long)]
+ pub r#type: Option<String>,
+}
+
+#[derive(Args, Debug)]
+pub struct BacklinksArgs {
+ pub pattern: String,
+}
+
+pub fn run(cmd: &LinkCmd, conn: &mut Connection, format: Format) -> anyhow::Result<()> {
+ match cmd {
+ LinkCmd::Add(args) => {
+ let src_id = db::file_id(conn, &args.from)?;
+ let dst_id = db::file_id(conn, &args.to)?;
+ db::add_link(conn, src_id, dst_id, args.r#type.as_deref())?;
+ match format {
+ Format::Text => {
+ if let Some(t) = &args.r#type {
+ println!("Linked '{}' → '{}' [type='{}']", args.from, args.to, t);
+ } else {
+ println!("Linked '{}' → '{}'", args.from, args.to);
+ }
+ }
+ Format::Json => {
+ let typ = args
+ .r#type
+ .as_ref()
+ .map(|s| format!("\"{}\"", s))
+ .unwrap_or_else(|| "null".into());
+ println!(
+ "{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
+ args.from, args.to, typ
+ );
+ }
+ }
+ }
+ LinkCmd::Rm(args) => {
+ let src_id = db::file_id(conn, &args.from)?;
+ let dst_id = db::file_id(conn, &args.to)?;
+ db::remove_link(conn, src_id, dst_id, args.r#type.as_deref())?;
+ match format {
+ Format::Text => {
+ if let Some(t) = &args.r#type {
+ println!("Removed link '{}' → '{}' [type='{}']", args.from, args.to, t);
+ } else {
+ println!("Removed link '{}' → '{}'", args.from, args.to);
+ }
+ }
+ Format::Json => {
+ let typ = args
+ .r#type
+ .as_ref()
+ .map(|s| format!("\"{}\"", s))
+ .unwrap_or_else(|| "null".into());
+ println!(
+ "{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
+ args.from, args.to, typ
+ );
+ }
+ }
+ }
+ LinkCmd::List(args) => {
+ let results = db::list_links(
+ conn,
+ &args.pattern,
+ args.direction.as_deref(),
+ args.r#type.as_deref(),
+ )?;
+ match format {
+ Format::Json => {
+ let items: Vec<String> = results
+ .into_iter()
+ .map(|(src, dst, t)| {
+ let typ = t
+ .as_ref()
+ .map(|s| format!("\"{}\"", s))
+ .unwrap_or_else(|| "null".into());
+ format!(
+ "{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
+ src, dst, typ
+ )
+ })
+ .collect();
+ println!("[{}]", items.join(","));
+ }
+ Format::Text => {
+ for (src, dst, t) in results {
+ if let Some(t) = t {
+ println!("{} → {} [type='{}']", src, dst, t);
+ } else {
+ println!("{} → {}", src, dst);
+ }
+ }
+ }
+ }
+ }
+ LinkCmd::Backlinks(args) => {
+ let results = db::find_backlinks(conn, &args.pattern)?;
+ match format {
+ Format::Json => {
+ let items: Vec<String> = results
+ .into_iter()
+ .map(|(src, t)| {
+ let typ = t
+ .as_ref()
+ .map(|s| format!("\"{}\"", s))
+ .unwrap_or_else(|| "null".into());
+ format!("{{\"from\":\"{}\",\"type\":{}}}", src, typ)
+ })
+ .collect();
+ println!("[{}]", items.join(","));
+ }
+ Format::Text => {
+ for (src, t) in results {
+ if let Some(t) = t {
+ println!("{} [type='{}']", src, t);
+ } else {
+ println!("{}", src);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Ok(())
+}
diff --git a/src/cli/remind.rs b/src/cli/remind.rs
new file mode 100644
index 0000000..99dac34
--- /dev/null
+++ b/src/cli/remind.rs
@@ -0,0 +1,22 @@
+// src/cli/remind.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum RemindCmd {
+ Set(ArgsSet),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsSet {
+ pub file_pattern: String,
+ pub timestamp: String,
+ pub message: String,
+}
+
+pub fn run(cmd: &RemindCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+ match cmd {
+ RemindCmd::Set(a) => todo!("remind set {:?}", a),
+ }
+}
diff --git a/src/cli/state.rs b/src/cli/state.rs
new file mode 100644
index 0000000..7ac3628
--- /dev/null
+++ b/src/cli/state.rs
@@ -0,0 +1,26 @@
+// src/cli/state.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum StateCmd {
+ Set(ArgsSet),
+ TransitionsAdd(ArgsTrans),
+ Log(ArgsLog),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsSet { pub file_pattern: String, pub new_state: String }
+#[derive(Args, Debug)]
+pub struct ArgsTrans { pub from_state: String, pub to_state: String }
+#[derive(Args, Debug)]
+pub struct ArgsLog { pub file_pattern: String }
+
+pub fn run(cmd: &StateCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+ match cmd {
+ StateCmd::Set(a) => todo!("state set {:?}", a),
+ StateCmd::TransitionsAdd(a)=> todo!("state transitions-add {:?}", a),
+ StateCmd::Log(a) => todo!("state log {:?}", a),
+ }
+}
diff --git a/src/cli/task.rs b/src/cli/task.rs
new file mode 100644
index 0000000..57f9d4c
--- /dev/null
+++ b/src/cli/task.rs
@@ -0,0 +1,22 @@
+// src/cli/task.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum TaskCmd {
+ Scan(ArgsScan),
+ List(ArgsList),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsScan { pub directory: String }
+#[derive(Args, Debug)]
+pub struct ArgsList { #[arg(long)] pub due_today: bool }
+
+pub fn run(cmd: &TaskCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+ match cmd {
+ TaskCmd::Scan(a) => todo!("task scan {:?}", a),
+ TaskCmd::List(a) => todo!("task list {:?}", a),
+ }
+}
diff --git a/src/cli/version.rs b/src/cli/version.rs
new file mode 100644
index 0000000..0c5bf26
--- /dev/null
+++ b/src/cli/version.rs
@@ -0,0 +1,18 @@
+// src/cli/version.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum VersionCmd {
+ Diff(ArgsDiff),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsDiff { pub file: String }
+
+pub fn run(cmd: &VersionCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+ match cmd {
+ VersionCmd::Diff(a) => todo!("version diff {:?}", a),
+ }
+}
diff --git a/src/cli/view.rs b/src/cli/view.rs
new file mode 100644
index 0000000..7f17ad7
--- /dev/null
+++ b/src/cli/view.rs
@@ -0,0 +1,168 @@
+//! `marlin view …` – save & use “smart folders” (named queries).
+
+use std::fs;
+
+use anyhow::Result;
+use clap::{Args, Subcommand};
+use rusqlite::Connection;
+
+use crate::{cli::Format, db};
+
+#[derive(Subcommand, Debug)]
+pub enum ViewCmd {
+ /// Save (or update) a view
+ Save(ArgsSave),
+ /// List all saved views
+ List,
+ /// Execute a view (print matching paths)
+ Exec(ArgsExec),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsSave {
+ pub view_name: String,
+ pub query: String,
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsExec {
+ pub view_name: String,
+}
+
+pub fn run(cmd: &ViewCmd, conn: &mut Connection, fmt: Format) -> anyhow::Result<()> {
+ match cmd {
+ /* ── view save ───────────────────────────────────────────── */
+ ViewCmd::Save(a) => {
+ db::save_view(conn, &a.view_name, &a.query)?;
+ if matches!(fmt, Format::Text) {
+ println!("Saved view '{}' = {}", a.view_name, a.query);
+ }
+ }
+
+ /* ── view list ───────────────────────────────────────────── */
+ ViewCmd::List => {
+ let views = db::list_views(conn)?;
+ match fmt {
+ Format::Text => {
+ for (name, q) in views {
+ println!("{name}: {q}");
+ }
+ }
+ Format::Json => {
+ #[cfg(feature = "json")]
+ {
+ println!("{}", serde_json::to_string(&views)?);
+ }
+ }
+ }
+ }
+
+ /* ── view exec ───────────────────────────────────────────── */
+ ViewCmd::Exec(a) => {
+ let raw = db::view_query(conn, &a.view_name)?;
+
+ // Re-use the tiny parser from marlin search
+ let fts_expr = build_fts_match(&raw);
+
+ let mut stmt = conn.prepare(
+ r#"
+ SELECT f.path
+ FROM files_fts
+ JOIN files f ON f.rowid = files_fts.rowid
+ WHERE files_fts MATCH ?1
+ ORDER BY rank
+ "#,
+ )?;
+ let mut paths: Vec<String> = stmt
+ .query_map([fts_expr], |r| r.get::<_, String>(0))?
+ .collect::<Result<Vec<_>, _>>()?;
+
+ /* ── NEW: graceful fallback when FTS finds nothing ───── */
+ if paths.is_empty() && !raw.contains(':') {
+ paths = naive_search(conn, &raw)?;
+ }
+
+ if paths.is_empty() && matches!(fmt, Format::Text) {
+ eprintln!("(view '{}' has no matches)", a.view_name);
+ } else {
+ for p in paths {
+ println!("{p}");
+ }
+ }
+ }
+ }
+ Ok(())
+}
+
+/* ─── naive substring path/content search (≤ 64 kB files) ───────── */
+
+fn naive_search(conn: &Connection, term: &str) -> Result<Vec<String>> {
+ let term_lc = term.to_lowercase();
+ let mut stmt = conn.prepare("SELECT path FROM files")?;
+ let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
+
+ let mut hits = Vec::new();
+ for p in rows {
+ let p = p?;
+ /* path match */
+ if p.to_lowercase().contains(&term_lc) {
+ hits.push(p);
+ continue;
+ }
+ /* small-file content match */
+ if let Ok(meta) = fs::metadata(&p) {
+ if meta.len() > 64_000 {
+ continue;
+ }
+ }
+ if let Ok(content) = fs::read_to_string(&p) {
+ if content.to_lowercase().contains(&term_lc) {
+ hits.push(p);
+ }
+ }
+ }
+ Ok(hits)
+}
+
+/* ─── minimal copy of search-string → FTS5 translator ───────────── */
+
+fn build_fts_match(raw_query: &str) -> String {
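+    // Translation examples (per the branches below):
+    //   tag:project/alpha  →  tags_text:project AND tags_text:alpha
+    //   attr:reviewed=yes  →  attrs_text:reviewed AND attrs_text:yes
+    //   plain tokens pass through, quoted when they contain FTS metacharacters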
+ use shlex;
+ let mut parts = Vec::new();
+ let toks = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
+ for tok in toks {
+ if ["AND", "OR", "NOT"].contains(&tok.as_str()) {
+ parts.push(tok);
+ } else if let Some(tag) = tok.strip_prefix("tag:") {
+ for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
+ if i > 0 {
+ parts.push("AND".into());
+ }
+ parts.push(format!("tags_text:{}", escape(seg)));
+ }
+ } else if let Some(attr) = tok.strip_prefix("attr:") {
+ let mut kv = attr.splitn(2, '=');
+ let key = kv.next().unwrap();
+ if let Some(val) = kv.next() {
+ parts.push(format!("attrs_text:{}", escape(key)));
+ parts.push("AND".into());
+ parts.push(format!("attrs_text:{}", escape(val)));
+ } else {
+ parts.push(format!("attrs_text:{}", escape(key)));
+ }
+ } else {
+ parts.push(escape(&tok));
+ }
+ }
+ parts.join(" ")
+}
+
+fn escape(term: &str) -> String {
+ if term.contains(|c: char| c.is_whitespace() || "-:()\"".contains(c))
+ || ["AND", "OR", "NOT", "NEAR"].contains(&term.to_uppercase().as_str())
+ {
+ format!("\"{}\"", term.replace('"', "\"\""))
+ } else {
+ term.to_string()
+ }
+}
diff --git a/src/config.rs b/src/config.rs
index 0cd4bc4..d0292a1 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,7 +1,10 @@
-use std::path::{Path, PathBuf};
-
use anyhow::Result;
use directories::ProjectDirs;
+use std::{
+ collections::hash_map::DefaultHasher,
+ hash::{Hash, Hasher},
+ path::{Path, PathBuf},
+};
/// Runtime configuration (currently just the DB path).
#[derive(Debug, Clone)]
@@ -10,22 +13,39 @@ pub struct Config {
}
impl Config {
- /// Resolve configuration from environment or XDG directories.
+ /// Resolve configuration from environment or derive one per-workspace.
+ ///
+ /// Priority:
+ /// 1. `MARLIN_DB_PATH` env-var (explicit override)
+ /// 2. *Workspace-local* file under XDG data dir
+ /// (`~/.local/share/marlin/index_<hash>.db`)
+ /// 3. Fallback to `./index.db` when we cannot locate an XDG dir
pub fn load() -> Result {
- let db_path = std::env::var_os("MARLIN_DB_PATH")
- .map(PathBuf::from)
- .or_else(|| {
- ProjectDirs::from("io", "Marlin", "marlin")
- .map(|dirs| dirs.data_dir().join("index.db"))
- })
- .unwrap_or_else(|| Path::new("index.db").to_path_buf());
+ // 1) explicit override
+ if let Some(val) = std::env::var_os("MARLIN_DB_PATH") {
+ let p = PathBuf::from(val);
+ std::fs::create_dir_all(p.parent().expect("has parent"))?;
+ return Ok(Self { db_path: p });
+ }
- std::fs::create_dir_all(
- db_path
- .parent()
- .expect("db_path should always have a parent directory"),
- )?;
+ // 2) derive per-workspace DB name from CWD hash
+ let cwd = std::env::current_dir()?;
+ let mut h = DefaultHasher::new();
+ cwd.hash(&mut h);
+ let digest = h.finish(); // 64-bit
+ let file_name = format!("index_{digest:016x}.db");
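+ // e.g. "index_03a1b2c3d4e5f607.db" – digest value here is illustrative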
- Ok(Self { db_path })
+ if let Some(dirs) = ProjectDirs::from("io", "Marlin", "marlin") {
+ let dir = dirs.data_dir();
+ std::fs::create_dir_all(dir)?;
+ return Ok(Self {
+ db_path: dir.join(file_name),
+ });
+ }
+
+ // 3) very last resort – workspace-relative DB
+ Ok(Self {
+ db_path: Path::new(&file_name).to_path_buf(),
+ })
}
}
diff --git a/src/db/migrations/0003_create_links_collections_views.sql b/src/db/migrations/0003_create_links_collections_views.sql
new file mode 100644
index 0000000..7ffca89
--- /dev/null
+++ b/src/db/migrations/0003_create_links_collections_views.sql
@@ -0,0 +1,28 @@
+PRAGMA foreign_keys = ON;
+
+-- File-to-file links
+CREATE TABLE IF NOT EXISTS links (
+ id INTEGER PRIMARY KEY,
+ src_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+ dst_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+ type TEXT,
+ UNIQUE(src_file_id, dst_file_id, type)
+);
+
+-- Named collections
+CREATE TABLE IF NOT EXISTS collections (
+ id INTEGER PRIMARY KEY,
+ name TEXT NOT NULL UNIQUE
+);
+CREATE TABLE IF NOT EXISTS collection_files (
+ collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+ PRIMARY KEY(collection_id, file_id)
+);
+
+-- Saved views
+CREATE TABLE IF NOT EXISTS views (
+ id INTEGER PRIMARY KEY,
+ name TEXT NOT NULL UNIQUE,
+ query TEXT NOT NULL
+);
diff --git a/src/db/migrations/0004_fix_hierarchical_tags_fts.sql b/src/db/migrations/0004_fix_hierarchical_tags_fts.sql
new file mode 100644
index 0000000..273079e
--- /dev/null
+++ b/src/db/migrations/0004_fix_hierarchical_tags_fts.sql
@@ -0,0 +1,289 @@
+-- src/db/migrations/0004_fix_hierarchical_tags_fts.sql
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+
+-- Force drop all FTS triggers to ensure they're recreated even if migration is already recorded
+DROP TRIGGER IF EXISTS files_fts_ai_file;
+DROP TRIGGER IF EXISTS files_fts_au_file;
+DROP TRIGGER IF EXISTS files_fts_ad_file;
+DROP TRIGGER IF EXISTS file_tags_fts_ai;
+DROP TRIGGER IF EXISTS file_tags_fts_ad;
+DROP TRIGGER IF EXISTS attributes_fts_ai;
+DROP TRIGGER IF EXISTS attributes_fts_au;
+DROP TRIGGER IF EXISTS attributes_fts_ad;
+
+-- Create a new trigger for file insertion that uses recursive CTE for full tag paths
+CREATE TRIGGER files_fts_ai_file
+AFTER INSERT ON files
+BEGIN
+ INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
+ VALUES (
+ NEW.id,
+ NEW.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = NEW.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = NEW.id)
+ );
+END;
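+
+-- Illustrative: a file tagged project/md (apply_tag also stores the ancestor
+-- row) gets a tags_text containing both 'project' and 'project/md', so either
+-- MATCH term finds it.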
+
+-- Recreate the file path update trigger
+CREATE TRIGGER files_fts_au_file
+AFTER UPDATE OF path ON files
+BEGIN
+ UPDATE files_fts
+ SET path = NEW.path
+ WHERE rowid = NEW.id;
+END;
+
+-- Recreate the file deletion trigger
+CREATE TRIGGER files_fts_ad_file
+AFTER DELETE ON files
+BEGIN
+ DELETE FROM files_fts WHERE rowid = OLD.id;
+END;
+
+-- Create new trigger for tag insertion that uses recursive CTE for full tag paths
+CREATE TRIGGER file_tags_fts_ai
+AFTER INSERT ON file_tags
+BEGIN
+ INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+ SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+ FROM files f
+ WHERE f.id = NEW.file_id;
+END;
+
+-- Create new trigger for tag deletion that uses recursive CTE for full tag paths
+CREATE TRIGGER file_tags_fts_ad
+AFTER DELETE ON file_tags
+BEGIN
+ INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+ SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+ FROM files f
+ WHERE f.id = OLD.file_id;
+END;
+
+-- Create new triggers for attribute operations that use recursive CTE for full tag paths
+CREATE TRIGGER attributes_fts_ai
+AFTER INSERT ON attributes
+BEGIN
+ INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+ SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+ FROM files f
+ WHERE f.id = NEW.file_id;
+END;
+
+CREATE TRIGGER attributes_fts_au
+AFTER UPDATE OF value ON attributes
+BEGIN
+ INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+ SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+ FROM files f
+ WHERE f.id = NEW.file_id;
+END;
+
+CREATE TRIGGER attributes_fts_ad
+AFTER DELETE ON attributes
+BEGIN
+ INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+ SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+ FROM files f
+ WHERE f.id = OLD.file_id;
+END;
+
+-- Update all existing FTS entries with the new tag-path format
+INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path AS tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name AS tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+FROM files f;
diff --git a/src/db/mod.rs b/src/db/mod.rs
index 2e8286b..6a13a43 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -1,9 +1,12 @@
-// src/db/mod.rs
+//! Central DB helper – connection bootstrap, migrations **and** most
+//! data-access helpers (tags, links, collections, saved views, …).
+
use std::{
fs,
path::{Path, PathBuf},
};
+use std::result::Result as StdResult;
use anyhow::{Context, Result};
use chrono::Local;
use rusqlite::{
@@ -12,16 +15,20 @@ use rusqlite::{
Connection,
OpenFlags,
OptionalExtension,
+ TransactionBehavior,
};
-use tracing::{debug, info};
+use tracing::{debug, info, warn};
+
+/* ─── embedded migrations ─────────────────────────────────────────── */
-/// Embed every numbered migration file here.
const MIGRATIONS: &[(&str, &str)] = &[
("0001_initial_schema.sql", include_str!("migrations/0001_initial_schema.sql")),
("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
+ ("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
+ ("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
];
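+
+// The runner derives each schema version from the numeric file-name prefix,
+// e.g. "0004_fix_hierarchical_tags_fts.sql" → version 4.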
-/* ─── connection bootstrap ──────────────────────────────────────────── */
+/* ─── connection bootstrap ────────────────────────────────────────── */
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
let db_path_ref = db_path.as_ref();
@@ -31,16 +38,18 @@ pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.pragma_update(None, "foreign_keys", "ON")?;
- // Apply migrations (drops & recreates all FTS triggers)
- apply_migrations(&mut conn)?;
+ // Wait up to 30 s for a competing writer before giving up
+ conn.busy_timeout(std::time::Duration::from_secs(30))?; // ← tweaked
+ apply_migrations(&mut conn)?;
Ok(conn)
}
-/* ─── migration runner ──────────────────────────────────────────────── */
+
+/* ─── migration runner ────────────────────────────────────────────── */
fn apply_migrations(conn: &mut Connection) -> Result<()> {
- // Ensure schema_version table
+ // Ensure schema_version bookkeeping table exists
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS schema_version (
version INTEGER PRIMARY KEY,
@@ -48,10 +57,11 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
);",
)?;
- // Legacy patch (ignore if exists)
+ // Legacy patch – ignore errors if column already exists
let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);
- let tx = conn.transaction()?;
+ // Grab the write-lock up-front so migrations can run uninterrupted
+ let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
for (fname, sql) in MIGRATIONS {
let version: i64 = fname
@@ -74,13 +84,8 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
}
info!("applying migration {}", fname);
- println!(
- "\nSQL SCRIPT FOR MIGRATION: {}\nBEGIN SQL >>>\n{}\n<<< END SQL\n",
- fname, sql
- );
-
tx.execute_batch(sql)
- .with_context(|| format!("could not apply migration {}", fname))?;
+ .with_context(|| format!("could not apply migration {fname}"))?;
tx.execute(
"INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
@@ -89,10 +94,31 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
}
tx.commit()?;
+
+ // sanity – warn if any embedded migration got skipped
+ let mut missing = Vec::new();
+ for (fname, _) in MIGRATIONS {
+ let v: i64 = fname.split('_').next().unwrap().parse().unwrap();
+ let ok: bool = conn
+ .query_row(
+ "SELECT 1 FROM schema_version WHERE version = ?1",
+ [v],
+ |_| Ok(true),
+ )
+ .optional()?
+ .unwrap_or(false);
+ if !ok {
+ missing.push(v);
+ }
+ }
+ if !missing.is_empty() {
+ warn!("migrations not applied: {:?}", missing);
+ }
+
Ok(())
}
-/* ─── helpers ───────────────────────────────────────────────────────── */
+/* ─── tag helpers ─────────────────────────────────────────────────── */
pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
let mut parent: Option<i64> = None;
@@ -104,7 +130,7 @@ pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
let id: i64 = conn.query_row(
"SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)",
params![segment, parent],
- |row| row.get(0),
+ |r| r.get(0),
)?;
parent = Some(id);
}
@@ -116,6 +142,8 @@ pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
.map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
}
+/* ─── attributes ──────────────────────────────────────────────────── */
+
pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
conn.execute(
r#"
@@ -128,7 +156,161 @@ pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
Ok(())
}
-/* ─── backup / restore ──────────────────────────────────────────────── */
+/* ─── links ───────────────────────────────────────────────────────── */
+
+pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+ conn.execute(
+ "INSERT INTO links(src_file_id, dst_file_id, type)
+ VALUES (?1, ?2, ?3)
+ ON CONFLICT(src_file_id, dst_file_id, type) DO NOTHING",
+ params![src_file_id, dst_file_id, link_type],
+ )?;
+ Ok(())
+}
+
+pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+ conn.execute(
+ "DELETE FROM links
+ WHERE src_file_id = ?1
+ AND dst_file_id = ?2
+ AND (type IS ?3 OR type = ?3)",
+ params![src_file_id, dst_file_id, link_type],
+ )?;
+ Ok(())
+}
+
+pub fn list_links(
+ conn: &Connection,
+ pattern: &str,
+ direction: Option<&str>,
+ link_type: Option<&str>,
+) -> Result<Vec<(String, String, Option<String>)>> {
+ let like_pattern = pattern.replace('*', "%");
+
+ // Files matching pattern
+ let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
+ let rows = stmt
+ .query_map(params![like_pattern], |r| Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)))?
+ .collect::<Result<Vec<_>, _>>()?;
+
+ let mut out = Vec::new();
+ for (fid, fpath) in rows {
+ let (src_col, dst_col) = match direction {
+ Some("in") => ("dst_file_id", "src_file_id"),
+ _ => ("src_file_id", "dst_file_id"),
+ };
+
+ let sql = format!(
+ "SELECT f2.path, l.type
+ FROM links l
+ JOIN files f2 ON f2.id = l.{dst_col}
+ WHERE l.{src_col} = ?1
+ AND (?2 IS NULL OR l.type = ?2)",
+ );
+
+ let mut stmt2 = conn.prepare(&sql)?;
+ let links = stmt2
+ .query_map(params![fid, link_type], |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)))?
+ .collect::<Result<Vec<_>, _>>()?;
+
+ for (other, typ) in links {
+ out.push((fpath.clone(), other, typ));
+ }
+ }
+ Ok(out)
+}
+
+pub fn find_backlinks(
+ conn: &Connection,
+ pattern: &str,
+) -> Result<Vec<(String, Option<String>)>> {
+ let like = pattern.replace('*', "%");
+
+ let mut stmt = conn.prepare(
+ "SELECT f1.path, l.type
+ FROM links l
+ JOIN files f1 ON f1.id = l.src_file_id
+ JOIN files f2 ON f2.id = l.dst_file_id
+ WHERE f2.path LIKE ?1",
+ )?;
+
+ let rows = stmt.query_map([like], |r| {
+ Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
+ })?;
+
+ let out = rows.collect::<Result<Vec<_>, _>>()?; // rusqlite → anyhow via `?`
+ Ok(out)
+}
+
+/* ─── NEW: collections helpers ────────────────────────────────────── */
+
+pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> {
+ conn.execute(
+ "INSERT OR IGNORE INTO collections(name) VALUES (?1)",
+ params![name],
+ )?;
+ conn.query_row(
+ "SELECT id FROM collections WHERE name = ?1",
+ params![name],
+ |r| r.get(0),
+ )
+ .context("collection lookup failed")
+}
+
+pub fn add_file_to_collection(conn: &Connection, coll_id: i64, file_id: i64) -> Result<()> {
+ conn.execute(
+ "INSERT OR IGNORE INTO collection_files(collection_id, file_id)
+ VALUES (?1, ?2)",
+ params![coll_id, file_id],
+ )?;
+ Ok(())
+}
+
+pub fn list_collection(conn: &Connection, name: &str) -> Result<Vec<String>> {
+ let mut stmt = conn.prepare(
+ r#"SELECT f.path
+ FROM collections c
+ JOIN collection_files cf ON cf.collection_id = c.id
+ JOIN files f ON f.id = cf.file_id
+ WHERE c.name = ?1
+ ORDER BY f.path"#,
+ )?;
+
+ let rows = stmt.query_map([name], |r| r.get::<_, String>(0))?;
+ let list = rows.collect::<Result<Vec<_>, _>>()?;
+ Ok(list)
+}
+
+/* ─── NEW: saved views (smart folders) ────────────────────────────── */
+
+pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
+ conn.execute(
+ "INSERT INTO views(name, query)
+ VALUES (?1, ?2)
+ ON CONFLICT(name) DO UPDATE SET query = excluded.query",
+ params![name, query],
+ )?;
+ Ok(())
+}
+
+pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> {
+ let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?;
+
+ let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
+ let list = rows.collect::<Result<Vec<_>, _>>()?;
+ Ok(list)
+}
+
+pub fn view_query(conn: &Connection, name: &str) -> Result<String> {
+ conn.query_row(
+ "SELECT query FROM views WHERE name = ?1",
+ [name],
+ |r| r.get::<_, String>(0),
+ )
+ .context(format!("no view called '{name}'"))
+}
+
+/* ─── backup / restore helpers ────────────────────────────────────── */
pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
let src = db_path.as_ref();
@@ -153,3 +335,15 @@ pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
fs::copy(&backup_path, &live_db_path)?;
Ok(())
}
+
+/* ─── tests ───────────────────────────────────────────────────────── */
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn migrations_apply_in_memory() {
+ open(":memory:").expect("all migrations apply");
+ }
+}
diff --git a/src/logging.rs b/src/logging.rs
index a0141ed..514fa0d 100644
--- a/src/logging.rs
+++ b/src/logging.rs
@@ -5,9 +5,13 @@ use tracing_subscriber::{fmt, EnvFilter};
/// Reads `RUST_LOG` for filtering, falls back to `info`.
pub fn init() {
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+
+ // All tracing output (INFO, WARN, ERROR …) now goes to *stderr* so the
+ // integration tests can assert on warnings / errors reliably.
fmt()
- .with_target(false)
- .with_level(true)
- .with_env_filter(filter)
+ .with_target(false) // hide module targets
+ .with_level(true) // include log level
+ .with_env_filter(filter) // respect RUST_LOG
+ .with_writer(std::io::stderr) // <-- NEW: send to stderr
.init();
}
diff --git a/src/main.rs b/src/main.rs
index 46fe75b..984fbec 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,6 @@
// src/main.rs
+#![deny(warnings)]
+
mod cli;
mod config;
mod db;
@@ -6,46 +8,74 @@ mod logging;
mod scan;
use anyhow::{Context, Result};
-use clap::Parser;
+use clap::{CommandFactory, Parser};
+use clap_complete::generate;
use glob::Pattern;
use rusqlite::params;
use shellexpand;
use shlex;
-use std::{env, path::PathBuf, process::Command};
+use std::{
+ env,
+ fs,
+ io,
+ path::{Path, PathBuf},
+ process::Command,
+};
use tracing::{debug, error, info};
use walkdir::WalkDir;
-use cli::{AttrCmd, Cli, Commands};
+use cli::{Cli, Commands};
fn main() -> Result<()> {
- // Parse CLI and bootstrap logging
+ /* ── CLI parsing & logging ────────────────────────────────────── */
+
let args = Cli::parse();
if args.verbose {
- // switch on debug‐level logs
env::set_var("RUST_LOG", "debug");
}
logging::init();
- let cfg = config::Config::load()?;
+ /* ── shell-completion shortcut ───────────────────────────────── */
- // Backup before any non-init, non-backup/restore command
- if !matches!(args.command, Commands::Init | Commands::Backup | Commands::Restore { .. }) {
- match db::backup(&cfg.db_path) {
- Ok(path) => info!("Pre-command auto-backup created at {}", path.display()),
- Err(e) => error!("Failed to create pre-command auto-backup: {}", e),
- }
+ if let Commands::Completions { shell } = &args.command {
+ let mut cmd = Cli::command();
+ generate(*shell, &mut cmd, "marlin", &mut io::stdout());
+ return Ok(());
}
- // Open (and migrate) the DB
+ /* ── config & automatic backup ───────────────────────────────── */
+
+ let cfg = config::Config::load()?; // DB path, etc.
+
+ match &args.command {
+ Commands::Init | Commands::Backup | Commands::Restore { .. } => {}
+ _ => match db::backup(&cfg.db_path) {
+ Ok(path) => info!("Pre-command auto-backup created at {}", path.display()),
+ Err(e) => error!("Failed to create pre-command auto-backup: {e}"),
+ },
+ }
+
+ /* ── open DB (runs migrations if needed) ─────────────────────── */
+
let mut conn = db::open(&cfg.db_path)?;
+ /* ── command dispatch ────────────────────────────────────────── */
+
match args.command {
+ Commands::Completions { .. } => {} // already handled
+
Commands::Init => {
info!("Database initialised at {}", cfg.db_path.display());
+
+ // Always (re-)scan the current directory so even an existing DB
+ // picks up newly created files in the working tree.
+ let cwd = env::current_dir().context("getting current directory")?;
+ let count = scan::scan_directory(&mut conn, &cwd)
+ .context("initial scan failed")?;
+ info!("Initial scan complete – indexed/updated {count} files");
}
Commands::Scan { paths } => {
- // if none given, default to current dir
let scan_paths = if paths.is_empty() {
vec![env::current_dir()?]
} else {
@@ -56,22 +86,16 @@ fn main() -> Result<()> {
}
}
- Commands::Tag { pattern, tag_path } => {
- apply_tag(&conn, &pattern, &tag_path)?;
- }
+ Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?,
Commands::Attr { action } => match action {
- AttrCmd::Set { pattern, key, value } => {
- attr_set(&conn, &pattern, &key, &value)?;
- }
- AttrCmd::Ls { path } => {
- attr_ls(&conn, &path)?;
+ cli::AttrCmd::Set { pattern, key, value } => {
+ attr_set(&conn, &pattern, &key, &value)?
}
+ cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?,
},
- Commands::Search { query, exec } => {
- run_search(&conn, &query, exec)?;
- }
+ Commands::Search { query, exec } => run_search(&conn, &query, exec)?,
Commands::Backup => {
let path = db::backup(&cfg.db_path)?;
@@ -79,48 +103,89 @@ fn main() -> Result<()> {
}
Commands::Restore { backup_path } => {
- drop(conn);
- db::restore(&backup_path, &cfg.db_path)
- .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?;
- println!("Restored DB file from {}", backup_path.display());
- db::open(&cfg.db_path)
- .with_context(|| format!("Could not open restored DB at {}", cfg.db_path.display()))?;
- info!("Successfully opened and processed restored database.");
+ drop(conn); // close handle before overwrite
+ db::restore(&backup_path, &cfg.db_path).with_context(|| {
+ format!("Failed to restore DB from {}", backup_path.display())
+ })?;
+ println!("Restored DB from {}", backup_path.display());
+ db::open(&cfg.db_path).with_context(|| {
+ format!("Could not open restored DB at {}", cfg.db_path.display())
+ })?;
+ info!("Successfully opened restored database.");
}
+
+ /* passthrough sub-modules that still stub out their logic */
+ Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?,
+ Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?,
+ Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?,
+ Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?,
+ Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?,
+ Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?,
+ Commands::Annotate(an_cmd) => cli::annotate::run(&an_cmd, &mut conn, args.format)?,
+ Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?,
+ Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?,
}
Ok(())
}
+/* ───────────────────────── helpers & sub-routines ───────────────── */
+
+/* ---------- TAGS ---------- */
+
/// Apply a hierarchical tag to all files matching the glob pattern.
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
- let tag_id = db::ensure_tag_path(conn, tag_path)?;
+ // ensure_tag_path returns the deepest-node ID
+ let leaf_tag_id = db::ensure_tag_path(conn, tag_path)?;
+
+ // collect that tag and all its ancestors
+ let mut tag_ids = Vec::new();
+ let mut current = Some(leaf_tag_id);
+ while let Some(id) = current {
+ tag_ids.push(id);
+ current = match conn.query_row(
+ "SELECT parent_id FROM tags WHERE id = ?1",
+ params![id],
+ |r| r.get::<_, Option<i64>>(0),
+ ) {
+ Ok(parent_id) => parent_id,
+ Err(rusqlite::Error::QueryReturnedNoRows) => None,
+ Err(e) => return Err(e.into()),
+ };
+ }
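+ // e.g. "project/md" yields tag_ids = [id("md"), id("project")] – leaf first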
+
let expanded = shellexpand::tilde(pattern).into_owned();
- let pat = Pattern::new(&expanded)
- .with_context(|| format!("Invalid glob pattern `{}`", expanded))?;
+ let pat = Pattern::new(&expanded)
+ .with_context(|| format!("Invalid glob pattern `{expanded}`"))?;
let root = determine_scan_root(&expanded);
- let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
- let mut stmt_insert =
- conn.prepare("INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)")?;
+ let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
+ let mut stmt_insert = conn.prepare(
+ "INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
+ )?;
let mut count = 0;
- for entry in WalkDir::new(&root).into_iter().filter_map(Result::ok).filter(|e| e.file_type().is_file()) {
+ for entry in WalkDir::new(&root)
+ .into_iter()
+ .filter_map(Result::ok)
+ .filter(|e| e.file_type().is_file())
+ {
let path_str = entry.path().to_string_lossy();
- debug!("testing path: {}", path_str);
if !pat.matches(&path_str) {
- debug!(" → no match");
continue;
}
- debug!(" → matched");
match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) {
Ok(file_id) => {
- if stmt_insert.execute(params![file_id, tag_id])? > 0 {
+ let mut newly = false;
+ for &tid in &tag_ids {
+ if stmt_insert.execute(params![file_id, tid])? > 0 {
+ newly = true;
+ }
+ }
+ if newly {
info!(file = %path_str, tag = tag_path, "tagged");
count += 1;
- } else {
- debug!(file = %path_str, tag = tag_path, "already tagged");
}
}
Err(rusqlite::Error::QueryReturnedNoRows) => {
@@ -132,42 +197,39 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
}
}
- if count > 0 {
- info!("Applied tag '{}' to {} file(s).", tag_path, count);
- } else {
- info!("No new files were tagged with '{}' (no matches or already tagged).", tag_path);
- }
+ info!(
+ "Applied tag '{}' to {} file(s).",
+ tag_path, count
+ );
Ok(())
}
+/* ---------- ATTRIBUTES ---------- */
+
/// Set a key=value attribute on all files matching the glob pattern.
-fn attr_set(
- conn: &rusqlite::Connection,
- pattern: &str,
- key: &str,
- value: &str,
-) -> Result<()> {
+fn attr_set(conn: &rusqlite::Connection, pattern: &str, key: &str, value: &str) -> Result<()> {
let expanded = shellexpand::tilde(pattern).into_owned();
- let pat = Pattern::new(&expanded)
- .with_context(|| format!("Invalid glob pattern `{}`", expanded))?;
+ let pat = Pattern::new(&expanded)
+ .with_context(|| format!("Invalid glob pattern `{expanded}`"))?;
let root = determine_scan_root(&expanded);
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut count = 0;
- for entry in WalkDir::new(&root).into_iter().filter_map(Result::ok).filter(|e| e.file_type().is_file()) {
+ for entry in WalkDir::new(&root)
+ .into_iter()
+ .filter_map(Result::ok)
+ .filter(|e| e.file_type().is_file())
+ {
let path_str = entry.path().to_string_lossy();
- debug!("testing attr path: {}", path_str);
if !pat.matches(&path_str) {
- debug!(" → no match");
continue;
}
- debug!(" → matched");
match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) {
Ok(file_id) => {
db::upsert_attr(conn, file_id, key, value)?;
- info!(file = %path_str, key = key, value = value, "attr set");
+ info!(file = %path_str, key, value, "attr set");
count += 1;
}
Err(rusqlite::Error::QueryReturnedNoRows) => {
@@ -179,45 +241,64 @@ fn attr_set(
}
}
- if count > 0 {
- info!("Attribute '{}: {}' set on {} file(s).", key, value, count);
- } else {
- info!("No attributes set (no matches or not indexed).");
- }
+ info!(
+ "Attribute '{}={}' set on {} file(s).",
+ key, value, count
+ );
Ok(())
}
/// List attributes for a given file path.
-fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> {
+fn attr_ls(conn: &rusqlite::Connection, path: &Path) -> Result<()> {
let file_id = db::file_id(conn, &path.to_string_lossy())?;
- let mut stmt = conn.prepare(
- "SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key",
- )?;
- for row in stmt.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))? {
+ let mut stmt =
+ conn.prepare("SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key")?;
+ for row in stmt
+ .query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?
+ {
let (k, v) = row?;
println!("{k} = {v}");
}
Ok(())
}
-/// Build and run an FTS5 search query, with optional exec.
+/* ---------- SEARCH ---------- */
+
+/// Run an FTS5 search, optionally piping each hit through `exec`.
+/// Falls back to a simple substring scan (path + ≤64 kB file contents)
+/// when the FTS index yields no rows.
fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
- let mut fts_query_parts = Vec::new();
- let parts = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
- for part in parts {
- if ["AND", "OR", "NOT"].contains(&part.as_str()) {
- fts_query_parts.push(part);
- } else if let Some(tag) = part.strip_prefix("tag:") {
- fts_query_parts.push(format!("tags_text:{}", escape_fts_query_term(tag)));
- } else if let Some(attr) = part.strip_prefix("attr:") {
- fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(attr)));
+ // Build the FTS MATCH expression
+ let mut parts = Vec::new();
+ let toks = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
+ for tok in toks {
+ if ["AND", "OR", "NOT"].contains(&tok.as_str()) {
+ parts.push(tok);
+ } else if let Some(tag) = tok.strip_prefix("tag:") {
+ for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
+ if i > 0 {
+ parts.push("AND".into());
+ }
+ parts.push(format!("tags_text:{}", escape_fts(seg)));
+ }
+ } else if let Some(attr) = tok.strip_prefix("attr:") {
+ let mut kv = attr.splitn(2, '=');
+ let key = kv.next().unwrap();
+ if let Some(val) = kv.next() {
+ parts.push(format!("attrs_text:{}", escape_fts(key)));
+ parts.push("AND".into());
+ parts.push(format!("attrs_text:{}", escape_fts(val)));
+ } else {
+ parts.push(format!("attrs_text:{}", escape_fts(key)));
+ }
} else {
- fts_query_parts.push(escape_fts_query_term(&part));
+ parts.push(escape_fts(&tok));
}
}
- let fts_expr = fts_query_parts.join(" ");
- debug!("Constructed FTS MATCH expression: {}", fts_expr);
+ let fts_expr = parts.join(" ");
+ debug!("FTS MATCH expression: {fts_expr}");
+ // ---------- primary FTS query ----------
let mut stmt = conn.prepare(
r#"
SELECT f.path
@@ -227,51 +308,27 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
ORDER BY rank
"#,
)?;
- let hits: Vec<String> = stmt
- .query_map(params![fts_expr], |row| row.get(0))?
+ let mut hits: Vec<String> = stmt
+ .query_map(params![fts_expr], |r| r.get::<_, String>(0))?
.filter_map(Result::ok)
.collect();
+ // ---------- graceful fallback ----------
+ if hits.is_empty() && !raw_query.contains(':') {
+ hits = naive_substring_search(conn, raw_query)?;
+ }
+
+ // ---------- output / exec ----------
if let Some(cmd_tpl) = exec {
- let mut ran_without_placeholder = false;
- if hits.is_empty() && !cmd_tpl.contains("{}") {
- if let Some(mut parts) = shlex::split(&cmd_tpl) {
- if !parts.is_empty() {
- let prog = parts.remove(0);
- let status = Command::new(&prog).args(&parts).status()?;
- if !status.success() {
- error!(command=%cmd_tpl, code=?status.code(), "command failed");
- }
- }
- }
- ran_without_placeholder = true;
- }
- if !ran_without_placeholder {
- for path in hits {
- let quoted = shlex::try_quote(&path).unwrap_or(path.clone().into());
- let cmd_final = if cmd_tpl.contains("{}") {
- cmd_tpl.replace("{}", "ed)
- } else {
- format!("{} {}", cmd_tpl, "ed)
- };
- if let Some(mut parts) = shlex::split(&cmd_final) {
- if parts.is_empty() {
- continue;
- }
- let prog = parts.remove(0);
- let status = Command::new(&prog).args(&parts).status()?;
- if !status.success() {
- error!(file=%path, command=%cmd_final, code=?status.code(), "command failed");
- }
- }
- }
- }
+ run_exec(&hits, &cmd_tpl)?;
} else {
if hits.is_empty() {
- eprintln!("No matches for query: `{}` (FTS expression: `{}`)", raw_query, fts_expr);
+ eprintln!(
+ "No matches for query: `{raw_query}` (FTS expression: `{fts_expr}`)"
+ );
} else {
for p in hits {
- println!("{}", p);
+ println!("{p}");
}
}
}
@@ -279,10 +336,81 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
Ok(())
}
-/// Quote terms for FTS when needed.
-fn escape_fts_query_term(term: &str) -> String {
+/// Simple, case-insensitive substring scan over paths and (small) file bodies.
+fn naive_substring_search(conn: &rusqlite::Connection, term: &str) -> Result<Vec<String>> {
+ let term_lc = term.to_lowercase();
+
+ let mut stmt = conn.prepare("SELECT path FROM files")?;
+ let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
+
+ let mut out = Vec::new();
+ for p in rows {
+ let p = p?;
+ if p.to_lowercase().contains(&term_lc) {
+ out.push(p.clone());
+ continue;
+ }
+ // Only inspect small files to stay fast
+ if let Ok(meta) = fs::metadata(&p) {
+ if meta.len() > 64_000 {
+ continue;
+ }
+ }
+ if let Ok(content) = fs::read_to_string(&p) {
+ if content.to_lowercase().contains(&term_lc) {
+ out.push(p);
+ }
+ }
+ }
+ Ok(out)
+}
+
+/// Helper: run an external command template on every hit.
+fn run_exec(paths: &[String], cmd_tpl: &str) -> Result<()> {
+ let mut ran_without_placeholder = false;
+
+ if paths.is_empty() && !cmd_tpl.contains("{}") {
+ if let Some(mut parts) = shlex::split(cmd_tpl) {
+ if !parts.is_empty() {
+ let prog = parts.remove(0);
+ let status = Command::new(&prog).args(&parts).status()?;
+ if !status.success() {
+ error!(command = %cmd_tpl, code = ?status.code(), "command failed");
+ }
+ }
+ }
+ ran_without_placeholder = true;
+ }
+
+ if !ran_without_placeholder {
+ for p in paths {
+ let quoted = shlex::try_quote(p).unwrap_or_else(|_| p.into());
+ let final_cmd = if cmd_tpl.contains("{}") {
+ cmd_tpl.replace("{}", "ed)
+ } else {
+ format!("{cmd_tpl} {quoted}")
+ };
+ if let Some(mut parts) = shlex::split(&final_cmd) {
+ if parts.is_empty() {
+ continue;
+ }
+ let prog = parts.remove(0);
+ let status = Command::new(&prog).args(&parts).status()?;
+ if !status.success() {
+ error!(file = %p, command = %final_cmd, code = ?status.code(), "command failed");
+ }
+ }
+ }
+ }
+ Ok(())
+}
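+
+// Illustrative: with cmd_tpl = "echo {}" and hit "/tmp/a b.txt", the command
+// becomes `echo '/tmp/a b.txt'`; a template without "{}" gets the quoted
+// path appended instead.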
+
+/* ---------- misc helpers ---------- */
+
+fn escape_fts(term: &str) -> String {
if term.contains(|c: char| c.is_whitespace() || "-:()\"".contains(c))
- || ["AND","OR","NOT","NEAR"].contains(&term.to_uppercase().as_str())
+ || ["AND", "OR", "NOT", "NEAR"]
+ .contains(&term.to_uppercase().as_str())
{
format!("\"{}\"", term.replace('"', "\"\""))
} else {
@@ -292,16 +420,22 @@ fn escape_fts_query_term(term: &str) -> String {
/// Determine a filesystem root to limit recursive walking.
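+/// e.g. "/data/Projects/**/*.md" → "/data/Projects" (illustrative path).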
fn determine_scan_root(pattern: &str) -> PathBuf {
- let wildcard_pos = pattern.find(|c| c == '*' || c == '?' || c == '[').unwrap_or(pattern.len());
- let prefix = &pattern[..wildcard_pos];
- let mut root = PathBuf::from(prefix);
- while root.as_os_str().to_string_lossy().contains(|c| ['*','?','['].contains(&c)) {
- if let Some(parent) = root.parent() {
- root = parent.to_path_buf();
- } else {
- root = PathBuf::from(".");
- break;
- }
+ let first_wild = pattern
+ .find(|c| matches!(c, '*' | '?' | '['))
+ .unwrap_or(pattern.len());
+ let mut root = PathBuf::from(&pattern[..first_wild]);
+
+ while root
+ .as_os_str()
+ .to_string_lossy()
+ .contains(|c| matches!(c, '*' | '?' | '['))
+ {
+ root = root.parent().map(Path::to_path_buf).unwrap_or_default();
+ }
+
+ if root.as_os_str().is_empty() {
+ PathBuf::from(".")
+ } else {
+ root
}
- root
}
diff --git a/src/test_hierarchical_tags.rs b/src/test_hierarchical_tags.rs
new file mode 100644
index 0000000..5c36911
--- /dev/null
+++ b/src/test_hierarchical_tags.rs
@@ -0,0 +1,240 @@
+// Test script to validate hierarchical tag FTS fix
+// This script demonstrates how the fix works with a simple test case
+
+use rusqlite::{Connection, params};
+use std::path::Path;
+use std::fs;
+use anyhow::Result;
+
+fn main() -> Result<()> {
+ // Create a test database in a temporary location
+ let db_path = Path::new("/tmp/marlin_test.db");
+ if db_path.exists() {
+ fs::remove_file(db_path)?;
+ }
+
+ println!("Creating test database at {:?}", db_path);
+
+ // Initialize database with our schema and migrations
+ let conn = Connection::open(db_path)?;
+
+ // Apply schema (simplified version of what's in the migrations)
+ println!("Applying schema...");
+ conn.execute_batch(
+ "PRAGMA foreign_keys = ON;
+ PRAGMA journal_mode = WAL;
+
+ CREATE TABLE files (
+ id INTEGER PRIMARY KEY,
+ path TEXT NOT NULL UNIQUE,
+ size INTEGER,
+ mtime INTEGER,
+ hash TEXT
+ );
+
+ CREATE TABLE tags (
+ id INTEGER PRIMARY KEY,
+ name TEXT NOT NULL,
+ parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
+ canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
+ UNIQUE(name, parent_id)
+ );
+
+ CREATE TABLE file_tags (
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+ tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
+ PRIMARY KEY(file_id, tag_id)
+ );
+
+ CREATE TABLE attributes (
+ id INTEGER PRIMARY KEY,
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+ key TEXT NOT NULL,
+ value TEXT,
+ UNIQUE(file_id, key)
+ );
+
+ CREATE VIRTUAL TABLE files_fts
+ USING fts5(
+ path,
+ tags_text,
+ attrs_text,
+ content='',
+ tokenize=\"unicode61 remove_diacritics 2\"
+ );"
+ )?;
+
+ // Apply our fixed triggers
+ println!("Applying fixed FTS triggers...");
+ conn.execute_batch(
+ "CREATE TRIGGER files_fts_ai_file
+ AFTER INSERT ON files
+ BEGIN
+ INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
+ VALUES (
+ NEW.id,
+ NEW.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path as tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = NEW.id
+
+ UNION
+
+ SELECT t.name as tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = NEW.id)
+ );
+ END;
+
+ CREATE TRIGGER file_tags_fts_ai
+ AFTER INSERT ON file_tags
+ BEGIN
+ INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+ SELECT f.id, f.path,
+ (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+ FROM (
+ WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+ SELECT t.id, t.name, t.parent_id, t.name
+ FROM tags t
+ WHERE t.parent_id IS NULL
+
+ UNION ALL
+
+ SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+ FROM tags t
+ JOIN tag_tree tt ON t.parent_id = tt.id
+ )
+ SELECT DISTINCT tag_tree.path as tag_path
+ FROM file_tags ft
+ JOIN tag_tree ON ft.tag_id = tag_tree.id
+ WHERE ft.file_id = f.id
+
+ UNION
+
+ SELECT t.name as tag_path
+ FROM file_tags ft
+ JOIN tags t ON ft.tag_id = t.id
+ WHERE ft.file_id = f.id AND t.parent_id IS NULL
+ )),
+ (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+ FROM attributes a
+ WHERE a.file_id = f.id)
+ FROM files f
+ WHERE f.id = NEW.file_id;
+ END;"
+ )?;
+
+ // Insert test data
+ println!("Inserting test data...");
+
+ // Insert a test file
+ conn.execute(
+ "INSERT INTO files (id, path) VALUES (1, '/test/document.md')",
+ [],
+ )?;
+
+ // Create hierarchical tags: project/md
+ println!("Creating hierarchical tags: project/md");
+
+ // Insert parent tag 'project'
+ conn.execute(
+ "INSERT INTO tags (id, name, parent_id) VALUES (1, 'project', NULL)",
+ [],
+ )?;
+
+ // Insert child tag 'md' under 'project'
+ conn.execute(
+ "INSERT INTO tags (id, name, parent_id) VALUES (2, 'md', 1)",
+ [],
+ )?;
+
+ // Tag the file with the 'md' tag (which is under 'project')
+ conn.execute(
+ "INSERT INTO file_tags (file_id, tag_id) VALUES (1, 2)",
+ [],
+ )?;
+
+ // Check what's in the FTS index
+ println!("\nChecking FTS index content:");
+ let mut stmt = conn.prepare("SELECT rowid, path, tags_text, attrs_text FROM files_fts")?;
+ let rows = stmt.query_map([], |row| {
+ Ok((
+ row.get::<_, i64>(0)?,
+ row.get::<_, String>(1)?,
+ row.get::<_, String>(2)?,
+ row.get::<_, String>(3)?,
+ ))
+ })?;
+
+ for row in rows {
+ let (id, path, tags, attrs) = row?;
+ println!("ID: {}, Path: {}, Tags: '{}', Attrs: '{}'", id, path, tags, attrs);
+ }
+
+ // Test searching for the full hierarchical tag path
+ println!("\nTesting search for 'project/md':");
+ let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project/md'")?;
+ let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
+
+ let mut found = false;
+ for row in rows {
+ found = true;
+ println!("Found file: {}", row?);
+ }
+
+ if !found {
+ println!("No files found with tag 'project/md'");
+ }
+
+ // Test searching for just the parent tag
+ println!("\nTesting search for just 'project':");
+ let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project'")?;
+ let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
+
+ let mut found = false;
+ for row in rows {
+ found = true;
+ println!("Found file: {}", row?);
+ }
+
+ if !found {
+ println!("No files found with tag 'project'");
+ }
+
+ // Test searching for just the child tag
+ println!("\nTesting search for just 'md':");
+ let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'md'")?;
+ let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
+
+ let mut found = false;
+ for row in rows {
+ found = true;
+ println!("Found file: {}", row?);
+ }
+
+ if !found {
+ println!("No files found with tag 'md'");
+ }
+
+ println!("\nTest completed successfully!");
+ Ok(())
+}
diff --git a/target/release/marlin b/target/release/marlin
index dffd7de..a571766 100755
Binary files a/target/release/marlin and b/target/release/marlin differ
diff --git a/target/release/marlin.d b/target/release/marlin.d
index 8d05671..391de91 100644
--- a/target/release/marlin.d
+++ b/target/release/marlin.d
@@ -1 +1 @@
-/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs
+/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs
diff --git a/tests/e2e.rs b/tests/e2e.rs
new file mode 100644
index 0000000..64fc8dc
--- /dev/null
+++ b/tests/e2e.rs
@@ -0,0 +1,121 @@
+//! End-to-end “happy path” smoke-tests for the `marlin` binary.
+//!
+//! Run with `cargo test --test e2e` (CI does) or `cargo test`.
+
+use assert_cmd::prelude::*;
+use predicates::prelude::*;
+use std::{fs, path::PathBuf, process::Command};
+use tempfile::tempdir;
+
+/// Absolute path to the freshly-built `marlin` binary.
+fn marlin_bin() -> PathBuf {
+ PathBuf::from(env!("CARGO_BIN_EXE_marlin"))
+}
+
+/// Create the demo directory structure and seed files.
+fn spawn_demo_tree(root: &PathBuf) {
+ fs::create_dir_all(root.join("Projects/Alpha")).unwrap();
+ fs::create_dir_all(root.join("Projects/Beta")).unwrap();
+ fs::create_dir_all(root.join("Projects/Gamma")).unwrap();
+ fs::create_dir_all(root.join("Logs")).unwrap();
+ fs::create_dir_all(root.join("Reports")).unwrap();
+
+ fs::write(root.join("Projects/Alpha/draft1.md"), "- [ ] TODO foo\n").unwrap();
+ fs::write(root.join("Projects/Alpha/draft2.md"), "- [x] TODO foo\n").unwrap();
+ fs::write(root.join("Projects/Beta/final.md"), "done\n").unwrap();
+ fs::write(root.join("Projects/Gamma/TODO.txt"), "TODO bar\n").unwrap();
+ fs::write(root.join("Logs/app.log"), "ERROR omg\n").unwrap();
+ fs::write(root.join("Reports/Q1.pdf"), "PDF\n").unwrap();
+}
+
+/// Shorthand for “run and must succeed”.
+fn ok(cmd: &mut Command) -> assert_cmd::assert::Assert {
+ cmd.assert().success()
+}
+
+#[test]
+fn full_cli_flow() -> Result<(), Box<dyn std::error::Error>> {
+ /* ── 1 ░ sandbox ───────────────────────────────────────────── */
+
+ let tmp = tempdir()?; // wiped on drop
+ let demo_dir = tmp.path().join("marlin_demo");
+ spawn_demo_tree(&demo_dir);
+
+ let db_path = demo_dir.join("index.db");
+
+ // Helper to spawn a fresh `marlin` Command with the DB env-var set
+ let marlin = || {
+ let mut c = Command::new(marlin_bin());
+ c.env("MARLIN_DB_PATH", &db_path);
+ c
+ };
+
+ /* ── 2 ░ init ( auto-scan cwd ) ───────────────────────────── */
+
+ ok(marlin()
+ .current_dir(&demo_dir)
+ .arg("init"));
+
+ /* ── 3 ░ tag & attr demos ─────────────────────────────────── */
+
+ ok(marlin()
+ .arg("tag")
+ .arg(format!("{}/Projects/**/*.md", demo_dir.display()))
+ .arg("project/md"));
+
+ ok(marlin()
+ .arg("attr")
+ .arg("set")
+ .arg(format!("{}/Reports/*.pdf", demo_dir.display()))
+ .arg("reviewed")
+ .arg("yes"));
+
+ /* ── 4 ░ quick search sanity checks ───────────────────────── */
+
+ marlin()
+ .arg("search").arg("TODO")
+ .assert()
+ .stdout(predicate::str::contains("TODO.txt"));
+
+ marlin()
+ .arg("search").arg("attr:reviewed=yes")
+ .assert()
+ .stdout(predicate::str::contains("Q1.pdf"));
+
+ /* ── 5 ░ link flow & backlinks ────────────────────────────── */
+
+ let foo = demo_dir.join("foo.txt");
+ let bar = demo_dir.join("bar.txt");
+ fs::write(&foo, "")?;
+ fs::write(&bar, "")?;
+
+ ok(marlin().arg("scan").arg(&demo_dir));
+
+ ok(marlin()
+ .arg("link").arg("add")
+ .arg(&foo).arg(&bar));
+
+ marlin()
+ .arg("link").arg("backlinks").arg(&bar)
+ .assert()
+ .stdout(predicate::str::contains("foo.txt"));
+
+ /* ── 6 ░ backup → delete DB → restore ────────────────────── */
+
+ let backup_path = String::from_utf8(
+ marlin().arg("backup").output()?.stdout
+ )?;
+ let backup_file = backup_path.split_whitespace().last().unwrap();
+
+ fs::remove_file(&db_path)?; // simulate corruption
+ ok(marlin().arg("restore").arg(backup_file)); // restore
+
+ // Search must still work afterwards
+ marlin()
+ .arg("search").arg("TODO")
+ .assert()
+ .stdout(predicate::str::contains("TODO.txt"));
+
+ Ok(())
+}
+
diff --git a/tests/neg.rs b/tests/neg.rs
new file mode 100644
index 0000000..89af7f1
--- /dev/null
+++ b/tests/neg.rs
@@ -0,0 +1,81 @@
+//! Negative-path integration tests (“should fail / warn”).
+
+use predicates::str;
+use tempfile::tempdir;
+
+mod util;
+use util::marlin;
+
+/* ───────────────────────── LINKS ─────────────────────────────── */
+
+#[test]
+fn link_non_indexed_should_fail() {
+ let tmp = tempdir().unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ std::fs::write(tmp.path().join("foo.txt"), "").unwrap();
+ std::fs::write(tmp.path().join("bar.txt"), "").unwrap();
+
+ marlin(&tmp)
+ .current_dir(tmp.path())
+ .args([
+ "link", "add",
+ &tmp.path().join("foo.txt").to_string_lossy(),
+ &tmp.path().join("bar.txt").to_string_lossy()
+ ])
+ .assert()
+ .failure()
+ .stderr(str::contains("file not indexed"));
+}
+
+/* ───────────────────────── ATTR ─────────────────────────────── */
+
+#[test]
+fn attr_set_on_non_indexed_file_should_warn() {
+ let tmp = tempdir().unwrap();
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ let ghost = tmp.path().join("ghost.txt");
+ std::fs::write(&ghost, "").unwrap();
+
+ marlin(&tmp)
+ .args(["attr","set",
+ &ghost.to_string_lossy(),"foo","bar"])
+ .assert()
+ .success() // exits 0
+ .stderr(str::contains("not indexed"));
+}
+
+/* ───────────────────── COLLECTIONS ───────────────────────────── */
+
+#[test]
+fn coll_add_unknown_collection_should_fail() {
+ let tmp = tempdir().unwrap();
+ let file = tmp.path().join("doc.txt");
+ std::fs::write(&file, "").unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ marlin(&tmp)
+ .args(["coll","add","nope",&file.to_string_lossy()])
+ .assert()
+ .failure();
+}
+
+/* ───────────────────── RESTORE (bad file) ───────────────────── */
+
+#[test]
+fn restore_with_nonexistent_backup_should_fail() {
+ let tmp = tempdir().unwrap();
+
+ // create an empty DB first
+ marlin(&tmp).arg("init").assert().success();
+
+ marlin(&tmp)
+ .args(["restore", "/definitely/not/here.db"])
+ .assert()
+ .failure()
+ .stderr(str::contains("Failed to restore"));
+}
+
diff --git a/tests/pos.rs b/tests/pos.rs
new file mode 100644
index 0000000..1d00659
--- /dev/null
+++ b/tests/pos.rs
@@ -0,0 +1,171 @@
+//! Positive-path integration checks for every sub-command
+//! that already has real logic behind it.
+
+mod util;
+use util::marlin;
+
+use predicates::{prelude::*, str}; // brings `PredicateBooleanExt::and`
+use std::fs;
+use tempfile::tempdir;
+
+/* ─────────────────────────── TAG ─────────────────────────────── */
+
+#[test]
+fn tag_should_add_hierarchical_tag_and_search_finds_it() {
+ let tmp = tempdir().unwrap();
+ let file = tmp.path().join("foo.md");
+ fs::write(&file, "# test\n").unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ marlin(&tmp)
+ .args(["tag", file.to_str().unwrap(), "project/md"])
+ .assert().success();
+
+ marlin(&tmp)
+ .args(["search", "tag:project/md"])
+ .assert()
+ .success()
+ .stdout(str::contains("foo.md"));
+}
+
+/* ─────────────────────────── ATTR ────────────────────────────── */
+
+#[test]
+fn attr_set_then_ls_roundtrip() {
+ let tmp = tempdir().unwrap();
+ let file = tmp.path().join("report.pdf");
+ fs::write(&file, "%PDF-1.4\n").unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ marlin(&tmp)
+ .args(["attr", "set", file.to_str().unwrap(), "reviewed", "yes"])
+ .assert().success();
+
+ marlin(&tmp)
+ .args(["attr", "ls", file.to_str().unwrap()])
+ .assert()
+ .success()
+ .stdout(str::contains("reviewed = yes"));
+}
+
+/* ─────────────────────── COLLECTIONS ────────────────────────── */
+
+#[test]
+fn coll_create_add_and_list() {
+ let tmp = tempdir().unwrap();
+
+ let a = tmp.path().join("a.txt");
+ let b = tmp.path().join("b.txt");
+ fs::write(&a, "").unwrap();
+ fs::write(&b, "").unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ marlin(&tmp).args(["coll", "create", "Set"]).assert().success();
+ for f in [&a, &b] {
+ marlin(&tmp).args(["coll", "add", "Set", f.to_str().unwrap()]).assert().success();
+ }
+
+ marlin(&tmp)
+ .args(["coll", "list", "Set"])
+ .assert()
+ .success()
+ .stdout(str::contains("a.txt").and(str::contains("b.txt")));
+}
+
+/* ─────────────────────────── VIEWS ───────────────────────────── */
+
+#[test]
+fn view_save_list_and_exec() {
+ let tmp = tempdir().unwrap();
+
+ let todo = tmp.path().join("TODO.txt");
+ fs::write(&todo, "remember the milk\n").unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ // save & list
+ marlin(&tmp).args(["view", "save", "tasks", "milk"]).assert().success();
+ marlin(&tmp)
+ .args(["view", "list"])
+ .assert()
+ .success()
+ .stdout(str::contains("tasks: milk"));
+
+ // exec
+ marlin(&tmp)
+ .args(["view", "exec", "tasks"])
+ .assert()
+ .success()
+ .stdout(str::contains("TODO.txt"));
+}
+
+/* ─────────────────────────── LINKS ───────────────────────────── */
+
+#[test]
+fn link_add_rm_and_list() {
+ let tmp = tempdir().unwrap();
+
+ let foo = tmp.path().join("foo.txt");
+ let bar = tmp.path().join("bar.txt");
+ fs::write(&foo, "").unwrap();
+ fs::write(&bar, "").unwrap();
+
+ // handy closure
+ let mc = || marlin(&tmp);
+
+ mc().current_dir(tmp.path()).arg("init").assert().success();
+ mc().args(["scan", tmp.path().to_str().unwrap()]).assert().success();
+
+ // add
+ mc().args(["link", "add", foo.to_str().unwrap(), bar.to_str().unwrap()])
+ .assert().success();
+
+ // list (outgoing default)
+ mc().args(["link", "list", foo.to_str().unwrap()])
+ .assert().success()
+ .stdout(str::contains("foo.txt").and(str::contains("bar.txt")));
+
+ // remove
+ mc().args(["link", "rm", foo.to_str().unwrap(), bar.to_str().unwrap()])
+ .assert().success();
+
+ // list now empty
+ mc().args(["link", "list", foo.to_str().unwrap()])
+ .assert().success()
+ .stdout(str::is_empty());
+}
+
+/* ─────────────────────── SCAN (multi-path) ───────────────────── */
+
+#[test]
+fn scan_with_multiple_paths_indexes_all() {
+ let tmp = tempdir().unwrap();
+
+ let dir_a = tmp.path().join("A");
+ let dir_b = tmp.path().join("B");
+ std::fs::create_dir_all(&dir_a).unwrap();
+ std::fs::create_dir_all(&dir_b).unwrap();
+ let f1 = dir_a.join("one.txt");
+ let f2 = dir_b.join("two.txt");
+ fs::write(&f1, "").unwrap();
+ fs::write(&f2, "").unwrap();
+
+ marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+ // multi-path scan
+ marlin(&tmp)
+ .args(["scan", dir_a.to_str().unwrap(), dir_b.to_str().unwrap()])
+ .assert().success();
+
+ // both files findable
+ for term in ["one.txt", "two.txt"] {
+ marlin(&tmp).args(["search", term])
+ .assert()
+ .success()
+ .stdout(str::contains(term));
+ }
+}
+
diff --git a/tests/test.md b/tests/test.md
new file mode 100644
index 0000000..bc1abc1
--- /dev/null
+++ b/tests/test.md
@@ -0,0 +1,68 @@
+# Testing
+
+Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
+
+---
+
+## 0 Prepare once
+
+```bash
+# Run once (or add to ~/.bashrc) so debug + release artefacts land
+# in the same predictable place. Speeds up future builds.
+export CARGO_TARGET_DIR=target
+```
+
+---
+
+## 1 Build the new binary
+
+```bash
+git pull # grab the latest commit
+cargo build --release
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
+```
+
+* `cargo build --release` – builds the optimised binary.
+* `install …` – copies it into your `$PATH` so `marlin` on the CLI is the fresh one.
+
+---
+
+## 2 Run the smoke-test suite
+
+```bash
+# Runs the end-to-end test we added in tests/e2e.rs
+cargo test --test e2e -- --nocapture
+```
+
+* `--test e2e` – compiles and runs **only** `tests/e2e.rs`; other unit tests are skipped (add them later if you like).
+* `--nocapture` – streams stdout/stderr so you can watch each CLI step in real time.
+* Exit-code **0** ➜ everything passed.
+ Any non-zero exit or a red ✗ line means a step failed; the assert’s diff will show the command and its output.
+
+---
+
+## 3 (Optionally) run all tests
+
+```bash
+cargo test --all -- --nocapture
+```
+
+This will execute:
+
+* unit tests in `src/**`
+* every file in `tests/`
+* doc-tests
+
+If you wire **`cargo test --all`** into CI (GitHub Actions, GitLab, etc.), pushes that break the build or tests will be rejected automatically.
+
+---
+
+### One-liner helper (copy/paste)
+
+```bash
+git pull && cargo build --release &&
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
+cargo test --all -- --nocapture
+```
+
+Stick that in a shell alias (`alias marlin-ci='…'`) and you’ve got a 5-second upgrade-and-verify loop.
diff --git a/tests/util.rs b/tests/util.rs
new file mode 100644
index 0000000..5f19ffb
--- /dev/null
+++ b/tests/util.rs
@@ -0,0 +1,23 @@
+//! tests/util.rs
+//! Small helpers shared across integration tests.
+
+use std::path::{Path, PathBuf};
+use tempfile::TempDir;
+use assert_cmd::Command;
+/// Absolute path to the freshly-built `marlin` binary.
+pub fn bin() -> PathBuf {
+ PathBuf::from(env!("CARGO_BIN_EXE_marlin"))
+}
+
+/// Build a `Command` for `marlin` whose `MARLIN_DB_PATH` is
+/// `<tmp>/index.db`.
+///
+/// Each call yields a brand-new `Command`, so callers can freely add
+/// arguments, change the working directory, etc., without affecting
+/// other invocations.
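+///
+/// e.g. `marlin(&tmp).arg("init").assert().success();`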
+pub fn marlin(tmp: &TempDir) -> Command {
+ let db_path: &Path = &tmp.path().join("index.db");
+ let mut cmd = Command::new(bin());
+ cmd.env("MARLIN_DB_PATH", db_path);
+ cmd
+}