Merge pull request #1 from PR0M3TH3AN/beta

Beta
Author: thePR0M3TH3AN · 2025-05-16 22:53:02 -04:00 · committed by GitHub
32 changed files with 2883 additions and 403 deletions

Cargo.lock (generated)

@@ -94,6 +94,22 @@ version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "assert_cmd"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66"
dependencies = [
"anstyle",
"bstr",
"doc-comment",
"libc",
"predicates",
"predicates-core",
"predicates-tree",
"wait-timeout",
]
[[package]]
name = "autocfg"
version = "1.4.0"
@@ -106,6 +122,17 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bstr"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.9",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
@@ -163,6 +190,15 @@ dependencies = [
"strsim",
]
[[package]]
name = "clap_complete"
version = "4.5.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c91d3baa3bcd889d60e6ef28874126a0b384fd225ab83aa6d8a801c519194ce1"
dependencies = [
"clap",
]
[[package]]
name = "clap_derive"
version = "4.5.32"
@@ -193,6 +229,12 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "difflib"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "directories"
version = "5.0.1"
@@ -202,6 +244,15 @@ dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "6.0.0"
@@ -235,6 +286,22 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "errno"
version = "0.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18"
dependencies = [
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "fallible-iterator"
version = "0.3.0"
@@ -247,6 +314,21 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "float-cmp"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
dependencies = [
"num-traits",
]
[[package]]
name = "getrandom"
version = "0.2.16"
@@ -255,7 +337,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
]
[[package]]
@@ -318,6 +412,12 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "js-sys"
version = "0.3.77"
@@ -361,6 +461,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "log"
version = "0.4.27"
@@ -372,13 +478,19 @@ name = "marlin"
version = "0.1.0"
dependencies = [
"anyhow",
"assert_cmd",
"chrono",
"clap",
"clap_complete",
"directories",
"dirs 5.0.1",
"glob",
"predicates",
"rusqlite",
"serde_json",
"shellexpand",
"shlex",
"tempfile",
"tracing",
"tracing-subscriber",
"walkdir",
@@ -399,6 +511,12 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "normalize-line-endings"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@@ -448,6 +566,36 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "predicates"
version = "3.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573"
dependencies = [
"anstyle",
"difflib",
"float-cmp",
"normalize-line-endings",
"predicates-core",
"regex",
]
[[package]]
name = "predicates-core"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa"
[[package]]
name = "predicates-tree"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c"
dependencies = [
"predicates-core",
"termtree",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
@@ -466,13 +614,19 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
[[package]]
name = "redox_users"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
dependencies = [
"getrandom",
"getrandom 0.2.16",
"libredox",
"thiserror 1.0.69",
]
@@ -483,7 +637,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
dependencies = [
"getrandom",
"getrandom 0.2.16",
"libredox",
"thiserror 2.0.12",
]
@@ -546,12 +700,31 @@ dependencies = [
"smallvec",
]
[[package]]
name = "rustix"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.59.0",
]
[[package]]
name = "rustversion"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
[[package]]
name = "ryu"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
version = "1.0.6"
@@ -561,6 +734,38 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
@@ -576,7 +781,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb"
dependencies = [
"dirs",
"dirs 6.0.0",
]
[[package]]
@@ -608,6 +813,25 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tempfile"
version = "3.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
dependencies = [
"fastrand",
"getrandom 0.3.3",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "termtree"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -749,6 +973,15 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "walkdir"
version = "2.5.0"
@@ -765,6 +998,15 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
@@ -1052,6 +1294,15 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]
[[package]]
name = "zerocopy"
version = "0.8.25"

Cargo.toml

@@ -15,4 +15,17 @@ walkdir = "2.5"
shlex = "1.3"
chrono = "0.4"
shellexpand = "3.1"
clap_complete = "4.1"
serde_json = { version = "1", optional = true } # <-- NEW
[dev-dependencies]
assert_cmd = "2"
predicates = "3"
tempfile = "3"
dirs = "5" # cross-platform data dir helper
[features]
# The CLI prints JSON only when this feature is enabled.
# Having the feature listed silences the `unexpected cfg` lint even
# when you don't turn it on.
json = ["serde_json"]
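
For reference, the gating pattern this feature enables looks like the sketch below; it mirrors the `#[cfg(feature = "json")]` blocks in `src/cli/coll.rs` and `src/cli/view.rs` later in this diff (the function name here is illustrative only):

```rust
// Illustrative only: JSON output compiles solely under `--features json`,
// which is why the feature must be declared even when unused.
#[cfg(feature = "json")]
fn print_json(paths: &[String]) -> anyhow::Result<()> {
    println!("{}", serde_json::to_string(paths)?);
    Ok(())
}
```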

README.md

@@ -2,21 +2,24 @@
# Marlin
**Marlin** is a lightweight, metadata-driven file indexer that runs 100 % on your computer. It scans folders, stores paths and file stats in SQLite, lets you attach hierarchical **tags** and **custom attributes**, takes automatic snapshots, and offers instant full-text search via FTS5.
*No cloud, no telemetry: your data never leaves the machine.*
**Marlin** is a lightweight, metadata-driven file indexer that runs **100 % on your computer**.
It scans folders, stores paths and file stats in SQLite, lets you attach hierarchical **tags** and **custom attributes**, keeps timestamped **snapshots**, and offers instant full-text search via FTS5.
_No cloud, no telemetry: your data never leaves the machine._
---
## Feature highlights
| Area | What you get |
| -------------- | --------------------------------------------------------------------------------- |
| **Safety** | Timestamped backups (`marlin backup`) and one-command restore (`marlin restore`) |
| **Resilience** | Versioned, idempotent schema migrations; zero-downtime upgrades |
| **Indexing** | Fast multi-path scanner with SQLite WAL concurrency |
| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) |
| **Search** | Prefix-aware FTS5 across paths, tags, and attributes; optional `--exec` per match |
| **DX / Logs** | Structured tracing (`RUST_LOG=debug`) for every operation |
| Area | What you get |
| ------------------- | ----------------------------------------------------------------------------------------------------- |
| **Safety** | Timestamped backups (`marlin backup`) and one-command restore (`marlin restore`) |
| **Resilience** | Versioned, idempotent schema migrations; zero-downtime upgrades |
| **Indexing** | Fast multi-path scanner with SQLite WAL concurrency |
| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) |
| **Relations** | Typed file ↔ file links (`marlin link`) with backlinks viewer |
| **Collections / Views** | Named playlists (`marlin coll`) & saved searches (`marlin view`) for instant recall |
| **Search** | Prefix-aware FTS5 across paths, tags, attrs & links; optional `--exec` per match <br>(grep-style context snippets coming Q3) |
| **DX / Logs** | Structured tracing (`RUST_LOG=debug`) for every operation |
---
@@ -26,11 +29,11 @@
┌──────────────┐ marlin scan ┌─────────────┐
│ your files │ ─────────────────────▶│ SQLite │
│ (any folder) │ │ files/tags │
└──────────────┘ tag / attr │ attrs / FTS │
▲ search / exec └──────┬──────┘
└──────────────┘ tag / attr / link │ attrs / FTS │
search / exec └──────┬──────┘
└────────── backup / restore ▼
timestamped snapshots
```
````
---
@@ -38,7 +41,7 @@
| Requirement | Why |
| ------------------ | ----------------------------- |
| **Rust** ≥ 1.77 | Build toolchain (`rustup.rs`) |
| **Rust ≥ 1.77** | Build toolchain (`rustup.rs`) |
| C build essentials | Builds bundled SQLite (Linux) |
macOS & Windows users: let the Rust installer pull the matching build tools.
@@ -48,32 +51,83 @@ macOS & Windows users: let the Rust installer pull the matching build tools.
## Build & install
```bash
git clone https://github.com/yourname/marlin.git
cd marlin
git clone https://github.com/PR0M3TH3AN/Marlin.git
cd Marlin
cargo build --release
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin # optional
# (Optional) install into your PATH
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
---
## Quick start
For a concise walkthrough—including **links, collections and views**—see
[**Quick start & Demo**](marlin_demo.md).
---
## Testing
Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
### 0 · Prepare once
```bash
marlin init # create DB (idempotent)
marlin scan ~/Pictures ~/Documents # index files
marlin tag "~/Pictures/**/*.jpg" photos/trip-2024 # add tag
marlin attr set "~/Documents/**/*.pdf" reviewed yes
marlin search reviewed --exec "xdg-open {}" # open matches
marlin backup # snapshot DB
# Put build artefacts in one place (faster incremental builds)
export CARGO_TARGET_DIR=target
```
### 1 · Build the new binary
```bash
git pull
cargo build --release
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
### 2 · Run the smoke-test suite
```bash
cargo test --test e2e -- --nocapture
```
*Streams CLI output live; exit-code 0 = all good.*
### 3 · (Optionally) run **all** tests
```bash
cargo test --all -- --nocapture
```
This now covers:
* unit tests in `src/**`
* positive & negative integration suites (`tests/pos.rs`, `tests/neg.rs`)
* doc-tests
#### One-liner helper
```bash
git pull && cargo build --release &&
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
cargo test --test e2e -- --nocapture
```
Alias it as `marlin-ci` for a 5-second upgrade-and-verify loop.
---
### Database location
* **Linux** `~/.local/share/marlin/index.db`
* **macOS** `~/Library/Application Support/marlin/index.db`
* **Windows** `%APPDATA%\marlin\index.db`
| OS | Default path |
| ----------- | ----------------------------------------------- |
| **Linux** | `~/.local/share/marlin/index.db` |
| **macOS** | `~/Library/Application Support/marlin/index.db` |
| **Windows** | `%APPDATA%\marlin\index.db` |
Override with:
Override:
```bash
export MARLIN_DB_PATH=/path/to/custom.db
@@ -86,190 +140,57 @@ export MARLIN_DB_PATH=/path/to/custom.db
```text
marlin <COMMAND> [ARGS]
init create / migrate database
scan <PATHS>... walk directories & index files
tag "<glob>" <tag_path> add hierarchical tag
attr set|ls … manage custom attributes
search <query> [--exec CMD] FTS query, optionally run CMD on each hit
backup create timestamped snapshot in backups/
restore <snapshot.db> replace DB with snapshot
init create / migrate DB **and perform an initial scan of the cwd**
scan <PATHS>... walk directories & (re)index files
tag "<glob>" <tag_path> add hierarchical tag
attr set <pattern> <key> <val> set or update custom attribute
attr ls <path> list attributes
link add|rm|list|backlinks manage typed file-to-file relations
coll create|add|list manage named collections (“playlists”)
view save|list|exec save and run smart views (saved queries)
search <query> [--exec CMD] FTS5 query; optionally run CMD per hit
backup create timestamped snapshot in `backups/`
restore <snapshot.db> replace DB with snapshot
completions <shell> generate shell completions
```
### Attribute subcommands
### Attribute sub-commands
| Command | Example |
| ---------- | ------------------------------------------------ |
| `attr set` | `marlin attr set "~/Docs/**/*.pdf" reviewed yes` |
| `attr ls` | `marlin attr ls ~/Docs/report.pdf` |
| Command | Example |
| ----------- | ------------------------------------------------ |
| `attr set` | `marlin attr set ~/Docs/**/*.pdf reviewed yes` |
| `attr ls` | `marlin attr ls ~/Docs/report.pdf` |
| JSON output | `marlin --format=json attr ls ~/Docs/report.pdf` |
---
## Backups & restore
*Create snapshot*
```bash
marlin backup
# → ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```
*Restore snapshot*
```bash
marlin restore ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```
Marlin also takes an **automatic safety backup before every schema migration**.
> Marlin also creates an **automatic safety backup before every non-`init` command.**
> *Auto-prune (`backup --prune <N>`) lands in Q2.*
---
## Upgrading
```bash
cargo install --path . --force # rebuild & replace installed binary
cargo install --path . --force # rebuild & replace installed binary
```
The versioned migration system preserves your data across upgrades.
---
## Roadmap
See [`ROADMAP.md`](./ROADMAP.md) for the full development plan.
---
## Five-Minute Quickstart
Paste & run each block in your terminal.
---
### 0 · Prepare & build
```bash
# Clone or cd into your Marlin repo
cd ~/Documents/GitHub/Marlin
# Build the release binary
cargo build --release
```
---
### 1 · Install on your PATH
```bash
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
> Now `marlin` is available everywhere.
---
### 2 · Prepare a clean demo directory
```bash
rm -rf ~/marlin_demo
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
printf "Alpha draft\n" > ~/marlin_demo/Projects/Alpha/draft.txt
printf "Beta notes\n" > ~/marlin_demo/Projects/Beta/notes.md
printf "Receipt PDF\n" > ~/marlin_demo/Docs/receipt.pdf
printf "fake jpg\n" > ~/marlin_demo/Media/Photos/vacation.jpg
```
---
### 3 · Initialize & index files
```bash
# Use --verbose if you want full debug traces:
marlin init
marlin scan ~/marlin_demo
# or, to see every path tested:
marlin --verbose init
marlin --verbose scan ~/marlin_demo
```
> **Tip:** Rerun `marlin scan` after you add/remove/modify files; only changed files get re-indexed.
---
### 4 · Attach tags & attributes
```bash
# Tag everything under “Alpha”
marlin tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha
# Mark all PDFs as reviewed
marlin attr set "~/marlin_demo/**/*.pdf" reviewed yes
# (or with debug)
marlin --verbose tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha
marlin --verbose attr set "~/marlin_demo/**/*.pdf" reviewed yes
```
---
### 5 · Search your index
```bash
# By tag or filename
marlin search alpha
# Combined terms (AND across path+attrs)
marlin search "reviewed AND pdf"
# Run a command on each hit
marlin search reviewed --exec "echo HIT → {}"
# If things aren't matching, add --verbose to see the underlying FTS query:
marlin --verbose search "reviewed AND pdf"
```
> `{}` in `--exec` is replaced with each file's path.
---
### 6 · Backup & restore
```bash
# Snapshot and store its name
snap=$(marlin backup | awk '{print $NF}')
# Simulate data loss
rm ~/.local/share/marlin/index.db
# Restore instantly
marlin restore "$snap"
# Verify your files still show up
marlin search reviewed
```
> Backups live under `~/.local/share/marlin/backups` by default.
##### What you just exercised
| Command | Purpose |
| ----------------- | ----------------------------------------- |
| `marlin init` | Create / upgrade the SQLite database |
| `marlin scan` | Walk directories and (re)index files |
| `marlin tag` | Attach hierarchical tags |
| `marlin attr set` | Add/overwrite custom key-value attributes |
| `marlin search` | FTS5 search across path / tags / attrs |
| `--exec` | Pipe hits into any shell command |
| `marlin backup` | Timestamped snapshot of the DB |
| `marlin restore` | Replace live DB with a chosen snapshot |
That's the complete surface area of Marlin today; feel free to play around or
point the scanner at real folders.
Versioned migrations preserve your data across upgrades.
---
## License
MIT; see `LICENSE`
MIT; see [`LICENSE`](LICENSE).

bar.txt (new, empty file)

foo.txt (new, empty file)

marlin_demo.md (new file)

@@ -0,0 +1,183 @@
# Marlin Demo 🚀
Below is a **“hello-world” walk-through** that matches the current `main`
branch (auto-scan on `marlin init`, no more forced-migration chatter, cleaner
build). Everything runs offline on a throw-away directory under `~/marlin_demo`.
---
## 0 · Build & install Marlin
```bash
# inside the repo
export CARGO_TARGET_DIR=target # <-- speeds up future builds (once)
cargo build --release # build the new binary
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
# (cargo install --path . --locked --force works too)
````
---
## 1 · Create the demo tree
```bash
rm -rf ~/marlin_demo
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta,Gamma},Logs,Reports,Scripts,Media/Photos}
# (zsh users: quote the pattern or enable braceexpand first)
# ── Projects ───────────────────────────────────────────────────
cat <<EOF > ~/marlin_demo/Projects/Alpha/draft1.md
# Alpha draft 1
- [ ] TODO: outline architecture
- [ ] TODO: write tests
EOF
cat <<EOF > ~/marlin_demo/Projects/Alpha/draft2.md
# Alpha draft 2
- [x] TODO: outline architecture
- [ ] TODO: implement feature X
EOF
cat <<EOF > ~/marlin_demo/Projects/Beta/notes.md
Beta meeting notes:
- decided on roadmap
- ACTION: follow-up with design team
EOF
cat <<EOF > ~/marlin_demo/Projects/Beta/final.md
# Beta Final
All tasks complete. Ready to ship!
EOF
cat <<EOF > ~/marlin_demo/Projects/Gamma/TODO.txt
Gamma tasks:
TODO: refactor module Y
EOF
# ── Logs & Reports ─────────────────────────────────────────────
echo "2025-05-15 12:00:00 INFO Starting app" > ~/marlin_demo/Logs/app.log
echo "2025-05-15 12:01:00 ERROR Oops, crash" >> ~/marlin_demo/Logs/app.log
echo "2025-05-15 00:00:00 INFO System check OK" > ~/marlin_demo/Logs/system.log
printf "Q1 financials\n" > ~/marlin_demo/Reports/Q1_report.pdf
# ── Scripts & Media ────────────────────────────────────────────
cat <<'EOF' > ~/marlin_demo/Scripts/deploy.sh
#!/usr/bin/env bash
echo "Deploying version $1…"
EOF
chmod +x ~/marlin_demo/Scripts/deploy.sh
echo "JPEGDATA" > ~/marlin_demo/Media/Photos/event.jpg
```
---
## 2 · Initialise **and** index (one step)
```bash
cd ~/marlin_demo # run init from the folder you want indexed
marlin init # • creates or migrates DB
# • runs *first* full scan of this directory
```
Add more directories later with `marlin scan <dir>`.
---
## 3 · Tagging examples
```bash
# Tag all project markdown as project/md
marlin tag '~/marlin_demo/Projects/**/*.md' project/md
# Tag your logs
marlin tag '~/marlin_demo/Logs/**/*.log' logs/app
# Tag everything under Beta as project/beta
marlin tag '~/marlin_demo/Projects/Beta/**/*' project/beta
```
---
## 4 · Set custom attributes
```bash
marlin attr set '~/marlin_demo/Projects/Beta/final.md' status complete
marlin attr set '~/marlin_demo/Reports/*.pdf' reviewed yes
```
---
## 5 · Play with search / exec hooks
```bash
marlin search TODO
marlin search tag:project/md
marlin search 'tag:logs/app AND ERROR'
marlin search 'attr:status=complete'
marlin search 'attr:reviewed=yes AND pdf'
marlin search 'attr:reviewed=yes' --exec 'xdg-open {}'
marlin --format=json search 'attr:status=complete' # machine-readable output
```
---
## 6 · Verbose mode
```bash
marlin --verbose scan ~/marlin_demo # watch debug logs stream by
```
---
## 7 · Snapshot & restore
```bash
snap=$(marlin backup | awk '{print $NF}')
rm ~/.local/share/marlin/index.db # simulate disaster
marlin restore "$snap"
marlin search TODO # still works
```
*(Reminder: Marlin also makes an **auto-backup** before every non-`init`
command, so manual snapshots are extra insurance.)*
---
## 8 · Linking demo
```bash
touch ~/marlin_demo/foo.txt ~/marlin_demo/bar.txt
marlin scan ~/marlin_demo # index the new files
foo=~/marlin_demo/foo.txt
bar=~/marlin_demo/bar.txt
marlin link add "$foo" "$bar" --type references # create typed link
marlin link list "$foo" # outgoing links from foo
marlin link backlinks "$bar" # incoming links to bar
```
---
## 9 · Collections & smart views
```bash
# Collection
marlin coll create SetA
marlin coll add SetA '~/marlin_demo/Projects/**/*.md'
marlin coll list SetA
# Saved view (smart folder)
marlin view save tasks 'attr:status=complete OR TODO'
marlin view exec tasks
```
---
### Recap
* `cargo build --release` + `sudo install …` is still the build path.
* **`marlin init`** scans the **current working directory** on first run.
* Scan again only when you add *new* directories (`marlin scan …`).
* Auto-backups happen before every command; manual `marlin backup` gives you extra restore points.
Happy organising!

ROADMAP.md

@@ -1,40 +1,59 @@
Here's the updated roadmap, with each new feature slotted in where its dependencies are best met:
# Marlin Roadmap 2025 → 2026 📜
| Phase | Focus | Why now? | Key deliverables |
| -------------------------- | -------------------------------------- | --------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
| **1. 2025-Q2 “Bedrock”**   | Migrations + CI baseline + core schema | We've stabilized migrations; now add foundational tables for links, groups, views | • CI: `cargo test` + `cargo sqlx migrate run --dry-run`<br>• New migrations: |
This document outlines the **official delivery plan** for Marlin over the next four quarters.
Every work-item below is *time-boxed, testable,* and traceable back to an end-user benefit.
* `links(src_file,dst_file,link_type)`
* `collections(name)` + `collection_files`
* `views(name,query)` <br>• CLI stubs for `marlin link` / `unlink` / `list-links` / `backlinks`, `marlin coll` and `marlin view` |
| **2. 2025-Q2** | Leaner FTS maintenance | Per-row triggers don't scale past ~100 k files | • Replace per-row triggers with a “dirty” flag + periodic rebuild<br>• Benchmark end-to-end on 100 k files |
| **2.1 2025-Q2** | Dirty-row FTS + CI | Prep for both scale and live-watcher—avoid full rebuilds on every change | • `scan --dirty` reindexes only changed files<br>• CI coverage for dirty-scan edge cases |
| **2.2 2025-Q2** | Live file watching | Offer true “working-dir” mode—auto-scan on FS events | • `marlin watch [dir]` via `notify` crate<br>• Incremental scan on create/modify/delete/rename |
| **2.3 2025-Q2** | Self-pruning backups | Instant protection and bounded storage—no manual snapshot cleanup | • `marlin backup --prune <N>` flag<br>• Post-scan hook to prune to latest 10<br>• Daily prune automation (cron or CI) |
| **3. 2025-Q3** | FTS5 content indexing & annotations | Full-text search over file bodies + per-file notes/highlights | • Add `files.content` column + migration<br>• Extend `files_fts` to include `content`<br>• New `annotations` table + FTS triggers<br>• CLI: `marlin annotate add\|list` |
| **4. 2025-Q3** | Content hashing, dedup & versioning | Detect duplicates, track history, enable diffs | • Populate `files.hash` with SHA-256<br>• `scan --rehash` option<br>• CLI: `marlin version diff <file>` |
| **5. 2025-Q3** | Tag aliases/canonicals & semantic/AI enhancements | Control tag sprawl and lay groundwork for AI-driven suggestions | • Enforce `canonical_id` on `tags` + `tag alias add\|ls\|rm` CLI<br>• Create `embeddings` table<br>• `scan --embed` to generate vectors<br>• CLI: `marlin tag suggest`, `marlin summary <file>`, `marlin similarity scan` |
| **6. 2025-Q4** | Search DSL v2 & Smart Views | More powerful query grammar + reusable “virtual folders” | • Replace ad-hoc parser with a `nom`-based grammar<br>• CLI: `marlin view save\|list\|exec` |
| **7. 2025-Q4** | Attribute templates, states, tasks & timeline | Structured metadata unlocks workflows, reminders & temporal context | • `templates` + `template_fields` tables + validation<br>• CLI: `marlin state set\|transitions add\|state log`, `marlin task scan\|task list`, `marlin remind set <file> <ts> "<msg>"`, `marlin event add <file> <date> "<desc>"`, `marlin timeline` |
| **8. 2026-Q1** | Dolphin read-only plugin | Surface metadata, links, annotations in native file manager | • Qt sidebar showing tags, attributes, links, annotations |
| **9. 2026-Q1** | Full edit UI | After proving read-only stable, add in-place editing | • Tag editor, collection & view managers, state/task/event dialogs |
| **10. 2026-Q2** | Multi-device sync | Final frontier: optional sync/replication layer | • Choose sync backend (rqlite / Litestream / bespoke)<br>• Support read-only mounts for remote indexes |
> **Legend**
> ✅ = item added/clarified in the latest planning round
> Δ = new sub-deliverable (wasn't in the previous version)
---
### Current sprint (ends **2025-06-01**)
## 1 · Bird's-eye Table
1. FTS rebuild prototype (dirty-rows); measure on 50 k files
2. `backup --prune` implementation + auto-prune hook
3. Integration tests for tag/attr workflows on Windows via GitHub Actions
4. **New:** basic `links`, `collections`, `views` migrations + CLI stubs
| Phase / Sprint | Timeline | Focus & Rationale | Key Deliverables (Δ = new) |
| -------------- | -------- | ----------------- | -------------------------- |
| **Sprint α · Bedrock & Metadata Domains** | **2025-Q2 (now → 6 Jun)** | Stabilise schema & CI; land first metadata domains with discoverability. | Δ CI: `cargo test` + SQL dry-run<br>Δ Unit tests (`determine_scan_root`, `escape_fts`)<br>Δ Coverage: e2e `attr --format=json`<br>Δ Refactor: move `naive_substring_search` to shared util<br>Migrations: `links`, `collections`, `views`<br>CLI stubs: `link`, `coll`, `view`<br>`marlin demo` walkthrough |
| **Epic 1 · Scale & Reliability** | 2025-Q2 | Keep scans fast; bullet-proof CI at 100 k files. | Δ Dirty-flag column + `scan --dirty`<br>Benchmarks: full vs dirty scan (100 k)<br>Replace per-row triggers with periodic rebuild<br>CI edge-case tests |
| **Epic 2 · Live Mode & Self-Pruning Backups** | 2025-Q2 | Continuous indexing & hygiene—Marlin “just works”. | Δ `marlin watch [dir]` (notify/FSEvents)<br>Δ `backup --prune <N>` + auto-prune post-scan<br>Daily / PR-merge prune in CI |
| **Phase 3 · Content FTS & Annotations** | 2025-Q3 | Index file bodies, grep-style context, inline notes. | `files.content` + migration<br>Extend `files_fts` (context snippets `-C`)<br>`annotations` table + triggers<br>CLI `annotate add\|list` |
| **Phase 4 · Versioning & Deduplication** | 2025-Q3 | History, diffs & duplicate detection. | `files.hash` (SHA-256)<br>`scan --rehash` refresh<br>CLI `version diff <file>` |
| **Phase 5 · Tag Aliases & Semantic Booster** | 2025-Q3 | Tame tag sprawl; seed AI-powered suggestions. | `canonical_id` on `tags`; CLI `tag alias …`<br>`embeddings` table + `scan --embed`<br>CLI `tag suggest`, `similarity scan`, `summary <file>` |
| **Phase 6 · Search DSL v2 & Smart Views** | 2025-Q4 | Robust grammar + virtual folders. | Replace parser with **`nom`** grammar (`AND`, `OR`, `()` …)<br>CLI `view save\|list\|exec` with aliases & paging |
| **Phase 7 · Structured Workflows** | 2025-Q4 | First-class task / state / reminder / event life-cycles. | ✅ State engine (`files.state`, `state_changes`)<br>CLI `state set\|transitions add\|log`<br>✅ Task extractor (`tasks` table) + CLI<br>`templates` + validation<br>CLI `remind …`, `event …`, `timeline` |
| **Phase 8 · Lightweight Integrations** | 2026-Q1 | Surface Marlin in editors / terminal. | VS Code & TUI extension (tags / attrs / links / notes) |
| **Phase 9 · Dolphin Sidebar Plugin (MVP)** | 2026-Q1 | Read-only Qt sidebar for Linux file managers. | Qt plug-in: tags, attrs, links, annotations |
| **Phase 10 · Full Edit UI & Multi-Device Sync** | 2026-Q2 | In-place metadata editor & optional sync layer. | GUI editors (tags, views, tasks, reminders, events)<br>Pick/implement sync backend (rqlite, Litestream, …) |
**Development principles remain**:
---
* Local-first, offline-capable
* Ship code = ship migrations
* Instrumentation first (trace spans & timings on all new commands)
## 2 · Narrative & Dependencies
1. **Lock down core schema & demo** *(Sprint α).*
Developers get immediate feedback via the `marlin demo` command while CI ensures migrations never regress.
2. **Scale & Live Mode** *(Epics 1-2).*
Dirty scanning, file-watching and auto-pruned backups guarantee snappy, hands-off operation even on six-figure corpora.
3. **Richer Search** *(Phases 3-6).*
Body-content FTS + grep-style snippets lay the groundwork; `nom` grammar then elevates power-user queries and smart views.
4. **Workflow Layers** *(Phase 7).*
State transitions, tasks and reminders turn Marlin from a passive index into an active workflow engine.
5. **UX Expansions** *(Phases 8-10).*
Start lightweight (VS Code / TUI), graduate to a read-only Dolphin plug-in, then ship full editing & sync for multi-device teams.
Every outer milestone depends only on the completion of the rows above it, **so shipping discipline in early sprints de-risks the headline features down the line.**
---
## 3 · Next Steps
* **Sprint α kickoff:** break deliverables into stories, estimate, assign.
* **Add roadmap as `docs/ROADMAP.md`** (this file).
* Wire a **Checklist issue** on GitHub: one task per Δ bullet for instant tracking.
---
*Last updated · 2025-05-16*

src/cli.rs

@@ -1,15 +1,36 @@
// src/cli.rs
use std::path::PathBuf;
use clap::{Parser, Subcommand};
pub mod link;
pub mod coll;
pub mod view;
pub mod state;
pub mod task;
pub mod remind;
pub mod annotate;
pub mod version;
pub mod event;
use clap::{Parser, Subcommand, ValueEnum};
use clap_complete::Shell;
/// Output format for commands.
#[derive(ValueEnum, Clone, Copy, Debug)]
pub enum Format {
Text,
Json,
}
/// Marlin metadata-driven file explorer (CLI utilities)
#[derive(Parser, Debug)]
#[command(author, version, about)]
#[command(author, version, about, propagate_version = true)]
pub struct Cli {
/// Enable debug logging and extra output
#[arg(long)]
pub verbose: bool,
/// Output format (text or JSON)
#[arg(long, default_value = "text", value_enum, global = true)]
pub format: Format,
#[command(subcommand)]
pub command: Commands,
}
@@ -21,12 +42,15 @@ pub enum Commands {
/// Scan one or more directories and populate the file index
Scan {
paths: Vec<PathBuf>,
/// Directories to scan (defaults to cwd)
paths: Vec<std::path::PathBuf>,
},
/// Tag files matching a glob pattern (hierarchical tags use `/`)
Tag {
/// Glob or path pattern
pattern: String,
/// Hierarchical tag name (`foo/bar`)
tag_path: String,
},
@@ -46,14 +70,58 @@ pub enum Commands {
/// Create a timestamped backup of the database
Backup,
/// Restore from a backup file (over-writes current DB)
/// Restore from a backup file (overwrites current DB)
Restore {
backup_path: PathBuf,
backup_path: std::path::PathBuf,
},
/// Generate shell completions (hidden)
#[command(hide = true)]
Completions {
/// Which shell to generate for
#[arg(value_enum)]
shell: Shell,
},
/// File-to-file links
#[command(subcommand)]
Link(link::LinkCmd),
/// Collections (groups) of files
#[command(subcommand)]
Coll(coll::CollCmd),
/// Smart views (saved queries)
#[command(subcommand)]
View(view::ViewCmd),
/// Workflow states on files
#[command(subcommand)]
State(state::StateCmd),
/// TODO/tasks management
#[command(subcommand)]
Task(task::TaskCmd),
/// Reminders on files
#[command(subcommand)]
Remind(remind::RemindCmd),
/// File annotations and highlights
#[command(subcommand)]
Annotate(annotate::AnnotateCmd),
/// Version diffs
#[command(subcommand)]
Version(version::VersionCmd),
/// Calendar events & timelines
#[command(subcommand)]
Event(event::EventCmd),
}
#[derive(Subcommand, Debug)]
pub enum AttrCmd {
Set { pattern: String, key: String, value: String },
Ls { path: PathBuf },
Ls { path: std::path::PathBuf },
}

src/cli/annotate.rs (new file)

@@ -0,0 +1,28 @@
// src/cli/annotate.rs
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum AnnotateCmd {
Add (ArgsAdd),
List(ArgsList),
}
#[derive(Args, Debug)]
pub struct ArgsAdd {
pub file: String,
pub note: String,
#[arg(long)] pub range: Option<String>,
#[arg(long)] pub highlight: bool,
}
#[derive(Args, Debug)]
pub struct ArgsList { pub file_pattern: String }
pub fn run(cmd: &AnnotateCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
match cmd {
AnnotateCmd::Add(a) => todo!("annotate add {:?}", a),
AnnotateCmd::List(a) => todo!("annotate list {:?}", a),
}
}

src/cli/coll.rs (new file)

@@ -0,0 +1,108 @@
//! `marlin coll …`: named collections of files (simple “playlists”).
use clap::{Args, Subcommand};
use rusqlite::Connection;
use crate::{
cli::Format,
db,
};
#[derive(Subcommand, Debug)]
pub enum CollCmd {
/// Create an empty collection
Create(CreateArgs),
/// Add files (glob) to a collection
Add(AddArgs),
/// List files inside a collection
List(ListArgs),
}
#[derive(Args, Debug)]
pub struct CreateArgs {
pub name: String,
}
#[derive(Args, Debug)]
pub struct AddArgs {
pub name: String,
pub file_pattern: String,
}
#[derive(Args, Debug)]
pub struct ListArgs {
pub name: String,
}
/// Look-up an existing collection **without** implicitly creating it.
///
/// Returns the collection ID or an error if it doesn't exist.
fn lookup_collection_id(conn: &Connection, name: &str) -> anyhow::Result<i64> {
conn.query_row(
"SELECT id FROM collections WHERE name = ?1",
[name],
|r| r.get(0),
)
.map_err(|_| anyhow::anyhow!("collection not found: {}", name))
}
pub fn run(cmd: &CollCmd, conn: &mut Connection, fmt: Format) -> anyhow::Result<()> {
match cmd {
/* ── coll create ──────────────────────────────────────────── */
CollCmd::Create(a) => {
db::ensure_collection(conn, &a.name)?;
if matches!(fmt, Format::Text) {
println!("Created collection '{}'", a.name);
}
}
/* ── coll add ─────────────────────────────────────────────── */
CollCmd::Add(a) => {
// Fail if the target collection does not yet exist
let coll_id = lookup_collection_id(conn, &a.name)?;
let like = a.file_pattern.replace('*', "%");
let mut stmt = conn.prepare("SELECT id FROM files WHERE path LIKE ?1")?;
let ids: Vec<i64> = stmt
.query_map([&like], |r| r.get::<_, i64>(0))?
.collect::<Result<_, _>>()?;
for fid in &ids {
db::add_file_to_collection(conn, coll_id, *fid)?;
}
match fmt {
Format::Text => println!("Added {} file(s) → '{}'", ids.len(), a.name),
Format::Json => {
#[cfg(feature = "json")]
{
println!(
"{{\"collection\":\"{}\",\"added\":{}}}",
a.name,
ids.len()
);
}
}
}
}
/* ── coll list ────────────────────────────────────────────── */
CollCmd::List(a) => {
let files = db::list_collection(conn, &a.name)?;
match fmt {
Format::Text => {
for f in files {
println!("{f}");
}
}
Format::Json => {
#[cfg(feature = "json")]
{
println!("{}", serde_json::to_string(&files)?);
}
}
}
}
}
Ok(())
}

src/cli/commands.yaml (new file)

@@ -0,0 +1,81 @@
# cli/commands.yaml
# Philosophy: one canonical spec stops drift between docs & code.
link:
description: "Manage typed relationships between files"
actions:
add:
args: [from, to]
flags: ["--type"]
rm:
args: [from, to]
flags: ["--type"]
list:
args: [pattern]
flags: ["--direction", "--type"]
backlinks:
args: [pattern]
coll:
description: "Manage named collections of files"
actions:
create:
args: [name]
add:
args: [name, file_pattern]
list:
args: [name]
view:
description: "Save and use smart views (saved queries)"
actions:
save:
args: [view_name, query]
list: {}
exec:
args: [view_name]
state:
description: "Track workflow states on files"
actions:
set:
args: [file_pattern, new_state]
transitions-add:
args: [from_state, to_state]
log:
args: [file_pattern]
task:
description: "Extract TODOs and manage tasks"
actions:
scan:
args: [directory]
list:
flags: ["--due-today"]
remind:
description: "Attach reminders to files"
actions:
set:
args: [file_pattern, timestamp, message]
annotate:
description: "Add notes or highlights to files"
actions:
add:
args: [file, note]
flags: ["--range", "--highlight"]
list:
args: [file_pattern]
version:
description: "Versioning and diffs"
actions:
diff:
args: [file]
event:
description: "Link files to dates/events"
actions:
add:
args: [file, date, description]
timeline: {}
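
One way to enforce the "no drift" philosophy stated at the top of this spec is a test that parses the YAML and cross-checks it against the clap definitions. A hypothetical sketch, assuming `serde_yaml` as a dev-dependency (which this PR does not add):

```rust
use std::collections::BTreeMap;

// Shape of one top-level command entry in commands.yaml.
#[derive(serde::Deserialize)]
#[allow(dead_code)]
struct CommandSpec {
    description: String,
    actions: BTreeMap<String, serde_yaml::Value>,
}

#[test]
fn yaml_spec_parses() -> anyhow::Result<()> {
    let text = std::fs::read_to_string("src/cli/commands.yaml")?;
    let spec: BTreeMap<String, CommandSpec> = serde_yaml::from_str(&text)?;
    // Spot-check that the documented commands are present.
    assert!(spec.contains_key("link") && spec.contains_key("view"));
    Ok(())
}
```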

src/cli/event.rs (new file)

@@ -0,0 +1,24 @@
// src/cli/event.rs
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum EventCmd {
Add (ArgsAdd),
Timeline,
}
#[derive(Args, Debug)]
pub struct ArgsAdd {
pub file: String,
pub date: String,
pub description: String,
}
pub fn run(cmd: &EventCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
match cmd {
EventCmd::Add(a) => todo!("event add {:?}", a),
EventCmd::Timeline => todo!("event timeline"),
}
}

src/cli/link.rs (new file)

@@ -0,0 +1,155 @@
// src/cli/link.rs
use crate::db;
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum LinkCmd {
Add(LinkArgs),
Rm (LinkArgs),
List(ListArgs),
Backlinks(BacklinksArgs),
}
#[derive(Args, Debug)]
pub struct LinkArgs {
pub from: String,
pub to: String,
#[arg(long)]
pub r#type: Option<String>,
}
#[derive(Args, Debug)]
pub struct ListArgs {
pub pattern: String,
#[arg(long)]
pub direction: Option<String>,
#[arg(long)]
pub r#type: Option<String>,
}
#[derive(Args, Debug)]
pub struct BacklinksArgs {
pub pattern: String,
}
pub fn run(cmd: &LinkCmd, conn: &mut Connection, format: Format) -> anyhow::Result<()> {
match cmd {
LinkCmd::Add(args) => {
let src_id = db::file_id(conn, &args.from)?;
let dst_id = db::file_id(conn, &args.to)?;
db::add_link(conn, src_id, dst_id, args.r#type.as_deref())?;
match format {
Format::Text => {
if let Some(t) = &args.r#type {
println!("Linked '{}' → '{}' [type='{}']", args.from, args.to, t);
} else {
println!("Linked '{}' → '{}'", args.from, args.to);
}
}
Format::Json => {
let typ = args
.r#type
.as_ref()
.map(|s| format!("\"{}\"", s))
.unwrap_or_else(|| "null".into());
println!(
"{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
args.from, args.to, typ
);
}
}
}
LinkCmd::Rm(args) => {
let src_id = db::file_id(conn, &args.from)?;
let dst_id = db::file_id(conn, &args.to)?;
db::remove_link(conn, src_id, dst_id, args.r#type.as_deref())?;
match format {
Format::Text => {
if let Some(t) = &args.r#type {
println!("Removed link '{}' → '{}' [type='{}']", args.from, args.to, t);
} else {
println!("Removed link '{}' → '{}'", args.from, args.to);
}
}
Format::Json => {
let typ = args
.r#type
.as_ref()
.map(|s| format!("\"{}\"", s))
.unwrap_or_else(|| "null".into());
println!(
"{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
args.from, args.to, typ
);
}
}
}
LinkCmd::List(args) => {
let results = db::list_links(
conn,
&args.pattern,
args.direction.as_deref(),
args.r#type.as_deref(),
)?;
match format {
Format::Json => {
let items: Vec<String> = results
.into_iter()
.map(|(src, dst, t)| {
let typ = t
.as_ref()
.map(|s| format!("\"{}\"", s))
.unwrap_or_else(|| "null".into());
format!(
"{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
src, dst, typ
)
})
.collect();
println!("[{}]", items.join(","));
}
Format::Text => {
for (src, dst, t) in results {
if let Some(t) = t {
println!("{}{} [type='{}']", src, dst, t);
} else {
println!("{}{}", src, dst);
}
}
}
}
}
LinkCmd::Backlinks(args) => {
let results = db::find_backlinks(conn, &args.pattern)?;
match format {
Format::Json => {
let items: Vec<String> = results
.into_iter()
.map(|(src, t)| {
let typ = t
.as_ref()
.map(|s| format!("\"{}\"", s))
.unwrap_or_else(|| "null".into());
format!("{{\"from\":\"{}\",\"type\":{}}}", src, typ)
})
.collect();
println!("[{}]", items.join(","));
}
Format::Text => {
for (src, t) in results {
if let Some(t) = t {
println!("{} [type='{}']", src, t);
} else {
println!("{}", src);
}
}
}
}
}
}
Ok(())
}

src/cli/remind.rs (new file)

@@ -0,0 +1,22 @@
// src/cli/remind.rs
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum RemindCmd {
Set(ArgsSet),
}
#[derive(Args, Debug)]
pub struct ArgsSet {
pub file_pattern: String,
pub timestamp: String,
pub message: String,
}
pub fn run(cmd: &RemindCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
match cmd {
RemindCmd::Set(a) => todo!("remind set {:?}", a),
}
}

src/cli/state.rs (new file)

@@ -0,0 +1,26 @@
// src/cli/state.rs
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum StateCmd {
Set(ArgsSet),
TransitionsAdd(ArgsTrans),
Log(ArgsLog),
}
#[derive(Args, Debug)]
pub struct ArgsSet { pub file_pattern: String, pub new_state: String }
#[derive(Args, Debug)]
pub struct ArgsTrans { pub from_state: String, pub to_state: String }
#[derive(Args, Debug)]
pub struct ArgsLog { pub file_pattern: String }
pub fn run(cmd: &StateCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
match cmd {
StateCmd::Set(a) => todo!("state set {:?}", a),
StateCmd::TransitionsAdd(a)=> todo!("state transitions-add {:?}", a),
StateCmd::Log(a) => todo!("state log {:?}", a),
}
}

src/cli/task.rs (new file)

@@ -0,0 +1,22 @@
// src/cli/task.rs
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum TaskCmd {
Scan(ArgsScan),
List(ArgsList),
}
#[derive(Args, Debug)]
pub struct ArgsScan { pub directory: String }
#[derive(Args, Debug)]
pub struct ArgsList { #[arg(long)] pub due_today: bool }
pub fn run(cmd: &TaskCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
match cmd {
TaskCmd::Scan(a) => todo!("task scan {:?}", a),
TaskCmd::List(a) => todo!("task list {:?}", a),
}
}

src/cli/version.rs (new file)

@@ -0,0 +1,18 @@
// src/cli/version.rs
use clap::{Subcommand, Args};
use rusqlite::Connection;
use crate::cli::Format;
#[derive(Subcommand, Debug)]
pub enum VersionCmd {
Diff(ArgsDiff),
}
#[derive(Args, Debug)]
pub struct ArgsDiff { pub file: String }
pub fn run(cmd: &VersionCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
match cmd {
VersionCmd::Diff(a) => todo!("version diff {:?}", a),
}
}

src/cli/view.rs (new file)

@@ -0,0 +1,168 @@
//! `marlin view …`: save & use “smart folders” (named queries).
use std::fs;
use anyhow::Result;
use clap::{Args, Subcommand};
use rusqlite::Connection;
use crate::{cli::Format, db};
#[derive(Subcommand, Debug)]
pub enum ViewCmd {
/// Save (or update) a view
Save(ArgsSave),
/// List all saved views
List,
/// Execute a view (print matching paths)
Exec(ArgsExec),
}
#[derive(Args, Debug)]
pub struct ArgsSave {
pub view_name: String,
pub query: String,
}
#[derive(Args, Debug)]
pub struct ArgsExec {
pub view_name: String,
}
pub fn run(cmd: &ViewCmd, conn: &mut Connection, fmt: Format) -> anyhow::Result<()> {
match cmd {
/* ── view save ───────────────────────────────────────────── */
ViewCmd::Save(a) => {
db::save_view(conn, &a.view_name, &a.query)?;
if matches!(fmt, Format::Text) {
println!("Saved view '{}' = {}", a.view_name, a.query);
}
}
/* ── view list ───────────────────────────────────────────── */
ViewCmd::List => {
let views = db::list_views(conn)?;
match fmt {
Format::Text => {
for (name, q) in views {
println!("{name}: {q}");
}
}
Format::Json => {
#[cfg(feature = "json")]
{
println!("{}", serde_json::to_string(&views)?);
}
}
}
}
/* ── view exec ───────────────────────────────────────────── */
ViewCmd::Exec(a) => {
let raw = db::view_query(conn, &a.view_name)?;
// Re-use the tiny parser from marlin search
let fts_expr = build_fts_match(&raw);
let mut stmt = conn.prepare(
r#"
SELECT f.path
FROM files_fts
JOIN files f ON f.rowid = files_fts.rowid
WHERE files_fts MATCH ?1
ORDER BY rank
"#,
)?;
let mut paths: Vec<String> = stmt
.query_map([fts_expr], |r| r.get::<_, String>(0))?
.collect::<Result<_, _>>()?;
/* ── NEW: graceful fallback when FTS finds nothing ───── */
if paths.is_empty() && !raw.contains(':') {
paths = naive_search(conn, &raw)?;
}
if paths.is_empty() && matches!(fmt, Format::Text) {
eprintln!("(view '{}' has no matches)", a.view_name);
} else {
for p in paths {
println!("{p}");
}
}
}
}
Ok(())
}
/* ─── naive substring path/content search (≤ 64 kB files) ───────── */
fn naive_search(conn: &Connection, term: &str) -> Result<Vec<String>> {
let term_lc = term.to_lowercase();
let mut stmt = conn.prepare("SELECT path FROM files")?;
let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
let mut hits = Vec::new();
for p in rows {
let p = p?;
/* path match */
if p.to_lowercase().contains(&term_lc) {
hits.push(p);
continue;
}
/* small-file content match */
if let Ok(meta) = fs::metadata(&p) {
if meta.len() > 64_000 {
continue;
}
}
if let Ok(content) = fs::read_to_string(&p) {
if content.to_lowercase().contains(&term_lc) {
hits.push(p);
}
}
}
Ok(hits)
}
/* ─── minimal copy of search-string → FTS5 translator ───────────── */
fn build_fts_match(raw_query: &str) -> String {
use shlex;
let mut parts = Vec::new();
let toks = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
for tok in toks {
if ["AND", "OR", "NOT"].contains(&tok.as_str()) {
parts.push(tok);
} else if let Some(tag) = tok.strip_prefix("tag:") {
for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
if i > 0 {
parts.push("AND".into());
}
parts.push(format!("tags_text:{}", escape(seg)));
}
} else if let Some(attr) = tok.strip_prefix("attr:") {
let mut kv = attr.splitn(2, '=');
let key = kv.next().unwrap();
if let Some(val) = kv.next() {
parts.push(format!("attrs_text:{}", escape(key)));
parts.push("AND".into());
parts.push(format!("attrs_text:{}", escape(val)));
} else {
parts.push(format!("attrs_text:{}", escape(key)));
}
} else {
parts.push(escape(&tok));
}
}
parts.join(" ")
}
fn escape(term: &str) -> String {
if term.contains(|c: char| c.is_whitespace() || "-:()\"".contains(c))
|| ["AND", "OR", "NOT", "NEAR"].contains(&term.to_uppercase().as_str())
{
format!("\"{}\"", term.replace('"', "\"\""))
} else {
term.to_string()
}
}
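
As a concrete illustration of the translation above, a unit test along these lines (hypothetical, not part of the PR) shows how `build_fts_match` expands `tag:` and `attr:` tokens:

```rust
#[cfg(test)]
mod tests {
    use super::build_fts_match;

    #[test]
    fn tokens_expand_to_fts_columns() {
        // `tag:a/b` becomes one tags_text term per path segment, ANDed.
        assert_eq!(
            build_fts_match("tag:project/alpha AND TODO"),
            "tags_text:project AND tags_text:alpha AND TODO"
        );
        // `attr:key=value` becomes two ANDed attrs_text terms.
        assert_eq!(
            build_fts_match("attr:status=complete"),
            "attrs_text:status AND attrs_text:complete"
        );
    }
}
```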

src/config.rs

@@ -1,7 +1,10 @@
use std::path::{Path, PathBuf};
use anyhow::Result;
use directories::ProjectDirs;
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
path::{Path, PathBuf},
};
/// Runtime configuration (currently just the DB path).
#[derive(Debug, Clone)]
@@ -10,22 +13,39 @@ pub struct Config {
}
impl Config {
/// Resolve configuration from environment or XDG directories.
/// Resolve configuration from environment or derive one per-workspace.
///
/// Priority:
/// 1. `MARLIN_DB_PATH` env-var (explicit override)
/// 2. *Workspace-local* file under XDG data dir
/// (`~/.local/share/marlin/index_<hash>.db`)
/// 3. Fallback to `./index.db` when we cannot locate an XDG dir
pub fn load() -> Result<Self> {
let db_path = std::env::var_os("MARLIN_DB_PATH")
.map(PathBuf::from)
.or_else(|| {
ProjectDirs::from("io", "Marlin", "marlin")
.map(|dirs| dirs.data_dir().join("index.db"))
})
.unwrap_or_else(|| Path::new("index.db").to_path_buf());
// 1) explicit override
if let Some(val) = std::env::var_os("MARLIN_DB_PATH") {
let p = PathBuf::from(val);
std::fs::create_dir_all(p.parent().expect("has parent"))?;
return Ok(Self { db_path: p });
}
std::fs::create_dir_all(
db_path
.parent()
.expect("db_path should always have a parent directory"),
)?;
// 2) derive per-workspace DB name from CWD hash
let cwd = std::env::current_dir()?;
let mut h = DefaultHasher::new();
cwd.hash(&mut h);
let digest = h.finish(); // 64-bit
let file_name = format!("index_{digest:016x}.db");
Ok(Self { db_path })
if let Some(dirs) = ProjectDirs::from("io", "Marlin", "marlin") {
let dir = dirs.data_dir();
std::fs::create_dir_all(dir)?;
return Ok(Self {
db_path: dir.join(file_name),
});
}
// 3) very last resort: workspace-relative DB
Ok(Self {
db_path: Path::new(&file_name).to_path_buf(),
})
}
}
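
To make the naming scheme concrete, this standalone sketch derives the per-workspace file name the same way `Config::load` does. Note that `DefaultHasher` output is not guaranteed stable across Rust releases, so the exact digest below is illustrative only:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::path::Path;

// Same derivation as step 2 above: hash the workspace path into a
// 64-bit digest and embed it in the DB file name.
fn db_file_name(workspace: &Path) -> String {
    let mut h = DefaultHasher::new();
    workspace.hash(&mut h);
    format!("index_{:016x}.db", h.finish())
}

fn main() {
    // e.g. "index_3f6c2a9d1b04e57a.db" (digest varies by platform/toolchain)
    println!("{}", db_file_name(Path::new("/home/alice/project")));
}
```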

src/db/migrations: new migration (links, collections, views)

@@ -0,0 +1,28 @@
PRAGMA foreign_keys = ON;
-- File-to-file links
CREATE TABLE IF NOT EXISTS links (
id INTEGER PRIMARY KEY,
src_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
dst_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
type TEXT,
UNIQUE(src_file_id, dst_file_id, type)
);
-- Named collections
CREATE TABLE IF NOT EXISTS collections (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS collection_files (
collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
PRIMARY KEY(collection_id, file_id)
);
-- Saved views
CREATE TABLE IF NOT EXISTS views (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
query TEXT NOT NULL
);
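
The `db::add_link`, `db::ensure_collection`, etc. helpers that `src/cli/*.rs` call against this schema are not included in the diff. A plausible sketch of one of them, hypothetical and for illustration only:

```rust
use rusqlite::{params, Connection};

// Hypothetical shape of db::add_link over the `links` table above;
// the PR's actual src/db implementation is not shown in this excerpt.
pub fn add_link(
    conn: &Connection,
    src_file_id: i64,
    dst_file_id: i64,
    link_type: Option<&str>,
) -> rusqlite::Result<usize> {
    conn.execute(
        "INSERT OR IGNORE INTO links (src_file_id, dst_file_id, type)
         VALUES (?1, ?2, ?3)",
        params![src_file_id, dst_file_id, link_type],
    )
}
```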

View File

@@ -0,0 +1,289 @@
-- src/db/migrations/0004_fix_hierarchical_tags_fts.sql
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
-- Force drop all FTS triggers to ensure they're recreated even if migration is already recorded
DROP TRIGGER IF EXISTS files_fts_ai_file;
DROP TRIGGER IF EXISTS files_fts_au_file;
DROP TRIGGER IF EXISTS files_fts_ad_file;
DROP TRIGGER IF EXISTS file_tags_fts_ai;
DROP TRIGGER IF EXISTS file_tags_fts_ad;
DROP TRIGGER IF EXISTS attributes_fts_ai;
DROP TRIGGER IF EXISTS attributes_fts_au;
DROP TRIGGER IF EXISTS attributes_fts_ad;
-- Create a new trigger for file insertion that uses recursive CTE for full tag paths
CREATE TRIGGER files_fts_ai_file
AFTER INSERT ON files
BEGIN
INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
VALUES (
NEW.id,
NEW.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = NEW.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = NEW.id)
);
END;
-- Recreate the file path update trigger
CREATE TRIGGER files_fts_au_file
AFTER UPDATE OF path ON files
BEGIN
UPDATE files_fts
SET path = NEW.path
WHERE rowid = NEW.id;
END;
-- Recreate the file deletion trigger
CREATE TRIGGER files_fts_ad_file
AFTER DELETE ON files
BEGIN
DELETE FROM files_fts WHERE rowid = OLD.id;
END;
-- Create new trigger for tag insertion that uses recursive CTE for full tag paths
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- Create new trigger for tag deletion that uses recursive CTE for full tag paths
CREATE TRIGGER file_tags_fts_ad
AFTER DELETE ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Create new triggers for attribute operations that use recursive CTE for full tag paths
CREATE TRIGGER attributes_fts_ai
AFTER INSERT ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER attributes_fts_au
AFTER UPDATE OF value ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER attributes_fts_ad
AFTER DELETE ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Update all existing FTS entries with the new tag-path format
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f;
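The net effect of this migration: `tags_text` now stores every full root-to-leaf tag path (plus bare root names), and since unicode61 tokenizes on `/`, a hierarchical query is just an AND over the path segments. A minimal verification sketch, assuming an open `rusqlite::Connection` to a migrated DB with one file tagged `project/md` (as in the standalone test script later in this diff):

```rust
// Sketch: hierarchical tag lookup against the FTS index after 0004.
use rusqlite::Connection;

fn files_tagged_project_md(conn: &Connection) -> rusqlite::Result<Vec<String>> {
    let mut stmt = conn.prepare(
        "SELECT f.path
           FROM files_fts
           JOIN files f ON f.id = files_fts.rowid
          WHERE files_fts MATCH 'tags_text:project AND tags_text:md'",
    )?;
    let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
    rows.collect() // each hit carries both segments in its tags_text
}
```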

View File

@@ -1,9 +1,12 @@
// src/db/mod.rs
//! Central DB helper: connection bootstrap, migrations **and** most
//! data-access helpers (tags, links, collections, saved views, …).
use std::{
fs,
path::{Path, PathBuf},
};
use std::result::Result as StdResult;
use anyhow::{Context, Result};
use chrono::Local;
use rusqlite::{
@@ -12,16 +15,20 @@ use rusqlite::{
Connection,
OpenFlags,
OptionalExtension,
TransactionBehavior,
};
use tracing::{debug, info, warn};
/* ─── embedded migrations ─────────────────────────────────────────── */
/// Embed every numbered migration file here.
const MIGRATIONS: &[(&str, &str)] = &[
("0001_initial_schema.sql", include_str!("migrations/0001_initial_schema.sql")),
("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
];
/* ─── connection bootstrap ────────────────────────────────────────── */
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
let db_path_ref = db_path.as_ref();
@@ -31,16 +38,18 @@ pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.pragma_update(None, "foreign_keys", "ON")?;
// Wait up to 30 s for a competing writer before giving up
conn.busy_timeout(std::time::Duration::from_secs(30))?; // ← tweaked
// Apply migrations (drops & recreates all FTS triggers)
apply_migrations(&mut conn)?;
Ok(conn)
}
/* ─── migration runner ────────────────────────────────────────────── */
fn apply_migrations(conn: &mut Connection) -> Result<()> {
// Ensure schema_version bookkeeping table exists
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS schema_version (
version INTEGER PRIMARY KEY,
@@ -48,10 +57,11 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
);",
)?;
// Legacy patch: ignore errors if column already exists
let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);
// Grab the write-lock up-front so migrations can run uninterrupted
let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
for (fname, sql) in MIGRATIONS {
let version: i64 = fname
@@ -74,13 +84,8 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
}
info!("applying migration {}", fname);
tx.execute_batch(sql)
.with_context(|| format!("could not apply migration {}", fname))?;
.with_context(|| format!("could not apply migration {fname}"))?;
tx.execute(
"INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
@@ -89,10 +94,31 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
}
tx.commit()?;
// sanity check: warn if any embedded migration got skipped
let mut missing = Vec::new();
for (fname, _) in MIGRATIONS {
let v: i64 = fname.split('_').next().unwrap().parse().unwrap();
let ok: bool = conn
.query_row(
"SELECT 1 FROM schema_version WHERE version = ?1",
[v],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !ok {
missing.push(v);
}
}
if !missing.is_empty() {
warn!("migrations not applied: {:?}", missing);
}
Ok(())
}
/* ─── tag helpers ─────────────────────────────────────────────────── */
pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
let mut parent: Option<i64> = None;
@@ -104,7 +130,7 @@ pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
let id: i64 = conn.query_row(
"SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)",
params![segment, parent],
|r| r.get(0),
)?;
parent = Some(id);
}
@@ -116,6 +142,8 @@ pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
.map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
}
/* ─── attributes ──────────────────────────────────────────────────── */
pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
conn.execute(
r#"
@@ -128,7 +156,161 @@ pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> R
Ok(())
}
/* ─── links ───────────────────────────────────────────────────────── */
pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
conn.execute(
"INSERT INTO links(src_file_id, dst_file_id, type)
VALUES (?1, ?2, ?3)
ON CONFLICT(src_file_id, dst_file_id, type) DO NOTHING",
params![src_file_id, dst_file_id, link_type],
)?;
Ok(())
}
pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
conn.execute(
"DELETE FROM links
WHERE src_file_id = ?1
AND dst_file_id = ?2
AND (type IS ?3 OR type = ?3)",
params![src_file_id, dst_file_id, link_type],
)?;
Ok(())
}
pub fn list_links(
conn: &Connection,
pattern: &str,
direction: Option<&str>,
link_type: Option<&str>,
) -> Result<Vec<(String, String, Option<String>)>> {
let like_pattern = pattern.replace('*', "%");
// Files matching pattern
let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
let rows = stmt
.query_map(params![like_pattern], |r| Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)))?
.collect::<Result<Vec<_>, _>>()?;
let mut out = Vec::new();
for (fid, fpath) in rows {
let (src_col, dst_col) = match direction {
Some("in") => ("dst_file_id", "src_file_id"),
_ => ("src_file_id", "dst_file_id"),
};
let sql = format!(
"SELECT f2.path, l.type
FROM links l
JOIN files f2 ON f2.id = l.{dst_col}
WHERE l.{src_col} = ?1
AND (?2 IS NULL OR l.type = ?2)",
);
let mut stmt2 = conn.prepare(&sql)?;
let links = stmt2
.query_map(params![fid, link_type], |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)))?
.collect::<Result<Vec<_>, _>>()?;
for (other, typ) in links {
out.push((fpath.clone(), other, typ));
}
}
Ok(out)
}
pub fn find_backlinks(
conn: &Connection,
pattern: &str,
) -> Result<Vec<(String, Option<String>)>> {
let like = pattern.replace('*', "%");
let mut stmt = conn.prepare(
"SELECT f1.path, l.type
FROM links l
JOIN files f1 ON f1.id = l.src_file_id
JOIN files f2 ON f2.id = l.dst_file_id
WHERE f2.path LIKE ?1",
)?;
let rows = stmt.query_map([like], |r| {
Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
})?;
let out = rows.collect::<StdResult<Vec<_>, _>>()?; // rusqlite → anyhow via `?`
Ok(out)
}
/* ─── NEW: collections helpers ────────────────────────────────────── */
pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> {
conn.execute(
"INSERT OR IGNORE INTO collections(name) VALUES (?1)",
params![name],
)?;
conn.query_row(
"SELECT id FROM collections WHERE name = ?1",
params![name],
|r| r.get(0),
)
.context("collection lookup failed")
}
pub fn add_file_to_collection(conn: &Connection, coll_id: i64, file_id: i64) -> Result<()> {
conn.execute(
"INSERT OR IGNORE INTO collection_files(collection_id, file_id)
VALUES (?1, ?2)",
params![coll_id, file_id],
)?;
Ok(())
}
pub fn list_collection(conn: &Connection, name: &str) -> Result<Vec<String>> {
let mut stmt = conn.prepare(
r#"SELECT f.path
FROM collections c
JOIN collection_files cf ON cf.collection_id = c.id
JOIN files f ON f.id = cf.file_id
WHERE c.name = ?1
ORDER BY f.path"#,
)?;
let rows = stmt.query_map([name], |r| r.get::<_, String>(0))?;
let list = rows.collect::<StdResult<Vec<_>, _>>()?;
Ok(list)
}
/* ─── NEW: saved views (smart folders) ────────────────────────────── */
pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
conn.execute(
"INSERT INTO views(name, query)
VALUES (?1, ?2)
ON CONFLICT(name) DO UPDATE SET query = excluded.query",
params![name, query],
)?;
Ok(())
}
pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> {
let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?;
let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
let list = rows.collect::<StdResult<Vec<_>, _>>()?;
Ok(list)
}
pub fn view_query(conn: &Connection, name: &str) -> Result<String> {
conn.query_row(
"SELECT query FROM views WHERE name = ?1",
[name],
|r| r.get::<_, String>(0),
)
.context(format!("no view called '{name}'"))
}
/* ─── backup / restore helpers ────────────────────────────────────── */
pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
let src = db_path.as_ref();
@@ -153,3 +335,15 @@ pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
fs::copy(&backup_path, &live_db_path)?;
Ok(())
}
/* ─── tests ───────────────────────────────────────────────────────── */
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn migrations_apply_in_memory() {
open(":memory:").expect("all migrations apply");
}
}
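For orientation, the new helpers compose like this; a sketch written as if it sat beside the unit test above (the hand-inserted `files` rows are illustrative stand-ins for the scanner, and assume the `files` table accepts a bare `path` as in the initial schema):

```rust
// Sketch: links, collections and saved views over an in-memory DB.
fn helpers_round_trip() -> anyhow::Result<()> {
    let conn = open(":memory:")?; // runs all embedded migrations

    // Stand-in for `marlin scan` (illustrative only).
    conn.execute_batch(
        "INSERT INTO files(path) VALUES ('notes/a.txt');
         INSERT INTO files(path) VALUES ('notes/b.txt');",
    )?;
    let a = file_id(&conn, "notes/a.txt")?;
    let b = file_id(&conn, "notes/b.txt")?;

    add_link(&conn, a, b, Some("cites"))?;
    assert_eq!(find_backlinks(&conn, "notes/b.txt")?.len(), 1);

    let coll = ensure_collection(&conn, "inbox")?;
    add_file_to_collection(&conn, coll, a)?;
    assert_eq!(list_collection(&conn, "inbox")?, vec!["notes/a.txt"]);

    save_view(&conn, "todo", "TODO")?;
    assert_eq!(view_query(&conn, "todo")?, "TODO");
    Ok(())
}
```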

View File

@@ -5,9 +5,13 @@ use tracing_subscriber::{fmt, EnvFilter};
/// Reads `RUST_LOG` for filtering, falls back to `info`.
pub fn init() {
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
// All tracing output (INFO, WARN, ERROR …) now goes to *stderr* so the
// integration tests can assert on warnings / errors reliably.
fmt()
.with_target(false) // hide module targets
.with_level(true) // include log level
.with_env_filter(filter) // respect RUST_LOG
.with_writer(std::io::stderr) // <-- NEW: send to stderr
.init();
}

View File

@@ -1,4 +1,6 @@
// src/main.rs
#![deny(warnings)]
mod cli;
mod config;
mod db;
@@ -6,46 +8,74 @@ mod logging;
mod scan;
use anyhow::{Context, Result};
use clap::{CommandFactory, Parser};
use clap_complete::generate;
use glob::Pattern;
use rusqlite::params;
use shellexpand;
use shlex;
use std::{
env,
fs,
io,
path::{Path, PathBuf},
process::Command,
};
use tracing::{debug, error, info};
use walkdir::WalkDir;
use cli::{Cli, Commands};
fn main() -> Result<()> {
/* ── CLI parsing & logging ────────────────────────────────────── */
let args = Cli::parse();
if args.verbose {
// switch on debug-level logs
env::set_var("RUST_LOG", "debug");
}
logging::init();
/* ── shell-completion shortcut ───────────────────────────────── */
if let Commands::Completions { shell } = &args.command {
let mut cmd = Cli::command();
generate(*shell, &mut cmd, "marlin", &mut io::stdout());
return Ok(());
}
/* ── config & automatic backup ───────────────────────────────── */
let cfg = config::Config::load()?; // DB path, etc.
match &args.command {
Commands::Init | Commands::Backup | Commands::Restore { .. } => {}
_ => match db::backup(&cfg.db_path) {
Ok(path) => info!("Pre-command auto-backup created at {}", path.display()),
Err(e) => error!("Failed to create pre-command auto-backup: {e}"),
},
}
/* ── open DB (runs migrations if needed) ─────────────────────── */
let mut conn = db::open(&cfg.db_path)?;
/* ── command dispatch ────────────────────────────────────────── */
match args.command {
Commands::Completions { .. } => {} // already handled
Commands::Init => {
info!("Database initialised at {}", cfg.db_path.display());
// Always (re-)scan the current directory so even an existing DB
// picks up newly created files in the working tree.
let cwd = env::current_dir().context("getting current directory")?;
let count = scan::scan_directory(&mut conn, &cwd)
.context("initial scan failed")?;
info!("Initial scan complete indexed/updated {count} files");
}
Commands::Scan { paths } => {
// if none given, default to current dir
let scan_paths = if paths.is_empty() {
vec![env::current_dir()?]
} else {
@@ -56,22 +86,16 @@ fn main() -> Result<()> {
}
}
Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?,
Commands::Attr { action } => match action {
cli::AttrCmd::Set { pattern, key, value } => {
attr_set(&conn, &pattern, &key, &value)?
}
cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?,
},
Commands::Search { query, exec } => run_search(&conn, &query, exec)?,
Commands::Backup => {
let path = db::backup(&cfg.db_path)?;
@@ -79,48 +103,89 @@ fn main() -> Result<()> {
}
Commands::Restore { backup_path } => {
drop(conn); // close handle before overwrite
db::restore(&backup_path, &cfg.db_path).with_context(|| {
format!("Failed to restore DB from {}", backup_path.display())
})?;
println!("Restored DB from {}", backup_path.display());
db::open(&cfg.db_path).with_context(|| {
format!("Could not open restored DB at {}", cfg.db_path.display())
})?;
info!("Successfully opened restored database.");
}
/* passthrough sub-modules that still stub out their logic */
Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?,
Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?,
Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?,
Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?,
Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?,
Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?,
Commands::Annotate(an_cmd) => cli::annotate::run(&an_cmd, &mut conn, args.format)?,
Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?,
Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?,
}
Ok(())
}
/* ───────────────────────── helpers & sub-routines ───────────────── */
/* ---------- TAGS ---------- */
/// Apply a hierarchical tag to all files matching the glob pattern.
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
// ensure_tag_path returns the deepest-node ID
let leaf_tag_id = db::ensure_tag_path(conn, tag_path)?;
// collect that tag and all its ancestors
let mut tag_ids = Vec::new();
let mut current = Some(leaf_tag_id);
while let Some(id) = current {
tag_ids.push(id);
current = match conn.query_row(
"SELECT parent_id FROM tags WHERE id = ?1",
params![id],
|r| r.get::<_, Option<i64>>(0),
) {
Ok(parent_id) => parent_id,
Err(rusqlite::Error::QueryReturnedNoRows) => None,
Err(e) => return Err(e.into()),
};
}
let expanded = shellexpand::tilde(pattern).into_owned();
let pat = Pattern::new(&expanded)
.with_context(|| format!("Invalid glob pattern `{expanded}`"))?;
let root = determine_scan_root(&expanded);
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut stmt_insert = conn.prepare(
"INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
)?;
let mut count = 0;
for entry in WalkDir::new(&root)
.into_iter()
.filter_map(Result::ok)
.filter(|e| e.file_type().is_file())
{
let path_str = entry.path().to_string_lossy();
debug!("testing path: {}", path_str);
if !pat.matches(&path_str) {
debug!(" → no match");
continue;
}
debug!(" → matched");
match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) {
Ok(file_id) => {
let mut newly = false;
for &tid in &tag_ids {
if stmt_insert.execute(params![file_id, tid])? > 0 {
newly = true;
}
}
if newly {
info!(file = %path_str, tag = tag_path, "tagged");
count += 1;
} else {
debug!(file = %path_str, tag = tag_path, "already tagged");
}
}
Err(rusqlite::Error::QueryReturnedNoRows) => {
@@ -132,42 +197,39 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu
}
}
info!(
"Applied tag '{}' to {} file(s).",
tag_path, count
);
Ok(())
}
/* ---------- ATTRIBUTES ---------- */
/// Set a key=value attribute on all files matching the glob pattern.
fn attr_set(conn: &rusqlite::Connection, pattern: &str, key: &str, value: &str) -> Result<()> {
let expanded = shellexpand::tilde(pattern).into_owned();
let pat = Pattern::new(&expanded)
.with_context(|| format!("Invalid glob pattern `{expanded}`"))?;
let root = determine_scan_root(&expanded);
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut count = 0;
for entry in WalkDir::new(&root)
.into_iter()
.filter_map(Result::ok)
.filter(|e| e.file_type().is_file())
{
let path_str = entry.path().to_string_lossy();
debug!("testing attr path: {}", path_str);
if !pat.matches(&path_str) {
debug!(" → no match");
continue;
}
debug!(" → matched");
match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) {
Ok(file_id) => {
db::upsert_attr(conn, file_id, key, value)?;
info!(file = %path_str, key, value, "attr set");
count += 1;
}
Err(rusqlite::Error::QueryReturnedNoRows) => {
@@ -179,45 +241,64 @@ fn attr_set(
}
}
info!(
"Attribute '{}={}' set on {} file(s).",
key, value, count
);
Ok(())
}
/// List attributes for a given file path.
fn attr_ls(conn: &rusqlite::Connection, path: &Path) -> Result<()> {
let file_id = db::file_id(conn, &path.to_string_lossy())?;
let mut stmt =
conn.prepare("SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key")?;
for row in stmt
.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?
{
let (k, v) = row?;
println!("{k} = {v}");
}
Ok(())
}
/* ---------- SEARCH ---------- */
/// Run an FTS5 search, optionally piping each hit through `exec`.
/// Falls back to a simple substring scan (path + ≤64 kB file contents)
/// when the FTS index yields no rows.
fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
// Build the FTS MATCH expression
let mut parts = Vec::new();
let toks = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
for tok in toks {
if ["AND", "OR", "NOT"].contains(&tok.as_str()) {
parts.push(tok);
} else if let Some(tag) = tok.strip_prefix("tag:") {
for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
if i > 0 {
parts.push("AND".into());
}
parts.push(format!("tags_text:{}", escape_fts(seg)));
}
} else if let Some(attr) = tok.strip_prefix("attr:") {
let mut kv = attr.splitn(2, '=');
let key = kv.next().unwrap();
if let Some(val) = kv.next() {
parts.push(format!("attrs_text:{}", escape_fts(key)));
parts.push("AND".into());
parts.push(format!("attrs_text:{}", escape_fts(val)));
} else {
parts.push(format!("attrs_text:{}", escape_fts(key)));
}
} else {
parts.push(escape_fts(&tok));
}
}
let fts_expr = fts_query_parts.join(" ");
debug!("Constructed FTS MATCH expression: {}", fts_expr);
let fts_expr = parts.join(" ");
debug!("FTS MATCH expression: {fts_expr}");
// ---------- primary FTS query ----------
let mut stmt = conn.prepare(
r#"
SELECT f.path
@@ -227,51 +308,27 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>
ORDER BY rank
"#,
)?;
let mut hits: Vec<String> = stmt
.query_map(params![fts_expr], |r| r.get::<_, String>(0))?
.filter_map(Result::ok)
.collect();
// ---------- graceful fallback ----------
if hits.is_empty() && !raw_query.contains(':') {
hits = naive_substring_search(conn, raw_query)?;
}
// ---------- output / exec ----------
if let Some(cmd_tpl) = exec {
run_exec(&hits, &cmd_tpl)?;
} else {
if hits.is_empty() {
eprintln!("No matches for query: `{}` (FTS expression: `{}`)", raw_query, fts_expr);
eprintln!(
"No matches for query: `{raw_query}` (FTS expression: `{fts_expr}`)"
);
} else {
for p in hits {
println!("{}", p);
println!("{p}");
}
}
}
@@ -279,10 +336,81 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>
Ok(())
}
/// Simple, case-insensitive substring scan over paths and (small) file bodies.
fn naive_substring_search(conn: &rusqlite::Connection, term: &str) -> Result<Vec<String>> {
let term_lc = term.to_lowercase();
let mut stmt = conn.prepare("SELECT path FROM files")?;
let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
let mut out = Vec::new();
for p in rows {
let p = p?;
if p.to_lowercase().contains(&term_lc) {
out.push(p.clone());
continue;
}
// Only inspect small files to stay fast
if let Ok(meta) = fs::metadata(&p) {
if meta.len() > 64_000 {
continue;
}
}
if let Ok(content) = fs::read_to_string(&p) {
if content.to_lowercase().contains(&term_lc) {
out.push(p);
}
}
}
Ok(out)
}
/// Helper: run an external command template on every hit.
fn run_exec(paths: &[String], cmd_tpl: &str) -> Result<()> {
let mut ran_without_placeholder = false;
if paths.is_empty() && !cmd_tpl.contains("{}") {
if let Some(mut parts) = shlex::split(cmd_tpl) {
if !parts.is_empty() {
let prog = parts.remove(0);
let status = Command::new(&prog).args(&parts).status()?;
if !status.success() {
error!(command = %cmd_tpl, code = ?status.code(), "command failed");
}
}
}
ran_without_placeholder = true;
}
if !ran_without_placeholder {
for p in paths {
let quoted = shlex::try_quote(p).unwrap_or_else(|_| p.into());
let final_cmd = if cmd_tpl.contains("{}") {
cmd_tpl.replace("{}", &quoted)
} else {
format!("{cmd_tpl} {quoted}")
};
if let Some(mut parts) = shlex::split(&final_cmd) {
if parts.is_empty() {
continue;
}
let prog = parts.remove(0);
let status = Command::new(&prog).args(&parts).status()?;
if !status.success() {
error!(file = %p, command = %final_cmd, code = ?status.code(), "command failed");
}
}
}
}
Ok(())
}
/* ---------- misc helpers ---------- */
fn escape_fts(term: &str) -> String {
if term.contains(|c: char| c.is_whitespace() || "-:()\"".contains(c))
|| ["AND","OR","NOT","NEAR"].contains(&term.to_uppercase().as_str())
|| ["AND", "OR", "NOT", "NEAR"]
.contains(&term.to_uppercase().as_str())
{
format!("\"{}\"", term.replace('"', "\"\""))
} else {
@@ -292,16 +420,22 @@ fn escape_fts_query_term(term: &str) -> String {
/// Determine a filesystem root to limit recursive walking.
fn determine_scan_root(pattern: &str) -> PathBuf {
let first_wild = pattern
.find(|c| matches!(c, '*' | '?' | '['))
.unwrap_or(pattern.len());
let mut root = PathBuf::from(&pattern[..first_wild]);
while root
.as_os_str()
.to_string_lossy()
.contains(|c| matches!(c, '*' | '?' | '['))
{
root = root.parent().map(Path::to_path_buf).unwrap_or_default();
}
if root.as_os_str().is_empty() {
PathBuf::from(".")
} else {
root
}
}
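To make the `tag:`/`attr:` fan-out above concrete, here is a condensed, self-contained restatement of the translation loop (a sketch: shlex splitting and `escape_fts` are simplified away, and `build_fts_expr` is a hypothetical helper name, not part of the diff):

```rust
// Condensed restatement of the query translation in run_search, so the
// fan-out is easy to see. Simplifications vs. the real code: whitespace
// splitting instead of shlex, and no FTS escaping.
fn build_fts_expr(raw: &str) -> String {
    let mut parts: Vec<String> = Vec::new();
    for tok in raw.split_whitespace() {
        if let Some(tag) = tok.strip_prefix("tag:") {
            // tag:project/md -> tags_text:project AND tags_text:md
            for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
                if i > 0 {
                    parts.push("AND".into());
                }
                parts.push(format!("tags_text:{seg}"));
            }
        } else if let Some(attr) = tok.strip_prefix("attr:") {
            // attr:reviewed=yes -> attrs_text:reviewed AND attrs_text:yes
            let mut kv = attr.splitn(2, '=');
            parts.push(format!("attrs_text:{}", kv.next().unwrap()));
            if let Some(val) = kv.next() {
                parts.push("AND".into());
                parts.push(format!("attrs_text:{val}"));
            }
        } else {
            parts.push(tok.to_string()); // plain terms (and AND/OR/NOT) pass through
        }
    }
    parts.join(" ")
}

#[test]
fn hierarchical_tag_fans_out() {
    assert_eq!(
        build_fts_expr("tag:project/md TODO"),
        "tags_text:project AND tags_text:md TODO"
    );
}
```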

View File

@@ -0,0 +1,240 @@
// Test script to validate hierarchical tag FTS fix
// This script demonstrates how the fix works with a simple test case
use rusqlite::{Connection, params};
use std::path::Path;
use std::fs;
use anyhow::Result;
fn main() -> Result<()> {
// Create a test database in a temporary location
let db_path = Path::new("/tmp/marlin_test.db");
if db_path.exists() {
fs::remove_file(db_path)?;
}
println!("Creating test database at {:?}", db_path);
// Initialize database with our schema and migrations
let conn = Connection::open(db_path)?;
// Apply schema (simplified version of what's in the migrations)
println!("Applying schema...");
conn.execute_batch(
"PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
CREATE TABLE files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
size INTEGER,
mtime INTEGER,
hash TEXT
);
CREATE TABLE tags (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
UNIQUE(name, parent_id)
);
CREATE TABLE file_tags (
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY(file_id, tag_id)
);
CREATE TABLE attributes (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
key TEXT NOT NULL,
value TEXT,
UNIQUE(file_id, key)
);
CREATE VIRTUAL TABLE files_fts
USING fts5(
path,
tags_text,
attrs_text,
content='',
tokenize=\"unicode61 remove_diacritics 2\"
);"
)?;
// Apply our fixed triggers
println!("Applying fixed FTS triggers...");
conn.execute_batch(
"CREATE TRIGGER files_fts_ai_file
AFTER INSERT ON files
BEGIN
INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
VALUES (
NEW.id,
NEW.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path as tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = NEW.id
UNION
SELECT t.name as tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = NEW.id)
);
END;
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path as tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name as tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;"
)?;
// Insert test data
println!("Inserting test data...");
// Insert a test file
conn.execute(
"INSERT INTO files (id, path) VALUES (1, '/test/document.md')",
[],
)?;
// Create hierarchical tags: project/md
println!("Creating hierarchical tags: project/md");
// Insert parent tag 'project'
conn.execute(
"INSERT INTO tags (id, name, parent_id) VALUES (1, 'project', NULL)",
[],
)?;
// Insert child tag 'md' under 'project'
conn.execute(
"INSERT INTO tags (id, name, parent_id) VALUES (2, 'md', 1)",
[],
)?;
// Tag the file with the 'md' tag (which is under 'project')
conn.execute(
"INSERT INTO file_tags (file_id, tag_id) VALUES (1, 2)",
[],
)?;
// Check what's in the FTS index
println!("\nChecking FTS index content:");
let mut stmt = conn.prepare("SELECT rowid, path, tags_text, attrs_text FROM files_fts")?;
let rows = stmt.query_map([], |row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, String>(2)?,
row.get::<_, String>(3)?,
))
})?;
for row in rows {
let (id, path, tags, attrs) = row?;
println!("ID: {}, Path: {}, Tags: '{}', Attrs: '{}'", id, path, tags, attrs);
}
// Test searching for the full hierarchical tag path
println!("\nTesting search for 'project/md':");
let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project/md'")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
let mut found = false;
for row in rows {
found = true;
println!("Found file: {}", row?);
}
if !found {
println!("No files found with tag 'project/md'");
}
// Test searching for just the parent tag
println!("\nTesting search for just 'project':");
let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project'")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
let mut found = false;
for row in rows {
found = true;
println!("Found file: {}", row?);
}
if !found {
println!("No files found with tag 'project'");
}
// Test searching for just the child tag
println!("\nTesting search for just 'md':");
let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'md'")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
let mut found = false;
for row in rows {
found = true;
println!("Found file: {}", row?);
}
if !found {
println!("No files found with tag 'md'");
}
println!("\nTest completed successfully!");
Ok(())
}

Binary file not shown.

View File

@@ -1 +1 @@
/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs
/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs

121
tests/e2e.rs Normal file
View File

@@ -0,0 +1,121 @@
//! End-to-end “happy path” smoke-tests for the `marlin` binary.
//!
//! Run with `cargo test --test e2e` (CI does) or `cargo test`.
use assert_cmd::prelude::*;
use predicates::prelude::*;
use std::{fs, path::PathBuf, process::Command};
use tempfile::tempdir;
/// Absolute path to the freshly-built `marlin` binary.
fn marlin_bin() -> PathBuf {
PathBuf::from(env!("CARGO_BIN_EXE_marlin"))
}
/// Create the demo directory structure and seed files.
fn spawn_demo_tree(root: &PathBuf) {
fs::create_dir_all(root.join("Projects/Alpha")).unwrap();
fs::create_dir_all(root.join("Projects/Beta")).unwrap();
fs::create_dir_all(root.join("Projects/Gamma")).unwrap();
fs::create_dir_all(root.join("Logs")).unwrap();
fs::create_dir_all(root.join("Reports")).unwrap();
fs::write(root.join("Projects/Alpha/draft1.md"), "- [ ] TODO foo\n").unwrap();
fs::write(root.join("Projects/Alpha/draft2.md"), "- [x] TODO foo\n").unwrap();
fs::write(root.join("Projects/Beta/final.md"), "done\n").unwrap();
fs::write(root.join("Projects/Gamma/TODO.txt"), "TODO bar\n").unwrap();
fs::write(root.join("Logs/app.log"), "ERROR omg\n").unwrap();
fs::write(root.join("Reports/Q1.pdf"), "PDF\n").unwrap();
}
/// Shorthand for “run and must succeed”.
fn ok(cmd: &mut Command) -> assert_cmd::assert::Assert {
cmd.assert().success()
}
#[test]
fn full_cli_flow() -> Result<(), Box<dyn std::error::Error>> {
/* ── 1 ░ sandbox ───────────────────────────────────────────── */
let tmp = tempdir()?; // wiped on drop
let demo_dir = tmp.path().join("marlin_demo");
spawn_demo_tree(&demo_dir);
let db_path = demo_dir.join("index.db");
// Helper to spawn a fresh `marlin` Command with the DB env-var set
let marlin = || {
let mut c = Command::new(marlin_bin());
c.env("MARLIN_DB_PATH", &db_path);
c
};
/* ── 2 ░ init ( auto-scan cwd ) ───────────────────────────── */
ok(marlin()
.current_dir(&demo_dir)
.arg("init"));
/* ── 3 ░ tag & attr demos ─────────────────────────────────── */
ok(marlin()
.arg("tag")
.arg(format!("{}/Projects/**/*.md", demo_dir.display()))
.arg("project/md"));
ok(marlin()
.arg("attr")
.arg("set")
.arg(format!("{}/Reports/*.pdf", demo_dir.display()))
.arg("reviewed")
.arg("yes"));
/* ── 4 ░ quick search sanity checks ───────────────────────── */
marlin()
.arg("search").arg("TODO")
.assert()
.stdout(predicate::str::contains("TODO.txt"));
marlin()
.arg("search").arg("attr:reviewed=yes")
.assert()
.stdout(predicate::str::contains("Q1.pdf"));
/* ── 5 ░ link flow & backlinks ────────────────────────────── */
let foo = demo_dir.join("foo.txt");
let bar = demo_dir.join("bar.txt");
fs::write(&foo, "")?;
fs::write(&bar, "")?;
ok(marlin().arg("scan").arg(&demo_dir));
ok(marlin()
.arg("link").arg("add")
.arg(&foo).arg(&bar));
marlin()
.arg("link").arg("backlinks").arg(&bar)
.assert()
.stdout(predicate::str::contains("foo.txt"));
/* ── 6 ░ backup → delete DB → restore ────────────────────── */
let backup_path = String::from_utf8(
marlin().arg("backup").output()?.stdout
)?;
let backup_file = backup_path.split_whitespace().last().unwrap();
fs::remove_file(&db_path)?; // simulate corruption
ok(marlin().arg("restore").arg(backup_file)); // restore
// Search must still work afterwards
marlin()
.arg("search").arg("TODO")
.assert()
.stdout(predicate::str::contains("TODO.txt"));
Ok(())
}

81
tests/neg.rs Normal file
View File

@@ -0,0 +1,81 @@
//! Negative-path integration tests (“should fail / warn”).
use predicates::str;
use tempfile::tempdir;
mod util;
use util::marlin;
/* ───────────────────────── LINKS ─────────────────────────────── */
#[test]
fn link_non_indexed_should_fail() {
let tmp = tempdir().unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
std::fs::write(tmp.path().join("foo.txt"), "").unwrap();
std::fs::write(tmp.path().join("bar.txt"), "").unwrap();
marlin(&tmp)
.current_dir(tmp.path())
.args([
"link", "add",
&tmp.path().join("foo.txt").to_string_lossy(),
&tmp.path().join("bar.txt").to_string_lossy()
])
.assert()
.failure()
.stderr(str::contains("file not indexed"));
}
/* ───────────────────────── ATTR ─────────────────────────────── */
#[test]
fn attr_set_on_non_indexed_file_should_warn() {
let tmp = tempdir().unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
let ghost = tmp.path().join("ghost.txt");
std::fs::write(&ghost, "").unwrap();
marlin(&tmp)
.args(["attr","set",
&ghost.to_string_lossy(),"foo","bar"])
.assert()
.success() // exits 0
.stderr(str::contains("not indexed"));
}
/* ───────────────────── COLLECTIONS ───────────────────────────── */
#[test]
fn coll_add_unknown_collection_should_fail() {
let tmp = tempdir().unwrap();
let file = tmp.path().join("doc.txt");
std::fs::write(&file, "").unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
marlin(&tmp)
.args(["coll","add","nope",&file.to_string_lossy()])
.assert()
.failure();
}
/* ───────────────────── RESTORE (bad file) ───────────────────── */
#[test]
fn restore_with_nonexistent_backup_should_fail() {
let tmp = tempdir().unwrap();
// create an empty DB first
marlin(&tmp).arg("init").assert().success();
marlin(&tmp)
.args(["restore", "/definitely/not/here.db"])
.assert()
.failure()
.stderr(str::contains("Failed to restore"));
}

171
tests/pos.rs Normal file
View File

@@ -0,0 +1,171 @@
//! Positive-path integration checks for every sub-command
//! that already has real logic behind it.
mod util;
use util::marlin;
use predicates::{prelude::*, str}; // brings `PredicateBooleanExt::and`
use std::fs;
use tempfile::tempdir;
/* ─────────────────────────── TAG ─────────────────────────────── */
#[test]
fn tag_should_add_hierarchical_tag_and_search_finds_it() {
let tmp = tempdir().unwrap();
let file = tmp.path().join("foo.md");
fs::write(&file, "# test\n").unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
marlin(&tmp)
.args(["tag", file.to_str().unwrap(), "project/md"])
.assert().success();
marlin(&tmp)
.args(["search", "tag:project/md"])
.assert()
.success()
.stdout(str::contains("foo.md"));
}
/* ─────────────────────────── ATTR ────────────────────────────── */
#[test]
fn attr_set_then_ls_roundtrip() {
let tmp = tempdir().unwrap();
let file = tmp.path().join("report.pdf");
fs::write(&file, "%PDF-1.4\n").unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
marlin(&tmp)
.args(["attr", "set", file.to_str().unwrap(), "reviewed", "yes"])
.assert().success();
marlin(&tmp)
.args(["attr", "ls", file.to_str().unwrap()])
.assert()
.success()
.stdout(str::contains("reviewed = yes"));
}
/* ─────────────────────── COLLECTIONS ────────────────────────── */
#[test]
fn coll_create_add_and_list() {
let tmp = tempdir().unwrap();
let a = tmp.path().join("a.txt");
let b = tmp.path().join("b.txt");
fs::write(&a, "").unwrap();
fs::write(&b, "").unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
marlin(&tmp).args(["coll", "create", "Set"]).assert().success();
for f in [&a, &b] {
marlin(&tmp).args(["coll", "add", "Set", f.to_str().unwrap()]).assert().success();
}
marlin(&tmp)
.args(["coll", "list", "Set"])
.assert()
.success()
.stdout(str::contains("a.txt").and(str::contains("b.txt")));
}
/* ─────────────────────────── VIEWS ───────────────────────────── */
#[test]
fn view_save_list_and_exec() {
let tmp = tempdir().unwrap();
let todo = tmp.path().join("TODO.txt");
fs::write(&todo, "remember the milk\n").unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
// save & list
marlin(&tmp).args(["view", "save", "tasks", "milk"]).assert().success();
marlin(&tmp)
.args(["view", "list"])
.assert()
.success()
.stdout(str::contains("tasks: milk"));
// exec
marlin(&tmp)
.args(["view", "exec", "tasks"])
.assert()
.success()
.stdout(str::contains("TODO.txt"));
}
/* ─────────────────────────── LINKS ───────────────────────────── */
#[test]
fn link_add_rm_and_list() {
let tmp = tempdir().unwrap();
let foo = tmp.path().join("foo.txt");
let bar = tmp.path().join("bar.txt");
fs::write(&foo, "").unwrap();
fs::write(&bar, "").unwrap();
// handy closure
let mc = || marlin(&tmp);
mc().current_dir(tmp.path()).arg("init").assert().success();
mc().args(["scan", tmp.path().to_str().unwrap()]).assert().success();
// add
mc().args(["link", "add", foo.to_str().unwrap(), bar.to_str().unwrap()])
.assert().success();
// list (outgoing default)
mc().args(["link", "list", foo.to_str().unwrap()])
.assert().success()
.stdout(str::contains("foo.txt").and(str::contains("bar.txt")));
// remove
mc().args(["link", "rm", foo.to_str().unwrap(), bar.to_str().unwrap()])
.assert().success();
// list now empty
mc().args(["link", "list", foo.to_str().unwrap()])
.assert().success()
.stdout(str::is_empty());
}
/* ─────────────────────── SCAN (multi-path) ───────────────────── */
#[test]
fn scan_with_multiple_paths_indexes_all() {
let tmp = tempdir().unwrap();
let dir_a = tmp.path().join("A");
let dir_b = tmp.path().join("B");
std::fs::create_dir_all(&dir_a).unwrap();
std::fs::create_dir_all(&dir_b).unwrap();
let f1 = dir_a.join("one.txt");
let f2 = dir_b.join("two.txt");
fs::write(&f1, "").unwrap();
fs::write(&f2, "").unwrap();
marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
// multi-path scan
marlin(&tmp)
.args(["scan", dir_a.to_str().unwrap(), dir_b.to_str().unwrap()])
.assert().success();
// both files findable
for term in ["one.txt", "two.txt"] {
marlin(&tmp).args(["search", term])
.assert()
.success()
.stdout(str::contains(term));
}
}

68
tests/test.md Normal file
View File

@@ -0,0 +1,68 @@
# Testing
Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
---
## 0 Prepare once
```bash
# Run once (or add to ~/.bashrc) so debug + release artefacts land
# in the same predictable place. Speeds-up future builds.
export CARGO_TARGET_DIR=target
```
---
## 1 Build the new binary
```bash
git pull # grab the latest commit
cargo build --release
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
* `cargo build --release` builds the optimised binary.
* `install …` copies it into your `$PATH` so `marlin` on the CLI is the fresh one.
---
## 2 Run the smoke-test suite
```bash
# Runs the end-to-end test we added in tests/e2e.rs
cargo test --test e2e -- --nocapture
```
* `--test e2e` compiles and runs **only** `tests/e2e.rs`; other unit tests are skipped (add them later if you like).
* `--nocapture` streams stdout/stderr so you can watch each CLI step in real time.
* Exit-code **0** ➜ everything passed.
Any non-zero exit or a red ✗ line means a step failed; the assert's diff will show the command and its output.
---
## 3 (Optionally) run all tests
```bash
cargo test --all -- --nocapture
```
This will execute:
* unit tests in `src/**`
* every file in `tests/`
* doc-tests
If you wire **“cargo test --all”** into CI (GitHub Actions, GitLab, etc.), pushes that break a workflow will be rejected automatically.
---
### One-liner helper (copy/paste)
```bash
git pull && cargo build --release &&
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
cargo test --all -- --nocapture
```
Stick that in a shell alias (`alias marlin-ci='…'`) and you've got a 5-second upgrade-and-verify loop.

23
tests/util.rs Normal file
View File

@@ -0,0 +1,23 @@
//! tests/util.rs
//! Small helpers shared across integration tests.
use std::path::{Path, PathBuf};
use tempfile::TempDir;
use assert_cmd::Command;
/// Absolute path to the freshly-built `marlin` binary.
pub fn bin() -> PathBuf {
PathBuf::from(env!("CARGO_BIN_EXE_marlin"))
}
/// Build a `Command` for `marlin` whose `MARLIN_DB_PATH` is
/// `<tmp>/index.db`.
///
/// Each call yields a brand-new `Command`, so callers can freely add
/// arguments, change the working directory, etc., without affecting
/// other invocations.
pub fn marlin(tmp: &TempDir) -> Command {
let db_path: &Path = &tmp.path().join("index.db");
let mut cmd = Command::new(bin());
cmd.env("MARLIN_DB_PATH", db_path);
cmd
}