diff --git a/Cargo.lock b/Cargo.lock index 9e46be2..cb384f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,22 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +[[package]] +name = "assert_cmd" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66" +dependencies = [ + "anstyle", + "bstr", + "doc-comment", + "libc", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -106,6 +122,17 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", + "regex-automata 0.4.9", + "serde", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -202,6 +229,12 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "directories" version = "5.0.1" @@ -211,6 +244,15 @@ dependencies = [ "dirs-sys 0.4.1", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" @@ -244,6 +286,22 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "errno" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -256,6 +314,21 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" +dependencies = [ + "num-traits", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -264,7 +337,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -370,6 +455,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "log" version = "0.4.27" @@ -381,14 +472,18 @@ name = "marlin" version = "0.1.0" dependencies = [ "anyhow", + "assert_cmd", "chrono", "clap", "clap_complete", "directories", + "dirs 5.0.1", "glob", + "predicates", "rusqlite", "shellexpand", "shlex", + "tempfile", "tracing", "tracing-subscriber", "walkdir", @@ -409,6 +504,12 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -458,6 +559,36 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "predicates" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" + +[[package]] +name = "predicates-tree" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -476,13 +607,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "redox_users" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror 1.0.69", ] @@ -493,7 +630,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror 2.0.12", ] @@ -556,6 +693,19 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + [[package]] name = "rustversion" version = "1.0.20" @@ -571,6 +721,26 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.219" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -586,7 +756,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" dependencies = [ - "dirs", + "dirs 6.0.0", ] [[package]] @@ -618,6 +788,25 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + [[package]] name = "thiserror" version = "1.0.69" @@ -759,6 +948,15 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -775,6 +973,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -1062,6 +1269,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "zerocopy" version = "0.8.25" diff --git a/Cargo.toml b/Cargo.toml index 1bc20b9..da85dad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,9 @@ shlex = "1.3" chrono = "0.4" shellexpand = "3.1" clap_complete = "4.1" + +[dev-dependencies] +assert_cmd = "2" +predicates = "3" +tempfile = "3" +dirs = "5" # cross-platform data dir helper diff --git a/README.md b/README.md index c4a57b2..a615e86 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,74 @@ sudo install -Dm755 target/release/marlin /usr/local/bin/marlin For a concise walkthrough, see [Quick start & Demo](marlin_demo.md). +## Testing + +Below is a **repeat-able 3-step flow** you can use **every time you pull fresh code**. 
+ +--- + +### 0 Prepare once + +```bash +# Run once (or add to ~/.bashrc) so debug + release artefacts land +# in the same predictable place. Speeds-up future builds. +export CARGO_TARGET_DIR=target +``` + +--- + +### 1 Build the new binary + +```bash +git pull # grab the latest commit +cargo build --release +sudo install -Dm755 target/release/marlin /usr/local/bin/marlin +``` + +* `cargo build --release` – builds the optimised binary. +* `install …` – copies it into your `$PATH` so `marlin` on the CLI is the fresh one. + +--- + +### 2 Run the smoke-test suite + +```bash +# Runs the end-to-end test we added in tests/e2e.rs +cargo test --test e2e -- --nocapture +``` + +* `--test e2e` – compiles and runs **only** `tests/e2e.rs`; other unit-tests are skipped (add them later if you like). +* `--nocapture` – streams stdout/stderr so you can watch each CLI step in real time. +* Exit-code **0** ➜ everything passed. + Any non-zero exit or a red ✗ line means a step failed; the assert’s diff will show the command and its output. + +--- + +### 3 (Optionally) run all tests + +```bash +cargo test --all -- --nocapture +``` + +This will execute: + +* unit tests in `src/**` +* every file in `tests/` +* doc-tests + +If you wire **“cargo test --all”** into CI (GitHub Actions, GitLab, etc.), pushes that break a workflow will be rejected automatically. + +--- + +#### One-liner helper (copy/paste) + +```bash +git pull && cargo build --release && +sudo install -Dm755 target/release/marlin /usr/local/bin/marlin && +cargo test --test e2e -- --nocapture +``` + +Stick that in a shell alias (`alias marlin-ci='…'`) and you’ve got a 5-second upgrade-and-verify loop. ### Database location @@ -129,112 +197,6 @@ The versioned migration system preserves your data across upgrades. --- -## Five-Minute Quickstart - -Just paste & run each block in your terminal. - -### 0 Prepare, build & install - -```bash -cd ~/Documents/GitHub/Marlin -cargo build --release -sudo install -Dm755 target/release/marlin /usr/local/bin/marlin -``` - -> Now `marlin` is available everywhere. - -### 1 Enable shell completion - -```bash -mkdir -p ~/.config/bash_completion.d -marlin completions bash > ~/.config/bash_completion.d/marlin -``` - -### 2 Prepare a clean demo directory - -```bash -rm -rf ~/marlin_demo -mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs} - -printf "Alpha draft\n" > ~/marlin_demo/Projects/Alpha/draft.txt -printf "Beta notes\n" > ~/marlin_demo/Projects/Beta/notes.md -printf "Receipt PDF\n" > ~/marlin_demo/Docs/receipt.pdf -printf "fake jpg\n" > ~/marlin_demo/Media/Photos/vacation.jpg -``` - -### 3 Initialize & index files - -```bash -marlin init -marlin scan ~/marlin_demo - -# show every path tested: -marlin --verbose scan ~/marlin_demo -``` - -> Only changed files get re-indexed on subsequent runs. 
- -### 4 Attach tags & attributes - -```bash -# Tag everything under “Alpha” -marlin tag ~/marlin_demo/Projects/Alpha/**/* project/alpha - -# Mark all PDFs as reviewed -marlin attr set ~/marlin_demo/**/*.pdf reviewed yes - -# Output as JSON instead: -marlin --format=json attr set ~/marlin_demo/**/*.pdf reviewed yes -``` - -### 5 Search your index - -```bash -# By tag or filename -marlin search alpha - -# Combined terms: -marlin search "reviewed AND pdf" - -# Run a command on each hit: -marlin search reviewed --exec 'echo HIT → {}' -``` - -### 6 Backup & restore - -```bash -# Snapshot -snap=$(marlin backup | awk '{print $NF}') - -# Simulate loss -rm ~/.local/share/marlin/index.db - -# Restore -marlin restore "$snap" - -# Verify -marlin search reviewed -``` - ---- - -##### What you just exercised - -| Command | Purpose | -| ----------------- | ----------------------------------------- | -| `marlin init` | Create / upgrade the SQLite database | -| `marlin scan` | Walk directories and (re)index files | -| `marlin tag` | Attach hierarchical tags | -| `marlin attr set` | Add/overwrite custom key-value attributes | -| `marlin search` | FTS5 search across path / tags / attrs | -| `--exec` | Pipe hits into any shell command | -| `marlin backup` | Timestamped snapshot of the DB | -| `marlin restore` | Replace live DB with a chosen snapshot | - -That’s the complete surface area of Marlin today—feel free to play around or point the scanner at real folders. - ---- - ## License MIT – see `LICENSE` diff --git a/marlin_demo.md b/marlin_demo.md index 7f51e19..6b5b3ac 100644 --- a/marlin_demo.md +++ b/marlin_demo.md @@ -1,19 +1,23 @@ -# Marlin Demo +# Marlin Demo -Here’s a little demo you can spin up to exercise tags, attributes, FTS queries, `--exec` hooks, backups & restores, and linking. Just copy–paste each block into your terminal: +Below is the **“hello-world” demo** that matches the current master branch (auto-scan on `marlin init`, no more forced-migration noise, and cleaner build). --- -### 0 Create the demo folder and some files - -```bash -cargo build --release -``` +## 0 Build & install Marlin ```bash +# inside the repo +cargo build --release # build the new binary sudo install -Dm755 target/release/marlin /usr/local/bin/marlin ``` +*(`cargo install --path . --locked --force` works too if you prefer.)* + +--- + +## 1 Create the demo tree + ```bash rm -rf ~/marlin_demo mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta,Gamma},Logs,Reports,Scripts,Media/Photos} @@ -72,18 +76,31 @@ chmod +x ~/marlin_demo/Scripts/deploy.sh echo "JPEGDATA" > ~/marlin_demo/Media/Photos/event.jpg ``` +*(copy the file-creation block from your original instructions — nothing about the files needs to change)* + --- -### 1 Initialize & index +## 2 Initialise **and** index (one step) + +`marlin init` now performs a first-time scan of whatever directory you run it in. +So just: ```bash +cd ~/marlin_demo # <-- important: run init from the folder you want indexed marlin init -marlin scan ~/marlin_demo ``` +That will: + +1. create/upgrade the DB, +2. run all migrations exactly once, +3. walk the current directory and ingest every file it finds. + +Need to add more paths later? Use `marlin scan ` exactly as before. 
+ --- -### 2 Attach hierarchical tags +## 3 Tagging examples ```bash # Tag all project markdown as “project/md” @@ -98,101 +115,69 @@ marlin tag "~/marlin_demo/Projects/Beta/**/*" project/beta --- -### 3 Set custom attributes +## 4 Set custom attributes ```bash -# Mark only the “final.md” as complete -marlin attr set "~/marlin_demo/Projects/Beta/final.md" status complete - -# Mark PDF as reviewed -marlin attr set "~/marlin_demo/Reports/*.pdf" reviewed yes +marlin attr set "~/marlin_demo/Projects/Beta/final.md" status complete +marlin attr set "~/marlin_demo/Reports/*.pdf" reviewed yes ``` --- -### 4 Play with search +## 5 Play with search / exec hooks ```bash -# Find all TODOs (in any file) marlin search TODO - -# All markdown under your “project/md” tag marlin search tag:project/md - -# All files tagged “logs/app” containing ERROR marlin search "tag:logs/app AND ERROR" - -# Only your completed Beta deliverable marlin search "attr:status=complete" - -# Only reviewed PDFs marlin search "attr:reviewed=yes AND pdf" - -# Open every reviewed report marlin search "attr:reviewed=yes" --exec 'xdg-open {}' ``` --- -### 5 Try JSON output & verbose mode +## 6 JSON output & verbose mode ```bash marlin --format=json attr ls ~/marlin_demo/Projects/Beta/final.md -marlin --verbose scan ~/marlin_demo +marlin --verbose scan ~/marlin_demo # re-scan to see debug logs ``` --- -### 6 Snapshot & restore +## 7 Snapshot & restore ```bash -# Snapshot snap=$(marlin backup | awk '{print $NF}') - -# Delete your DB to simulate data loss -rm ~/.local/share/marlin/index.db - -# Bring it back +rm ~/.local/share/marlin/index.db # simulate disaster marlin restore "$snap" - -# Confirm you still see “TODO” -marlin search TODO +marlin search TODO # should still work ``` --- -### 7 Test linking functionality +## 8 Linking demo ```bash -# Create two demo files touch ~/marlin_demo/foo.txt ~/marlin_demo/bar.txt +marlin scan ~/marlin_demo # index the new files -# Re-scan to index new files -marlin scan ~/marlin_demo - -# Link foo.txt → bar.txt foo=~/marlin_demo/foo.txt bar=~/marlin_demo/bar.txt -marlin link add "$foo" "$bar" -# List outgoing links for foo.txt -marlin link list "$foo" - -# List incoming links (backlinks) to bar.txt -marlin link backlinks "$bar" +marlin link add "$foo" "$bar" # create link +marlin link list "$foo" # outgoing links from foo +marlin link backlinks "$bar" # incoming links to bar ``` --- -That gives you: +### Recap -* **wide folder structures** (Projects, Logs, Reports, Scripts, Media) -* **hierarchical tags** you can mix and match -* **key-value attributes** to flag state & review -* **FTS5 queries** with AND/OR/NOT -* **`--exec` hooks** to trigger external commands -* **JSON output** for programmatic gluing -* **backups & restores** to guard your data -* **file-to-file links** for graph relationships +* `cargo build --release` + `sudo install …` is still the build path. +* **`cd` to the folder you want indexed and run `marlin init`** — first scan happens automatically. +* Subsequent scans (`marlin scan …`) are only needed for *new* directories you add later. +* No more “forcing reapplication of migration 4” banner and the unused-import warnings are gone. -Have fun playing around! +Happy organising! 
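+
+---
+
+### Appendix: how `tag:` and `attr:` queries hit the index
+
+The searches above work because `run_search` in `src/main.rs` rewrites each prefixed term into FTS5 column filters. A simplified sketch (the helper name `rewrite` is illustrative; the real code also escapes every term before building the query):
+
+```rust
+// Simplified sketch of run_search's query rewriting (src/main.rs).
+fn rewrite(part: &str) -> String {
+    if let Some(tag) = part.strip_prefix("tag:") {
+        // tag:project/md → tags_text:project AND tags_text:md
+        tag.split('/')
+            .filter(|s| !s.is_empty())
+            .map(|seg| format!("tags_text:{seg}"))
+            .collect::<Vec<_>>()
+            .join(" AND ")
+    } else if let Some(attr) = part.strip_prefix("attr:") {
+        // attr:reviewed=yes → attrs_text:reviewed AND attrs_text:yes
+        attr.splitn(2, '=')
+            .map(|kv| format!("attrs_text:{kv}"))
+            .collect::<Vec<_>>()
+            .join(" AND ")
+    } else {
+        part.to_string() // plain terms match path, tags and attrs
+    }
+}
+```
+
+So `rewrite("tag:project/md")` yields `tags_text:project AND tags_text:md`, and because migration 0004 writes full paths like `project/md` into `tags_text`, searching for just `project` still matches files tagged with the child tag.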
diff --git a/src/db/migrations/0004_fix_hierarchical_tags_fts.sql b/src/db/migrations/0004_fix_hierarchical_tags_fts.sql new file mode 100644 index 0000000..273079e --- /dev/null +++ b/src/db/migrations/0004_fix_hierarchical_tags_fts.sql @@ -0,0 +1,289 @@ +-- src/db/migrations/0004_fix_hierarchical_tags_fts.sql +PRAGMA foreign_keys = ON; +PRAGMA journal_mode = WAL; + +-- Force drop all FTS triggers to ensure they're recreated even if migration is already recorded +DROP TRIGGER IF EXISTS files_fts_ai_file; +DROP TRIGGER IF EXISTS files_fts_au_file; +DROP TRIGGER IF EXISTS files_fts_ad_file; +DROP TRIGGER IF EXISTS file_tags_fts_ai; +DROP TRIGGER IF EXISTS file_tags_fts_ad; +DROP TRIGGER IF EXISTS attributes_fts_ai; +DROP TRIGGER IF EXISTS attributes_fts_au; +DROP TRIGGER IF EXISTS attributes_fts_ad; + +-- Create a new trigger for file insertion that uses recursive CTE for full tag paths +CREATE TRIGGER files_fts_ai_file +AFTER INSERT ON files +BEGIN + INSERT INTO files_fts(rowid, path, tags_text, attrs_text) + VALUES ( + NEW.id, + NEW.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = NEW.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = NEW.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = NEW.id) + ); +END; + +-- Recreate the file path update trigger +CREATE TRIGGER files_fts_au_file +AFTER UPDATE OF path ON files +BEGIN + UPDATE files_fts + SET path = NEW.path + WHERE rowid = NEW.id; +END; + +-- Recreate the file deletion trigger +CREATE TRIGGER files_fts_ad_file +AFTER DELETE ON files +BEGIN + DELETE FROM files_fts WHERE rowid = OLD.id; +END; + +-- Create new trigger for tag insertion that uses recursive CTE for full tag paths +CREATE TRIGGER file_tags_fts_ai +AFTER INSERT ON file_tags +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; +END; + +-- Create new trigger for tag deletion that uses recursive CTE for full tag paths +CREATE TRIGGER file_tags_fts_ad +AFTER DELETE ON file_tags +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT 
t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = OLD.file_id; +END; + +-- Create new triggers for attribute operations that use recursive CTE for full tag paths +CREATE TRIGGER attributes_fts_ai +AFTER INSERT ON attributes +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; +END; + +CREATE TRIGGER attributes_fts_au +AFTER UPDATE OF value ON attributes +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; +END; + +CREATE TRIGGER attributes_fts_ad +AFTER DELETE ON attributes +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM 
files f
+    WHERE f.id = OLD.file_id;
+END;
+
+-- Update all existing FTS entries with the new tag-path format
+INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
+SELECT f.id, f.path,
+    (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+     FROM (
+        WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+            SELECT t.id, t.name, t.parent_id, t.name
+            FROM tags t
+            WHERE t.parent_id IS NULL
+
+            UNION ALL
+
+            SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
+            FROM tags t
+            JOIN tag_tree tt ON t.parent_id = tt.id
+        )
+        SELECT DISTINCT tag_tree.path AS tag_path
+        FROM file_tags ft
+        JOIN tag_tree ON ft.tag_id = tag_tree.id
+        WHERE ft.file_id = f.id
+
+        UNION
+
+        SELECT t.name AS tag_path
+        FROM file_tags ft
+        JOIN tags t ON ft.tag_id = t.id
+        WHERE ft.file_id = f.id AND t.parent_id IS NULL
+    )),
+    (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
+     FROM attributes a
+     WHERE a.file_id = f.id)
+FROM files f;
diff --git a/src/db/migrations/mod.rs b/src/db/migrations/mod.rs
new file mode 100644
index 0000000..2e00341
--- /dev/null
+++ b/src/db/migrations/mod.rs
@@ -0,0 +1,260 @@
+use std::{
+    fs,
+    path::{Path, PathBuf},
+};
+
+use anyhow::{Context, Result};
+use chrono::Local;
+use rusqlite::{
+    backup::{Backup, StepResult},
+    params,
+    Connection,
+    OpenFlags,
+    OptionalExtension,
+};
+use tracing::{debug, info};
+
+/// Embed every numbered migration file here.
+/// NOTE: `include_str!` paths are relative to this file, which already lives
+/// in src/db/migrations/, so the filenames carry no directory prefix.
+const MIGRATIONS: &[(&str, &str)] = &[
+    ("0001_initial_schema.sql", include_str!("0001_initial_schema.sql")),
+    ("0002_update_fts_and_triggers.sql", include_str!("0002_update_fts_and_triggers.sql")),
+    ("0003_create_links_collections_views.sql", include_str!("0003_create_links_collections_views.sql")),
+    ("0004_fix_hierarchical_tags_fts.sql", include_str!("0004_fix_hierarchical_tags_fts.sql")),
+];
+
+/* ─── connection bootstrap ──────────────────────────────────────────── */
+
+pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
+    let db_path_ref = db_path.as_ref();
+    let mut conn = Connection::open(db_path_ref)
+        .with_context(|| format!("failed to open DB at {}", db_path_ref.display()))?;
+
+    conn.pragma_update(None, "journal_mode", "WAL")?;
+    conn.pragma_update(None, "foreign_keys", "ON")?;
+
+    // Apply migrations (drops & recreates all FTS triggers)
+    apply_migrations(&mut conn)?;
+
+    Ok(conn)
+}
+
+/* ─── migration runner ──────────────────────────────────────────────── */
+
+fn apply_migrations(conn: &mut Connection) -> Result<()> {
+    // Ensure schema_version table
+    conn.execute_batch(
+        "CREATE TABLE IF NOT EXISTS schema_version (
+            version    INTEGER PRIMARY KEY,
+            applied_on TEXT NOT NULL
+        );",
+    )?;
+
+    // Legacy patch (ignore if exists)
+    let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);
+
+    let tx = conn.transaction()?;
+
+    for (fname, sql) in MIGRATIONS {
+        let version: i64 = fname
+            .split('_')
+            .next()
+            .and_then(|s| s.parse().ok())
+            .expect("migration filenames start with number");
+
+        let already: Option<i64> = tx
+            .query_row(
+                "SELECT version FROM schema_version WHERE version = ?1",
+                [version],
+                |r| r.get(0),
+            )
+            .optional()?;
+
+        if already.is_some() {
+            debug!("migration {} already applied", fname);
+            continue;
+        }
+
+        info!("applying migration {}", fname);
+        println!(
+            "\nSQL SCRIPT FOR MIGRATION: {}\nBEGIN SQL >>>\n{}\n<<< END SQL\n",
+            fname, sql
+        );
+
+        tx.execute_batch(sql)
+            .with_context(|| format!("could not apply migration {}", fname))?;
+
+        tx.execute(
+            "INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
+            params![version, Local::now().to_rfc3339()],
+        )?;
+    }
+
+    tx.commit()?;
+    Ok(())
+}
+
+/* ─── helpers ───────────────────────────────────────────────────────── */
+
+pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
+    let mut parent: Option<i64> = None;
+    for segment in path.split('/').filter(|s| !s.is_empty()) {
+        conn.execute(
+            "INSERT OR IGNORE INTO tags(name, parent_id) VALUES (?1, ?2)",
+            params![segment, parent],
+        )?;
+        let id: i64 = conn.query_row(
+            "SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)",
+            params![segment, parent],
+            |row| row.get(0),
+        )?;
+        parent = Some(id);
+    }
+    parent.ok_or_else(|| anyhow::anyhow!("empty tag path"))
+}
+
+pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
+    conn.query_row("SELECT id FROM files WHERE path = ?1", [path], |r| r.get(0))
+        .map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
+}
+
+pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
+    conn.execute(
+        r#"
+        INSERT INTO attributes(file_id, key, value)
+        VALUES (?1, ?2, ?3)
+        ON CONFLICT(file_id, key) DO UPDATE SET value = excluded.value
+        "#,
+        params![file_id, key, value],
+    )?;
+    Ok(())
+}
+
+/// Add a typed link from one file to another.
+pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+    conn.execute(
+        "INSERT INTO links(src_file_id, dst_file_id, type)
+         VALUES (?1, ?2, ?3)
+         ON CONFLICT(src_file_id, dst_file_id, type) DO NOTHING",
+        params![src_file_id, dst_file_id, link_type],
+    )?;
+    Ok(())
+}
+
+/// Remove a typed link between two files.
+pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+    conn.execute(
+        "DELETE FROM links
+         WHERE src_file_id = ?1
+           AND dst_file_id = ?2
+           AND (type IS ?3 OR type = ?3)",
+        params![src_file_id, dst_file_id, link_type],
+    )?;
+    Ok(())
+}
+
+/// List all links for files matching a glob-style pattern.
+/// `direction` may be `"in"` (incoming), `"out"` (outgoing), or `None` (outgoing).
+pub fn list_links(
+    conn: &Connection,
+    pattern: &str,
+    direction: Option<&str>,
+    link_type: Option<&str>,
+) -> Result<Vec<(String, String, Option<String>)>> {
+    // Convert glob '*' → SQL LIKE '%'
+    let like_pattern = pattern.replace('*', "%");
+
+    // Find matching files
+    let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
+    let mut rows = stmt.query(params![like_pattern])?;
+    let mut files = Vec::new();
+    while let Some(row) = rows.next()? {
+        let id: i64 = row.get(0)?;
+        let path: String = row.get(1)?;
+        files.push((id, path));
+    }
+
+    let mut results = Vec::new();
+    for (file_id, file_path) in files {
+        let (src_col, dst_col) = match direction {
+            Some("in") => ("dst_file_id", "src_file_id"),
+            _ => ("src_file_id", "dst_file_id"),
+        };
+
+        let sql = format!(
+            "SELECT f2.path, l.type
+             FROM links l
+             JOIN files f2 ON f2.id = l.{dst}
+             WHERE l.{src} = ?1
+               AND (?2 IS NULL OR l.type = ?2)",
+            src = src_col,
+            dst = dst_col,
+        );
+
+        let mut stmt2 = conn.prepare(&sql)?;
+        let mut rows2 = stmt2.query(params![file_id, link_type])?;
+        while let Some(r2) = rows2.next()? {
+            let other: String = r2.get(0)?;
+            let typ: Option<String> = r2.get(1)?;
+            results.push((file_path.clone(), other, typ));
+        }
+    }
+
+    Ok(results)
+}
+
+/// Find all incoming links (backlinks) to files matching a pattern.
+pub fn find_backlinks(conn: &Connection, pattern: &str) -> Result<Vec<(String, Option<String>)>> {
+    let like_pattern = pattern.replace('*', "%");
+    let mut stmt = conn.prepare(
+        "SELECT f1.path, l.type
+         FROM links l
+         JOIN files f1 ON f1.id = l.src_file_id
+         JOIN files f2 ON f2.id = l.dst_file_id
+         WHERE f2.path LIKE ?1",
+    )?;
+    let mut rows = stmt.query(params![like_pattern])?;
+    let mut result = Vec::new();
+    while let Some(row) = rows.next()? {
+        let src_path: String = row.get(0)?;
+        let typ: Option<String> = row.get(1)?;
+        result.push((src_path, typ));
+    }
+    Ok(result)
+}
+
+/* ─── backup / restore ──────────────────────────────────────────────── */
+
+pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
+    let src = db_path.as_ref();
+    let dir = src
+        .parent()
+        .ok_or_else(|| anyhow::anyhow!("invalid DB path: {}", src.display()))?
+        .join("backups");
+    fs::create_dir_all(&dir)?;
+
+    let stamp = Local::now().format("%Y-%m-%d_%H-%M-%S");
+    let dst = dir.join(format!("backup_{stamp}.db"));
+
+    let src_conn = Connection::open_with_flags(src, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
+    let mut dst_conn = Connection::open(&dst)?;
+
+    let bk = Backup::new(&src_conn, &mut dst_conn)?;
+    while let StepResult::More = bk.step(100)? {}
+    Ok(dst)
+}
+
+pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
+    fs::copy(&backup_path, &live_db_path)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn migrations_apply_in_memory() {
+        // Opening an in-memory database should apply every migration without error.
+        let _conn = open(":memory:").expect("in-memory migrations should run cleanly");
+    }
+}
diff --git a/src/db/mod.rs b/src/db/mod.rs
index 85c69bd..575f591 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -12,15 +12,20 @@ use rusqlite::{
     OpenFlags,
     OptionalExtension,
 };
-use tracing::{debug, info};
+use tracing::{debug, info, warn};
 
 /// Embed every numbered migration file here.
 const MIGRATIONS: &[(&str, &str)] = &[
     ("0001_initial_schema.sql", include_str!("migrations/0001_initial_schema.sql")),
     ("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
     ("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
+    ("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
 ];
 
+/// Migrations that should *always* be re-run.
+/// We no longer need to force any, so leave it empty.
+const FORCE_APPLY_MIGRATIONS: &[i64] = &[]; // <- was &[4] + /* ─── connection bootstrap ──────────────────────────────────────────── */ pub fn open>(db_path: P) -> Result { @@ -51,6 +56,14 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> { // Legacy patch (ignore if exists) let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []); + // Force-remove migrations that should always be applied + for &version in FORCE_APPLY_MIGRATIONS { + let rows_affected = conn.execute("DELETE FROM schema_version WHERE version = ?1", [version])?; + if rows_affected > 0 { + info!("Forcing reapplication of migration {}", version); + } + } + let tx = conn.transaction()?; for (fname, sql) in MIGRATIONS { @@ -89,6 +102,37 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> { } tx.commit()?; + + // Verify that all migrations have been applied + let mut missing_migrations = Vec::new(); + for (fname, _) in MIGRATIONS { + let version: i64 = fname + .split('_') + .next() + .and_then(|s| s.parse().ok()) + .expect("migration filenames start with number"); + + let exists: bool = conn + .query_row( + "SELECT 1 FROM schema_version WHERE version = ?1", + [version], + |_| Ok(true), + ) + .optional()? + .unwrap_or(false); + + if !exists { + missing_migrations.push(version); + } + } + + if !missing_migrations.is_empty() { + warn!( + "The following migrations were not applied: {:?}. This may indicate a problem with the migration system.", + missing_migrations + ); + } + Ok(()) } diff --git a/src/main.rs b/src/main.rs index 897bc57..f9809bc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,36 +6,39 @@ mod logging; mod scan; use anyhow::{Context, Result}; -use clap::{Parser, Subcommand, CommandFactory}; -use clap_complete::{generate, Shell}; +use clap::{Parser, CommandFactory}; +use clap_complete::generate; use glob::Pattern; -use rusqlite::{params, OptionalExtension}; +use rusqlite::params; use shellexpand; use shlex; use std::{env, io, path::PathBuf, process::Command}; use tracing::{debug, error, info}; use walkdir::WalkDir; -use cli::{Cli, Commands, Format}; +use cli::{Cli, Commands}; fn main() -> Result<()> { - // Parse CLI and bootstrap logging - let mut args = Cli::parse(); + /* ── CLI parsing & logging ────────────────────────────────────── */ + + let args = Cli::parse(); if args.verbose { env::set_var("RUST_LOG", "debug"); } logging::init(); - // If the user asked for completions, generate and exit immediately. + /* ── shell-completion shortcut ───────────────────────────────── */ + if let Commands::Completions { shell } = &args.command { let mut cmd = Cli::command(); generate(*shell, &mut cmd, "marlin", &mut io::stdout()); return Ok(()); } - let cfg = config::Config::load()?; + /* ── config & automatic backup ───────────────────────────────── */ + + let cfg = config::Config::load()?; // DB path etc. - // Backup before any non-init, non-backup/restore command match &args.command { Commands::Init | Commands::Backup | Commands::Restore { .. } => {} _ => match db::backup(&cfg.db_path) { @@ -44,18 +47,29 @@ fn main() -> Result<()> { }, } - // Open (and migrate) the DB + /* ── open DB (runs migrations if needed) ─────────────────────── */ + let mut conn = db::open(&cfg.db_path)?; - // Dispatch all commands + /* ── command dispatch ────────────────────────────────────────── */ + match args.command { - Commands::Completions { .. } => {} + Commands::Completions { .. 
} => {} // already handled + Commands::Init => { info!("Database initialised at {}", cfg.db_path.display()); + + // Always (re-)scan the current directory so even an existing DB + // picks up newly created files in the working tree. + let cwd = env::current_dir().context("getting current directory")?; + let count = scan::scan_directory(&mut conn, &cwd) + .context("initial scan failed")?; + info!("Initial scan complete – indexed/updated {} files", count); } + Commands::Scan { paths } => { let scan_paths = if paths.is_empty() { - vec![std::env::current_dir()?] + vec![env::current_dir()?] } else { paths }; @@ -63,26 +77,21 @@ fn main() -> Result<()> { scan::scan_directory(&mut conn, &p)?; } } - Commands::Tag { pattern, tag_path } => { - apply_tag(&conn, &pattern, &tag_path)?; - } - Commands::Attr { action } => match action { + + Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?, + Commands::Attr { action } => match action { cli::AttrCmd::Set { pattern, key, value } => { - attr_set(&conn, &pattern, &key, &value)?; - } - cli::AttrCmd::Ls { path } => { - attr_ls(&conn, &path)?; + attr_set(&conn, &pattern, &key, &value)? } + cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?, }, - Commands::Search { query, exec } => { - run_search(&conn, &query, exec)?; - } - Commands::Backup => { + Commands::Search { query, exec } => run_search(&conn, &query, exec)?, + Commands::Backup => { let path = db::backup(&cfg.db_path)?; println!("Backup created: {}", path.display()); } - Commands::Restore { backup_path } => { - drop(conn); + Commands::Restore { backup_path } => { + drop(conn); // close handle before overwrite db::restore(&backup_path, &cfg.db_path) .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; println!("Restored DB from {}", backup_path.display()); @@ -90,20 +99,24 @@ fn main() -> Result<()> { .with_context(|| format!("Could not open restored DB at {}", cfg.db_path.display()))?; info!("Successfully opened restored database."); } - Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?, - Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?, - Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?, + + /* passthrough sub-modules that still stub out their logic */ + Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?, + Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?, + Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?, Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?, - Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?, - Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?, + Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?, + Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?, Commands::Annotate(an_cmd) => cli::annotate::run(&an_cmd, &mut conn, args.format)?, - Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?, - Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?, + Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?, + Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?, } Ok(()) } +/* ───────────────────────── helpers & sub-routines ────────────────── */ + /// Apply a hierarchical tag to all files matching the glob pattern. 
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> { // ensure_tag_path returns the deepest-node ID @@ -114,13 +127,15 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu let mut current = Some(leaf_tag_id); while let Some(id) = current { tag_ids.push(id); - current = conn - .query_row( - "SELECT parent_id FROM tags WHERE id = ?1", - params![id], - |r| r.get::<_, Option>(0), - ) - .optional()?; + current = match conn.query_row( + "SELECT parent_id FROM tags WHERE id = ?1", + params![id], + |r| r.get::<_, Option>(0), + ) { + Ok(parent_id) => parent_id, + Err(rusqlite::Error::QueryReturnedNoRows) => None, + Err(e) => return Err(e.into()), + }; } let expanded = shellexpand::tilde(pattern).into_owned(); @@ -128,9 +143,10 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu .with_context(|| format!("Invalid glob pattern `{}`", expanded))?; let root = determine_scan_root(&expanded); - let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?; - let mut stmt_insert = - conn.prepare("INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)")?; + let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?; + let mut stmt_insert = conn.prepare( + "INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)", + )?; let mut count = 0; for entry in WalkDir::new(&root) @@ -148,7 +164,6 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) { Ok(file_id) => { - // insert every segment tag let mut newly = false; for &tid in &tag_ids { if stmt_insert.execute(params![file_id, tid])? > 0 { @@ -236,7 +251,8 @@ fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> { let mut stmt = conn.prepare( "SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key", )?; - for row in stmt.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))? { + for row in stmt.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))? + { let (k, v) = row?; println!("{k} = {v}"); } @@ -244,8 +260,8 @@ fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> { } /// Build and run an FTS5 search query, with optional exec. 
-/// “tag:foo/bar” → tags_text:foo AND tags_text:bar
-/// “attr:key=value” → attrs_text:key=value
+/// “tag:foo/bar” → tags_text:foo AND tags_text:bar
+/// “attr:k=v”    → attrs_text:k AND attrs_text:v
 fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
     let mut fts_query_parts = Vec::new();
     let parts = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
@@ -261,8 +277,15 @@
                 fts_query_parts.push(format!("tags_text:{}", escape_fts_query_term(seg)));
             }
         } else if let Some(attr) = part.strip_prefix("attr:") {
-            // keep whole key=value together
-            fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(attr)));
+            let mut kv = attr.splitn(2, '=');
+            let key = kv.next().unwrap();
+            if let Some(value) = kv.next() {
+                fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(key)));
+                fts_query_parts.push("AND".into());
+                fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(value)));
+            } else {
+                fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(key)));
+            }
         } else {
             fts_query_parts.push(escape_fts_query_term(&part));
         }
@@ -347,7 +370,11 @@ fn determine_scan_root(pattern: &str) -> PathBuf {
     let wildcard_pos = pattern.find(|c| c == '*' || c == '?' || c == '[').unwrap_or(pattern.len());
     let prefix = &pattern[..wildcard_pos];
     let mut root = PathBuf::from(prefix);
-    while root.as_os_str().to_string_lossy().contains(|c| ['*', '?', '['].contains(&c)) {
+    while root
+        .as_os_str()
+        .to_string_lossy()
+        .contains(|c| ['*', '?', '['].contains(&c))
+    {
         if let Some(parent) = root.parent() {
             root = parent.to_path_buf();
         } else {
diff --git a/src/test_hierarchical_tags.rs b/src/test_hierarchical_tags.rs
new file mode 100644
index 0000000..5c36911
--- /dev/null
+++ b/src/test_hierarchical_tags.rs
@@ -0,0 +1,240 @@
+// Test script to validate hierarchical tag FTS fix
+// This script demonstrates how the fix works with a simple test case
+
+use rusqlite::{Connection, params};
+use std::path::Path;
+use std::fs;
+use anyhow::Result;
+
+fn main() -> Result<()> {
+    // Create a test database in a temporary location
+    let db_path = Path::new("/tmp/marlin_test.db");
+    if db_path.exists() {
+        fs::remove_file(db_path)?;
+    }
+
+    println!("Creating test database at {:?}", db_path);
+
+    // Initialize database with our schema and migrations
+    let conn = Connection::open(db_path)?;
+
+    // Apply schema (simplified version of what's in the migrations)
+    println!("Applying schema...");
+    conn.execute_batch(
+        "PRAGMA foreign_keys = ON;
+        PRAGMA journal_mode = WAL;
+
+        CREATE TABLE files (
+            id INTEGER PRIMARY KEY,
+            path TEXT NOT NULL UNIQUE,
+            size INTEGER,
+            mtime INTEGER,
+            hash TEXT
+        );
+
+        CREATE TABLE tags (
+            id INTEGER PRIMARY KEY,
+            name TEXT NOT NULL,
+            parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
+            canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
+            UNIQUE(name, parent_id)
+        );
+
+        CREATE TABLE file_tags (
+            file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+            tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
+            PRIMARY KEY(file_id, tag_id)
+        );
+
+        CREATE TABLE attributes (
+            id INTEGER PRIMARY KEY,
+            file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+            key TEXT NOT NULL,
+            value TEXT,
+            UNIQUE(file_id, key)
+        );
+
+        CREATE VIRTUAL TABLE files_fts
+        USING fts5(
+            path,
+            tags_text,
+            attrs_text,
+            content='',
+            tokenize=\"unicode61 remove_diacritics 2\"
+        );"
+    )?;
+
+    // Apply
our fixed triggers + println!("Applying fixed FTS triggers..."); + conn.execute_batch( + "CREATE TRIGGER files_fts_ai_file + AFTER INSERT ON files + BEGIN + INSERT INTO files_fts(rowid, path, tags_text, attrs_text) + VALUES ( + NEW.id, + NEW.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path as tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = NEW.id + + UNION + + SELECT t.name as tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = NEW.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = NEW.id) + ); + END; + + CREATE TRIGGER file_tags_fts_ai + AFTER INSERT ON file_tags + BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path as tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name as tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; + END;" + )?; + + // Insert test data + println!("Inserting test data..."); + + // Insert a test file + conn.execute( + "INSERT INTO files (id, path) VALUES (1, '/test/document.md')", + [], + )?; + + // Create hierarchical tags: project/md + println!("Creating hierarchical tags: project/md"); + + // Insert parent tag 'project' + conn.execute( + "INSERT INTO tags (id, name, parent_id) VALUES (1, 'project', NULL)", + [], + )?; + + // Insert child tag 'md' under 'project' + conn.execute( + "INSERT INTO tags (id, name, parent_id) VALUES (2, 'md', 1)", + [], + )?; + + // Tag the file with the 'md' tag (which is under 'project') + conn.execute( + "INSERT INTO file_tags (file_id, tag_id) VALUES (1, 2)", + [], + )?; + + // Check what's in the FTS index + println!("\nChecking FTS index content:"); + let mut stmt = conn.prepare("SELECT rowid, path, tags_text, attrs_text FROM files_fts")?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, String>(3)?, + )) + })?; + + for row in rows { + let (id, path, tags, attrs) = row?; + println!("ID: {}, Path: {}, Tags: '{}', Attrs: '{}'", id, path, tags, attrs); + } + + // Test searching for the full hierarchical tag path + println!("\nTesting search for 'project/md':"); + let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project/md'")?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + + let mut found = false; + for row in rows { + found = true; + println!("Found file: {}", row?); 
+ } + + if !found { + println!("No files found with tag 'project/md'"); + } + + // Test searching for just the parent tag + println!("\nTesting search for just 'project':"); + let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project'")?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + + let mut found = false; + for row in rows { + found = true; + println!("Found file: {}", row?); + } + + if !found { + println!("No files found with tag 'project'"); + } + + // Test searching for just the child tag + println!("\nTesting search for just 'md':"); + let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'md'")?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + + let mut found = false; + for row in rows { + found = true; + println!("Found file: {}", row?); + } + + if !found { + println!("No files found with tag 'md'"); + } + + println!("\nTest completed successfully!"); + Ok(()) +} diff --git a/target/release/marlin b/target/release/marlin index e7ddccb..5694a65 100755 Binary files a/target/release/marlin and b/target/release/marlin differ diff --git a/target/release/marlin.d b/target/release/marlin.d index e155977..391de91 100644 --- a/target/release/marlin.d +++ b/target/release/marlin.d @@ -1 +1 @@ -/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs +/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs 
/home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs
diff --git a/tests/e2e.rs b/tests/e2e.rs
new file mode 100644
index 0000000..a97cb99
--- /dev/null
+++ b/tests/e2e.rs
@@ -0,0 +1,103 @@
+//! End-to-end smoke-tests for the marlin binary.
+//!
+//! Run with `cargo test --test e2e` or let CI invoke `cargo test`.
+
+use assert_cmd::prelude::*;
+use predicates::prelude::*;
+use std::{fs, path::PathBuf, process::Command};
+use tempfile::tempdir;
+
+/// Absolute path to the `marlin` binary Cargo just built for this test run.
+fn marlin_bin() -> PathBuf {
+    PathBuf::from(env!("CARGO_BIN_EXE_marlin"))
+}
+
+fn spawn_demo_tree(root: &PathBuf) {
+    fs::create_dir_all(root.join("Projects/Alpha")).unwrap();
+    fs::create_dir_all(root.join("Projects/Beta")).unwrap();
+    fs::create_dir_all(root.join("Projects/Gamma")).unwrap();
+    fs::create_dir_all(root.join("Logs")).unwrap();
+    fs::create_dir_all(root.join("Reports")).unwrap();
+
+    fs::write(root.join("Projects/Alpha/draft1.md"), "- [ ] TODO foo\n").unwrap();
+    fs::write(root.join("Projects/Alpha/draft2.md"), "- [x] TODO foo\n").unwrap();
+    fs::write(root.join("Projects/Beta/final.md"), "done\n").unwrap();
+    fs::write(root.join("Projects/Gamma/TODO.txt"), "TODO bar\n").unwrap();
+    fs::write(root.join("Logs/app.log"), "ERROR omg\n").unwrap();
+    fs::write(root.join("Reports/Q1.pdf"), "PDF\n").unwrap();
+}
+
+fn run(cmd: &mut Command) -> assert_cmd::assert::Assert {
+    cmd.assert().success()
+}
+
+#[test]
+fn full_cli_flow() -> Result<(), Box<dyn std::error::Error>> {
+    // 1. sandbox
+    let tmp = tempdir()?;
+    let demo_dir = tmp.path().join("marlin_demo");
+    spawn_demo_tree(&demo_dir);
+
+    // 2. init (auto-scan cwd)
+    run(Command::new(marlin_bin())
+        .current_dir(&demo_dir)
+        .arg("init"));
+
+    // 3. tag & attr
+    run(Command::new(marlin_bin())
+        .arg("tag")
+        .arg(format!("{}/Projects/**/*.md", demo_dir.display()))
+        .arg("project/md"));
+
+    run(Command::new(marlin_bin())
+        .arg("attr")
+        .arg("set")
+        .arg(format!("{}/Reports/*.pdf", demo_dir.display()))
+        .arg("reviewed")
+        .arg("yes"));
+
+    // 4. search expectations
+    Command::new(marlin_bin())
+        .arg("search")
+        .arg("TODO")
+        .assert()
+        .stdout(predicate::str::contains("TODO.txt"));
+
+    Command::new(marlin_bin())
+        .arg("search")
+        .arg("attr:reviewed=yes")
+        .assert()
+        .stdout(predicate::str::contains("Q1.pdf"));
+
+    // 5. link & backlinks
+    let foo = demo_dir.join("foo.txt");
+    let bar = demo_dir.join("bar.txt");
+    fs::write(&foo, "")?;
+    fs::write(&bar, "")?;
+    run(Command::new(marlin_bin()).arg("scan").arg(&demo_dir));
+    run(Command::new(marlin_bin())
+        .arg("link").arg("add")
+        .arg(&foo).arg(&bar));
+    Command::new(marlin_bin())
+        .arg("link").arg("backlinks").arg(&bar)
+        .assert()
+        .stdout(predicate::str::contains("foo.txt"));
+
+    // 6.
backup / restore round-trip + let backup_path = String::from_utf8( + Command::new(marlin_bin()).arg("backup").output()?.stdout + )?; + let backup_file = backup_path.split_whitespace().last().unwrap(); + + // wipe DB file + std::fs::remove_file(dirs::data_dir().unwrap().join("marlin/index.db"))?; + run(Command::new(marlin_bin()).arg("restore").arg(backup_file)); + + // sanity: search still works + Command::new(marlin_bin()) + .arg("search").arg("TODO") + .assert() + .stdout(predicate::str::contains("TODO.txt")); + + Ok(()) +} diff --git a/tests/test.md b/tests/test.md new file mode 100644 index 0000000..3b211d1 --- /dev/null +++ b/tests/test.md @@ -0,0 +1,68 @@ +# Testing + +Below is a **repeat-able 3-step flow** you can use **every time you pull fresh code**. + +--- + +## 0 Prepare once + +```bash +# Run once (or add to ~/.bashrc) so debug + release artefacts land +# in the same predictable place. Speeds-up future builds. +export CARGO_TARGET_DIR=target +``` + +--- + +## 1 Build the new binary + +```bash +git pull # grab the latest commit +cargo build --release +sudo install -Dm755 target/release/marlin /usr/local/bin/marlin +``` + +* `cargo build --release` – builds the optimised binary. +* `install …` – copies it into your `$PATH` so `marlin` on the CLI is the fresh one. + +--- + +## 2 Run the smoke-test suite + +```bash +# Runs the end-to-end test we added in tests/e2e.rs +cargo test --test e2e -- --nocapture +``` + +* `--test e2e` – compiles and runs **only** `tests/e2e.rs`; other unit-tests are skipped (add them later if you like). +* `--nocapture` – streams stdout/stderr so you can watch each CLI step in real time. +* Exit-code **0** ➜ everything passed. + Any non-zero exit or a red ✗ line means a step failed; the assert’s diff will show the command and its output. + +--- + +## 3 (Optionally) run all tests + +```bash +cargo test --all -- --nocapture +``` + +This will execute: + +* unit tests in `src/**` +* every file in `tests/` +* doc-tests + +If you wire **“cargo test --all”** into CI (GitHub Actions, GitLab, etc.), pushes that break a workflow will be rejected automatically. + +--- + +### One-liner helper (copy/paste) + +```bash +git pull && cargo build --release && +sudo install -Dm755 target/release/marlin /usr/local/bin/marlin && +cargo test --test e2e -- --nocapture +``` + +Stick that in a shell alias (`alias marlin-ci='…'`) and you’ve got a 5-second upgrade-and-verify loop.
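+
+---
+
+For reference, the "unit tests in `src/**`" that step 3 picks up are not hypothetical either; this change ships one, a smoke check that opens an in-memory database and thereby forces every embedded migration to run (from `src/db/migrations/mod.rs`):
+
+```rust
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn migrations_apply_in_memory() {
+        // Opening an in-memory database should apply every migration without error.
+        let _conn = open(":memory:").expect("in-memory migrations should run cleanly");
+    }
+}
+```
+
+A plain `cargo test --all` therefore covers migration correctness, the CLI flow in `tests/e2e.rs`, and any doc-tests in a single run.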