This commit is contained in:
thePR0M3TH3AN
2025-05-16 16:07:23 -04:00
parent 039d67af43
commit 9ed57d15c7
13 changed files with 1425 additions and 225 deletions

224
Cargo.lock generated
View File

@@ -94,6 +94,22 @@ version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "assert_cmd"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66"
dependencies = [
"anstyle",
"bstr",
"doc-comment",
"libc",
"predicates",
"predicates-core",
"predicates-tree",
"wait-timeout",
]
[[package]]
name = "autocfg"
version = "1.4.0"
@@ -106,6 +122,17 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bstr"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.9",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
@@ -202,6 +229,12 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "difflib"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "directories"
version = "5.0.1"
@@ -211,6 +244,15 @@ dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "6.0.0"
@@ -244,6 +286,22 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "errno"
version = "0.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18"
dependencies = [
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "fallible-iterator"
version = "0.3.0"
@@ -256,6 +314,21 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "float-cmp"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
dependencies = [
"num-traits",
]
[[package]]
name = "getrandom"
version = "0.2.16"
@@ -264,7 +337,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
]
[[package]]
@@ -370,6 +455,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "log"
version = "0.4.27"
@@ -381,14 +472,18 @@ name = "marlin"
version = "0.1.0"
dependencies = [
"anyhow",
"assert_cmd",
"chrono",
"clap",
"clap_complete",
"directories",
"dirs 5.0.1",
"glob",
"predicates",
"rusqlite",
"shellexpand",
"shlex",
"tempfile",
"tracing",
"tracing-subscriber",
"walkdir",
@@ -409,6 +504,12 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "normalize-line-endings"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@@ -458,6 +559,36 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "predicates"
version = "3.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573"
dependencies = [
"anstyle",
"difflib",
"float-cmp",
"normalize-line-endings",
"predicates-core",
"regex",
]
[[package]]
name = "predicates-core"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa"
[[package]]
name = "predicates-tree"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c"
dependencies = [
"predicates-core",
"termtree",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
@@ -476,13 +607,19 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
[[package]]
name = "redox_users"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
dependencies = [
"getrandom",
"getrandom 0.2.16",
"libredox",
"thiserror 1.0.69",
]
@@ -493,7 +630,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
dependencies = [
"getrandom",
"getrandom 0.2.16",
"libredox",
"thiserror 2.0.12",
]
@@ -556,6 +693,19 @@ dependencies = [
"smallvec",
]
[[package]]
name = "rustix"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.59.0",
]
[[package]]
name = "rustversion"
version = "1.0.20"
@@ -571,6 +721,26 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
@@ -586,7 +756,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb"
dependencies = [
"dirs",
"dirs 6.0.0",
]
[[package]]
@@ -618,6 +788,25 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tempfile"
version = "3.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
dependencies = [
"fastrand",
"getrandom 0.3.3",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "termtree"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -759,6 +948,15 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "walkdir"
version = "2.5.0"
@@ -775,6 +973,15 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
@@ -1062,6 +1269,15 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]
[[package]]
name = "zerocopy"
version = "0.8.25"

View File

@@ -16,3 +16,9 @@ shlex = "1.3"
chrono = "0.4"
shellexpand = "3.1"
clap_complete = "4.1"
[dev-dependencies]
assert_cmd = "2"
predicates = "3"
tempfile = "3"
dirs = "5" # cross-platform data dir helper

174
README.md
View File

@@ -60,6 +60,74 @@ sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
For a concise walkthrough, see [Quick start & Demo](marlin_demo.md).
## Testing
Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
---
### 0 Prepare once
```bash
# Run once (or add to ~/.bashrc) so debug + release artefacts land
# in the same predictable place. Speeds-up future builds.
export CARGO_TARGET_DIR=target
```
---
### 1 Build the new binary
```bash
git pull # grab the latest commit
cargo build --release
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
* `cargo build --release` builds the optimised binary.
* `install …` copies it into your `$PATH` so `marlin` on the CLI is the fresh one.
---
### 2 Run the smoke-test suite
```bash
# Runs the end-to-end test we added in tests/e2e.rs
cargo test --test e2e -- --nocapture
```
* `--test e2e` compiles and runs **only** `tests/e2e.rs`; other unit-tests are skipped (add them later if you like).
* `--nocapture` streams stdout/stderr so you can watch each CLI step in real time.
* Exit-code **0** ➜ everything passed.
Any non-zero exit or a red ✗ line means a step failed; the assertion diff will show the command and its output.
---
### 3 (Optionally) run all tests
```bash
cargo test --all -- --nocapture
```
This will execute:
* unit tests in `src/**`
* every file in `tests/`
* doc-tests
If you wire **“cargo test --all”** into CI (GitHub Actions, GitLab, etc.), pushes that break a workflow will be rejected automatically.
---
#### One-liner helper (copy/paste)
```bash
git pull && cargo build --release &&
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
cargo test --test e2e -- --nocapture
```
Stick that in a shell alias (`alias marlin-ci='…'`) and you've got a 5-second upgrade-and-verify loop.
### Database location
@@ -129,112 +197,6 @@ The versioned migration system preserves your data across upgrades.
---
## Five-Minute Quickstart
Just paste & run each block in your terminal.
### 0Prepare, build & install
```bash
cd ~/Documents/GitHub/Marlin
cargo build --release
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
> Now `marlin` is available everywhere.
### 1Enable shell completion
```bash
mkdir -p ~/.config/bash_completion.d
marlin completions bash > ~/.config/bash_completion.d/marlin
```
### 2Prepare a clean demo directory
```bash
rm -rf ~/marlin_demo
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
printf "Alpha draft\n" > ~/marlin_demo/Projects/Alpha/draft.txt
printf "Beta notes\n" > ~/marlin_demo/Projects/Beta/notes.md
printf "Receipt PDF\n" > ~/marlin_demo/Docs/receipt.pdf
printf "fake jpg\n" > ~/marlin_demo/Media/Photos/vacation.jpg
```
### 3Initialize & index files
```bash
marlin init
marlin scan ~/marlin_demo
# show every path tested:
marlin --verbose scan ~/marlin_demo
```
> Only changed files get re-indexed on subsequent runs.
### 4Attach tags & attributes
```bash
# Tag everything under “Alpha”
marlin tag ~/marlin_demo/Projects/Alpha/**/* project/alpha
# Mark all PDFs as reviewed
marlin attr set ~/marlin_demo/**/*.pdf reviewed yes
# Output as JSON instead:
marlin --format=json attr set ~/marlin_demo/**/*.pdf reviewed yes
```
### 5Search your index
```bash
# By tag or filename
marlin search alpha
# Combined terms:
marlin search "reviewed AND pdf"
# Run a command on each hit:
marlin search reviewed --exec 'echo HIT → {}'
```
### 6Backup & restore
```bash
# Snapshot
snap=$(marlin backup | awk '{print $NF}')
# Simulate loss
rm ~/.local/share/marlin/index.db
# Restore
marlin restore "$snap"
# Verify
marlin search reviewed
```
---
##### What you just exercised
| Command | Purpose |
| ----------------- | ----------------------------------------- |
| `marlin init` | Create / upgrade the SQLite database |
| `marlin scan` | Walk directories and (re)index files |
| `marlin tag` | Attach hierarchical tags |
| `marlin attr set` | Add/overwrite custom key-value attributes |
| `marlin search` | FTS5 search across path / tags / attrs |
| `--exec` | Pipe hits into any shell command |
| `marlin backup` | Timestamped snapshot of the DB |
| `marlin restore` | Replace live DB with a chosen snapshot |
Thats the complete surface area of Marlin today—feel free to play around or point the scanner at real folders.
---
## License
MIT — see `LICENSE`

View File

@@ -1,19 +1,23 @@
# Marlin Demo
Heres a little demo you can spin up to exercise tags, attributes, FTS queries, `--exec` hooks, backups & restores, and linking. Just copypaste each block into your terminal:
Below is the **“hello-world” demo** that matches the current master branch (auto-scan on `marlin init`, no more forced-migration noise, and cleaner build).
---
### 0Create the demo folder and some files
```bash
cargo build --release
```
## 0 — Build & install Marlin
```bash
# inside the repo
cargo build --release # build the new binary
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
*(`cargo install --path . --locked --force` works too if you prefer.)*
---
## 1 — Create the demo tree
```bash
rm -rf ~/marlin_demo
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta,Gamma},Logs,Reports,Scripts,Media/Photos}
@@ -72,18 +76,31 @@ chmod +x ~/marlin_demo/Scripts/deploy.sh
echo "JPEGDATA" > ~/marlin_demo/Media/Photos/event.jpg
```
*(copy the file-creation block from your original instructions — nothing about the files needs to change)*
---
### 1Initialize & index
## 2 — Initialise **and** index (one step)
`marlin init` now performs a first-time scan of whatever directory you run it in.
So just:
```bash
cd ~/marlin_demo # <-- important: run init from the folder you want indexed
marlin init
marlin scan ~/marlin_demo
```
That will:
1. create/upgrade the DB,
2. run all migrations exactly once,
3. walk the current directory and ingest every file it finds.
Need to add more paths later? Use `marlin scan <dir>` exactly as before.
---
### 2Attach hierarchical tags
## 3 — Tagging examples
```bash
# Tag all project markdown as “project/md”
@@ -98,101 +115,69 @@ marlin tag "~/marlin_demo/Projects/Beta/**/*" project/beta
---
### 3Set custom attributes
## 4 — Set custom attributes
```bash
# Mark only the “final.md” as complete
marlin attr set "~/marlin_demo/Projects/Beta/final.md" status complete
# Mark PDF as reviewed
marlin attr set "~/marlin_demo/Reports/*.pdf" reviewed yes
marlin attr set "~/marlin_demo/Projects/Beta/final.md" status complete
marlin attr set "~/marlin_demo/Reports/*.pdf" reviewed yes
```
---
### 4Play with search
## 5 — Play with search / exec hooks
```bash
# Find all TODOs (in any file)
marlin search TODO
# All markdown under your “project/md” tag
marlin search tag:project/md
# All files tagged “logs/app” containing ERROR
marlin search "tag:logs/app AND ERROR"
# Only your completed Beta deliverable
marlin search "attr:status=complete"
# Only reviewed PDFs
marlin search "attr:reviewed=yes AND pdf"
# Open every reviewed report
marlin search "attr:reviewed=yes" --exec 'xdg-open {}'
```
---
### 5Try JSON output & verbose mode
## 6 — JSON output & verbose mode
```bash
marlin --format=json attr ls ~/marlin_demo/Projects/Beta/final.md
marlin --verbose scan ~/marlin_demo
marlin --verbose scan ~/marlin_demo # re-scan to see debug logs
```
---
### 6Snapshot & restore
## 7 — Snapshot & restore
```bash
# Snapshot
snap=$(marlin backup | awk '{print $NF}')
# Delete your DB to simulate data loss
rm ~/.local/share/marlin/index.db
# Bring it back
rm ~/.local/share/marlin/index.db # simulate disaster
marlin restore "$snap"
# Confirm you still see “TODO”
marlin search TODO
marlin search TODO # should still work
```
---
### 7Test linking functionality
## 8 — Linking demo
```bash
# Create two demo files
touch ~/marlin_demo/foo.txt ~/marlin_demo/bar.txt
marlin scan ~/marlin_demo # index the new files
# Re-scan to index new files
marlin scan ~/marlin_demo
# Link foo.txt → bar.txt
foo=~/marlin_demo/foo.txt
bar=~/marlin_demo/bar.txt
marlin link add "$foo" "$bar"
# List outgoing links for foo.txt
marlin link list "$foo"
# List incoming links (backlinks) to bar.txt
marlin link backlinks "$bar"
marlin link add "$foo" "$bar" # create link
marlin link list "$foo" # outgoing links from foo
marlin link backlinks "$bar" # incoming links to bar
```
---
That gives you:
### Recap
* **wide folder structures** (Projects, Logs, Reports, Scripts, Media)
* **hierarchical tags** you can mix and match
* **key-value attributes** to flag state & review
* **FTS5 queries** with AND/OR/NOT
* **`--exec` hooks** to trigger external commands
* **JSON output** for programmatic gluing
* **backups & restores** to guard your data
* **file-to-file links** for graph relationships
* `cargo build --release` + `sudo install …` is still the build path.
* **`cd` to the folder you want indexed and run `marlin init`** — first scan happens automatically.
* Subsequent scans (`marlin scan …`) are only needed for *new* directories you add later.
* No more “forcing reapplication of migration 4” banner and the unused-import warnings are gone.
Have fun playing around!
Happy organising!

View File

@@ -0,0 +1,289 @@
-- src/db/migrations/0004_fix_hierarchical_tags_fts.sql
-- Rebuilds every trigger that keeps the files_fts index in sync so that
-- tags_text stores full hierarchical tag paths (root/child/leaf), then
-- re-populates the index for all existing files.
PRAGMA foreign_keys = ON;
-- NOTE(review): if this script is executed inside an open transaction, SQLite
-- does not change journal_mode; confirm whether this PRAGMA is still needed
-- here or whether the connection bootstrap alone should set WAL.
PRAGMA journal_mode = WAL;
-- Drop all FTS triggers so the definitions below replace any older versions.
-- IF EXISTS keeps the script valid when a trigger was never created.
DROP TRIGGER IF EXISTS files_fts_ai_file;
DROP TRIGGER IF EXISTS files_fts_au_file;
DROP TRIGGER IF EXISTS files_fts_ad_file;
DROP TRIGGER IF EXISTS file_tags_fts_ai;
DROP TRIGGER IF EXISTS file_tags_fts_ad;
DROP TRIGGER IF EXISTS attributes_fts_ai;
DROP TRIGGER IF EXISTS attributes_fts_au;
DROP TRIGGER IF EXISTS attributes_fts_ad;
-- Trigger: after a row is inserted into files, add the matching files_fts row.
--   rowid      = the new file id
--   path       = the new file path
--   tags_text  = space-separated full tag paths of the tags attached to the file;
--                the recursive CTE tag_tree walks from root tags (parent_id IS NULL)
--                downwards, building 'parent/child' paths with '/'
--   attrs_text = space-separated 'key=value' pairs from attributes
-- IFNULL(..., '') stores an empty string when the file has no tags/attributes.
-- NOTE(review): the second UNION branch (root tag names) yields values the
-- tag_tree base case already produces for root tags; UNION removes the duplicates.
CREATE TRIGGER files_fts_ai_file
AFTER INSERT ON files
BEGIN
INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
VALUES (
NEW.id,
NEW.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = NEW.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = NEW.id)
);
END;
-- Trigger: when files.path is updated, copy the new path into the files_fts
-- row with the same rowid. tags_text and attrs_text are left untouched.
CREATE TRIGGER files_fts_au_file
AFTER UPDATE OF path ON files
BEGIN
UPDATE files_fts
SET path = NEW.path
WHERE rowid = NEW.id;
END;
-- Trigger: when a file row is deleted, remove the files_fts row whose rowid
-- equals the deleted file's id.
CREATE TRIGGER files_fts_ad_file
AFTER DELETE ON files
BEGIN
DELETE FROM files_fts WHERE rowid = OLD.id;
END;
-- Trigger: after a tag is attached to a file (row inserted into file_tags),
-- rewrite that file's files_fts row via INSERT OR REPLACE.
-- The whole row is recomputed: path from files, tags_text as the space-separated
-- full tag paths built by the recursive CTE tag_tree, and attrs_text as the
-- space-separated 'key=value' pairs from attributes.
-- Nothing is written when no files row has id = NEW.file_id.
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- Trigger: after a tag is detached from a file (row deleted from file_tags),
-- rewrite that file's files_fts row via INSERT OR REPLACE so tags_text only
-- lists the tags still attached. The row is recomputed in full (path,
-- tags_text, attrs_text) with the same sub-queries as the insert trigger;
-- the affected file is identified through OLD.file_id.
CREATE TRIGGER file_tags_fts_ad
AFTER DELETE ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Attribute triggers: each one rewrites the owning file's files_fts row in full
-- (path, tags_text with full tag paths from the recursive CTE, attrs_text).
-- Trigger: after an attribute row is inserted, rebuild the files_fts row of
-- the file referenced by NEW.file_id so attrs_text includes the new key=value.
CREATE TRIGGER attributes_fts_ai
AFTER INSERT ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- Trigger: after the value column of an attribute row is updated, rebuild the
-- files_fts row of the file referenced by NEW.file_id so attrs_text carries
-- the new value. The trigger is declared for updates of the value column only.
CREATE TRIGGER attributes_fts_au
AFTER UPDATE OF value ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- Trigger: after an attribute row is deleted, rebuild the files_fts row of the
-- file referenced by OLD.file_id so attrs_text no longer lists the removed pair.
CREATE TRIGGER attributes_fts_ad
AFTER DELETE ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Backfill: rewrite the files_fts row of every row in files (no WHERE clause)
-- so entries written before this migration also use the full tag-path format.
-- The row contents are computed with the same sub-queries as the triggers above.
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f;

260
src/db/migrations/mod.rs Normal file
View File

@@ -0,0 +1,260 @@
use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::{Context, Result};
use chrono::Local;
use rusqlite::{
backup::{Backup, StepResult},
params,
Connection,
OpenFlags,
OptionalExtension,
};
use tracing::{debug, info};
/// Embed every numbered migration file here.
///
/// Each entry is a `(file name, SQL text)` pair; `include_str!` compiles the
/// SQL into the binary. The migration runner parses the part of the file name
/// before the first `_` as the schema version and walks the entries in the
/// order they are listed.
const MIGRATIONS: &[(&str, &str)] = &[
("0001_initial_schema.sql", include_str!("migrations/0001_initial_schema.sql")),
("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
];
/* ─── connection bootstrap ──────────────────────────────────────────── */
/// Opens the SQLite database at `db_path` and prepares it for use.
///
/// After connecting, the `journal_mode` pragma is set to `WAL` and
/// `foreign_keys` to `ON`, then all pending migrations are applied.
///
/// # Errors
/// Fails when the database cannot be opened (the error names the path), when
/// a pragma cannot be set, or when a migration fails.
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
    let location = db_path.as_ref();
    let mut db = Connection::open(location)
        .with_context(|| format!("failed to open DB at {}", location.display()))?;

    // Same pragmas, same order: journal mode first, then foreign keys.
    for &(pragma, setting) in &[("journal_mode", "WAL"), ("foreign_keys", "ON")] {
        db.pragma_update(None, pragma, setting)?;
    }

    // Apply migrations (drops & recreates all FTS triggers)
    apply_migrations(&mut db)?;
    Ok(db)
}
/* ─── migration runner ──────────────────────────────────────────────── */
/// Brings the schema up to date by applying every pending entry of `MIGRATIONS`.
///
/// A migration's version is the integer prefix of its file name (the text
/// before the first `_`). Versions already present in `schema_version` are
/// skipped; the remaining scripts are executed in listed order inside a single
/// transaction, and each applied version is recorded with a local RFC 3339
/// timestamp. The transaction is committed only after every script succeeded.
///
/// The script text is logged at `debug` level rather than printed, so stdout
/// stays reserved for command output that callers may pipe into other tools.
///
/// # Errors
/// Returns an error if the bookkeeping table cannot be created, a migration
/// script fails (the error names the file), or the transaction cannot commit.
///
/// # Panics
/// Panics if a file name in `MIGRATIONS` does not start with an integer; the
/// table is a compile-time constant, so that would be a programming error.
fn apply_migrations(conn: &mut Connection) -> Result<()> {
    // Ensure schema_version table
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS schema_version (
            version INTEGER PRIMARY KEY,
            applied_on TEXT NOT NULL
        );",
    )?;

    // Legacy patch: add the column to tables created without it. The statement
    // fails when the column already exists, which is expected, so the result
    // is deliberately ignored.
    let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);

    let tx = conn.transaction()?;
    for (fname, sql) in MIGRATIONS {
        let version: i64 = fname
            .split('_')
            .next()
            .and_then(|s| s.parse().ok())
            .expect("migration filenames start with number");

        let already: Option<i64> = tx
            .query_row(
                "SELECT version FROM schema_version WHERE version = ?1",
                [version],
                |r| r.get(0),
            )
            .optional()?;
        if already.is_some() {
            debug!("migration {} already applied", fname);
            continue;
        }

        info!("applying migration {}", fname);
        // Full script goes to the debug log only; printing it would corrupt
        // the stdout of commands whose output is parsed by scripts.
        debug!(
            "SQL script for migration {}\nBEGIN SQL >>>\n{}\n<<< END SQL",
            fname, sql
        );

        tx.execute_batch(sql)
            .with_context(|| format!("could not apply migration {}", fname))?;
        tx.execute(
            "INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
            params![version, Local::now().to_rfc3339()],
        )?;
    }
    tx.commit()?;
    Ok(())
}
/* ─── helpers ───────────────────────────────────────────────────────── */
/// Ensures every segment of a `/`-separated tag path exists and returns the
/// id of the last (deepest) segment.
///
/// Segments are processed left to right; each is looked up by name under the
/// id of the previous segment (`NULL` parent for the first one) and inserted
/// only if no such row exists yet. Empty segments (leading, trailing or
/// doubled `/`) are ignored.
///
/// The lookup runs *before* the insert because SQLite treats `NULL`s as
/// distinct in UNIQUE constraints: if tag uniqueness is enforced on
/// `(name, parent_id)` (schema not visible here), `INSERT OR IGNORE` alone
/// would add a fresh root-level row on every call.
///
/// # Errors
/// Returns an error for database failures, or `"empty tag path"` when `path`
/// contains no non-empty segment.
pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
    // `IS` also matches a NULL parent, which `=` alone would not.
    const FIND_TAG: &str =
        "SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)";

    let mut parent: Option<i64> = None;
    for segment in path.split('/').filter(|s| !s.is_empty()) {
        let existing: Option<i64> = conn
            .query_row(FIND_TAG, params![segment, parent], |row| row.get(0))
            .optional()?;

        let id = match existing {
            Some(id) => id,
            None => {
                conn.execute(
                    "INSERT OR IGNORE INTO tags(name, parent_id) VALUES (?1, ?2)",
                    params![segment, parent],
                )?;
                // Re-read instead of trusting the insert: OR IGNORE may have
                // skipped the row because of another constraint.
                conn.query_row(FIND_TAG, params![segment, parent], |row| row.get(0))?
            }
        };
        parent = Some(id);
    }
    parent.ok_or_else(|| anyhow::anyhow!("empty tag path"))
}
/// Looks up the id of the `files` row whose `path` equals `path` exactly.
///
/// # Errors
/// Returns `"file not indexed: <path>"` when no row matches. Any other
/// database failure is propagated with its original cause instead of being
/// reported as a missing file.
pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
    conn.query_row("SELECT id FROM files WHERE path = ?1", [path], |r| r.get(0))
        // Turns only the "no rows" case into `None`; real errors stay errors.
        .optional()
        .with_context(|| format!("failed to look up file id for {}", path))?
        .ok_or_else(|| anyhow::anyhow!("file not indexed: {}", path))
}
pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
conn.execute(
r#"
INSERT INTO attributes(file_id, key, value)
VALUES (?1, ?2, ?3)
ON CONFLICT(file_id, key) DO UPDATE SET value = excluded.value
"#,
params![file_id, key, value],
)?;
Ok(())
}
/// Add a typed link from one file to another.
pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
conn.execute(
"INSERT INTO links(src_file_id, dst_file_id, type)
VALUES (?1, ?2, ?3)
ON CONFLICT(src_file_id, dst_file_id, type) DO NOTHING",
params![src_file_id, dst_file_id, link_type],
)?;
Ok(())
}
/// Remove a typed link between two files.
pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
conn.execute(
"DELETE FROM links
WHERE src_file_id = ?1
AND dst_file_id = ?2
AND (type IS ?3 OR type = ?3)",
params![src_file_id, dst_file_id, link_type],
)?;
Ok(())
}
/// List all links for files matching a glob-style pattern.
///
/// `*` in `pattern` matches any run of characters; every other character is
/// matched literally. `%`, `_` and `\` are escaped before the pattern is handed
/// to SQL `LIKE`, so an underscore in a path no longer acts as a
/// single-character wildcard.
///
/// `direction` may be `"in"` (incoming); `"out"`, `None` and any other value
/// select outgoing links. When `link_type` is `Some`, only links of exactly
/// that type are returned; `None` returns links of every type.
///
/// Each result is `(matched file path, path at the other end, link type)`.
///
/// # Errors
/// Propagates any error reported by the database.
pub fn list_links(
    conn: &Connection,
    pattern: &str,
    direction: Option<&str>,
    link_type: Option<&str>,
) -> Result<Vec<(String, String, Option<String>)>> {
    // Convert glob '*' → SQL LIKE '%', escaping LIKE's own metacharacters.
    let mut like_pattern = String::with_capacity(pattern.len());
    for ch in pattern.chars() {
        match ch {
            '*' => like_pattern.push('%'),
            '\\' | '%' | '_' => {
                like_pattern.push('\\');
                like_pattern.push(ch);
            }
            other => like_pattern.push(other),
        }
    }

    // Find matching files
    let mut file_stmt =
        conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1 ESCAPE '\\'")?;
    let mut file_rows = file_stmt.query(params![like_pattern])?;
    let mut files: Vec<(i64, String)> = Vec::new();
    while let Some(row) = file_rows.next()? {
        let id: i64 = row.get(0)?;
        let path: String = row.get(1)?;
        files.push((id, path));
    }

    // The direction is the same for every file, so the column choice and the
    // prepared statement are built once instead of once per matched file.
    let (src_col, dst_col) = match direction {
        Some("in") => ("dst_file_id", "src_file_id"),
        _ => ("src_file_id", "dst_file_id"),
    };
    // Only the two fixed column names above are interpolated; all user data
    // is bound as parameters.
    let sql = format!(
        "SELECT f2.path, l.type
         FROM links l
         JOIN files f2 ON f2.id = l.{dst}
         WHERE l.{src} = ?1
         AND (?2 IS NULL OR l.type = ?2)",
        src = src_col,
        dst = dst_col,
    );
    let mut link_stmt = conn.prepare(&sql)?;

    let mut results = Vec::new();
    for (file_id, file_path) in files {
        let mut link_rows = link_stmt.query(params![file_id, link_type])?;
        while let Some(row) = link_rows.next()? {
            let other: String = row.get(0)?;
            let typ: Option<String> = row.get(1)?;
            results.push((file_path.clone(), other, typ));
        }
    }
    Ok(results)
}
/// Find all incoming links (backlinks) to files matching a pattern.
///
/// `*` in `pattern` matches any run of characters; every other character is
/// matched literally. `%`, `_` and `\` are escaped before the pattern is handed
/// to SQL `LIKE`, so an underscore in a path no longer acts as a
/// single-character wildcard.
///
/// Each result is `(source file path, link type)` for a link whose
/// destination path matches the pattern.
///
/// # Errors
/// Propagates any error reported by the database.
pub fn find_backlinks(conn: &Connection, pattern: &str) -> Result<Vec<(String, Option<String>)>> {
    // Convert glob '*' → SQL LIKE '%', escaping LIKE's own metacharacters.
    let mut like_pattern = String::with_capacity(pattern.len());
    for ch in pattern.chars() {
        match ch {
            '*' => like_pattern.push('%'),
            '\\' | '%' | '_' => {
                like_pattern.push('\\');
                like_pattern.push(ch);
            }
            other => like_pattern.push(other),
        }
    }

    let mut stmt = conn.prepare(
        "SELECT f1.path, l.type
         FROM links l
         JOIN files f1 ON f1.id = l.src_file_id
         JOIN files f2 ON f2.id = l.dst_file_id
         WHERE f2.path LIKE ?1 ESCAPE '\\'",
    )?;
    let mut rows = stmt.query(params![like_pattern])?;

    let mut result = Vec::new();
    while let Some(row) = rows.next()? {
        let src_path: String = row.get(0)?;
        let typ: Option<String> = row.get(1)?;
        result.push((src_path, typ));
    }
    Ok(result)
}
/* ─── backup / restore ──────────────────────────────────────────────── */
/// Create a timestamped copy of the database with SQLite's online-backup API.
///
/// The copy is written to `backups/backup_<YYYY-MM-DD_HH-MM-SS>.db` inside the
/// directory that contains `db_path`; the `backups` directory is created if needed.
/// The source database is opened read-only.
///
/// Returns the path of the backup file.
///
/// # Errors
/// * `db_path` has no parent directory.
/// * The backup directory cannot be created or either database cannot be opened.
/// * SQLite reports an error while copying pages.
/// * The source stays busy/locked for too many consecutive attempts. In that case
///   the partially written backup file is left on disk and must not be used.
pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
    /// How many consecutive busy/locked results are tolerated before giving up.
    const MAX_BUSY_RETRIES: u32 = 50;

    let src = db_path.as_ref();
    let dir = src
        .parent()
        .ok_or_else(|| anyhow::anyhow!("invalid DB path: {}", src.display()))?
        .join("backups");
    fs::create_dir_all(&dir)?;

    let stamp = Local::now().format("%Y-%m-%d_%H-%M-%S");
    let dst = dir.join(format!("backup_{stamp}.db"));

    let src_conn = Connection::open_with_flags(src, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
    let mut dst_conn = Connection::open(&dst)?;
    let bk = Backup::new(&src_conn, &mut dst_conn)?;

    // `step` reports a busy or locked source as an `Ok` value, not as an error.
    // Stopping on anything other than `Done` would hand back a truncated backup
    // as if it were complete, so retry those cases for a bounded time instead.
    let retry_pause = std::time::Duration::from_millis(100);
    let mut busy_retries = 0u32;
    loop {
        match bk.step(100)? {
            StepResult::Done => break,
            StepResult::More => busy_retries = 0,
            // Busy / Locked: another connection currently holds the database.
            _ => {
                busy_retries += 1;
                if busy_retries > MAX_BUSY_RETRIES {
                    return Err(anyhow::anyhow!(
                        "database {} stayed busy/locked; backup {} is incomplete",
                        src.display(),
                        dst.display()
                    ));
                }
                std::thread::sleep(retry_pause);
            }
        }
    }
    Ok(dst)
}
/// Replace the live database file with a backup copy.
///
/// `backup_path` is copied over `live_db_path`. Afterwards any `-wal`, `-shm` and
/// `-journal` sidecar files that belonged to the *previous* live database are removed:
/// SQLite would otherwise pair them with the restored file and could replay stale
/// pages into it. The caller must have closed every connection to the live database
/// before calling this.
///
/// # Errors
/// Returns an error if the copy fails (for example when `backup_path` does not
/// exist) or if an existing sidecar file cannot be removed. The sidecars are only
/// touched after the copy has succeeded.
pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
    let live = live_db_path.as_ref();
    fs::copy(backup_path.as_ref(), live)?;

    for suffix in ["-wal", "-shm", "-journal"] {
        // Sidecars are named by appending the suffix to the full database file name.
        let mut sidecar = live.as_os_str().to_os_string();
        sidecar.push(suffix);
        match fs::remove_file(&sidecar) {
            Ok(()) => {}
            // A missing sidecar is the normal case after a clean shutdown.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(e.into()),
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: `open` on a fresh in-memory database must succeed, which means
    /// every embedded migration ran without error.
    #[test]
    fn migrations_apply_in_memory() {
        let opened = open(":memory:");
        let _conn = opened.expect("in-memory migrations should run cleanly");
    }
}

View File

@@ -12,15 +12,20 @@ use rusqlite::{
OpenFlags,
OptionalExtension,
};
use tracing::{debug, info};
use tracing::{debug, info, warn};
/// Every numbered migration, embedded into the binary at compile time.
///
/// Each entry is `(file name, SQL text)`. The file name must start with the numeric
/// migration version followed by `_` (e.g. `0004_…`): `apply_migrations` parses that
/// prefix to obtain the version it looks up in `schema_version`.
/// New migrations are added by appending an entry here.
const MIGRATIONS: &[(&str, &str)] = &[
("0001_initial_schema.sql", include_str!("migrations/0001_initial_schema.sql")),
("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
];
/// Versions of migrations that should *always* be re-run.
///
/// For every version listed here, `apply_migrations` deletes the matching
/// `schema_version` row before migrating, so that migration is applied again.
/// We no longer need to force any, so the list is empty.
const FORCE_APPLY_MIGRATIONS: &[i64] = &[]; // previously &[4]
/* ─── connection bootstrap ──────────────────────────────────────────── */
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
@@ -51,6 +56,14 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
// Legacy patch (ignore if exists)
let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);
// Force-remove migrations that should always be applied
for &version in FORCE_APPLY_MIGRATIONS {
let rows_affected = conn.execute("DELETE FROM schema_version WHERE version = ?1", [version])?;
if rows_affected > 0 {
info!("Forcing reapplication of migration {}", version);
}
}
let tx = conn.transaction()?;
for (fname, sql) in MIGRATIONS {
@@ -89,6 +102,37 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
}
tx.commit()?;
// Verify that all migrations have been applied
let mut missing_migrations = Vec::new();
for (fname, _) in MIGRATIONS {
let version: i64 = fname
.split('_')
.next()
.and_then(|s| s.parse().ok())
.expect("migration filenames start with number");
let exists: bool = conn
.query_row(
"SELECT 1 FROM schema_version WHERE version = ?1",
[version],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !exists {
missing_migrations.push(version);
}
}
if !missing_migrations.is_empty() {
warn!(
"The following migrations were not applied: {:?}. This may indicate a problem with the migration system.",
missing_migrations
);
}
Ok(())
}

View File

@@ -6,36 +6,39 @@ mod logging;
mod scan;
use anyhow::{Context, Result};
use clap::{Parser, Subcommand, CommandFactory};
use clap_complete::{generate, Shell};
use clap::{Parser, CommandFactory};
use clap_complete::generate;
use glob::Pattern;
use rusqlite::{params, OptionalExtension};
use rusqlite::params;
use shellexpand;
use shlex;
use std::{env, io, path::PathBuf, process::Command};
use tracing::{debug, error, info};
use walkdir::WalkDir;
use cli::{Cli, Commands, Format};
use cli::{Cli, Commands};
fn main() -> Result<()> {
// Parse CLI and bootstrap logging
let mut args = Cli::parse();
/* ── CLI parsing & logging ────────────────────────────────────── */
let args = Cli::parse();
if args.verbose {
env::set_var("RUST_LOG", "debug");
}
logging::init();
// If the user asked for completions, generate and exit immediately.
/* ── shell-completion shortcut ───────────────────────────────── */
if let Commands::Completions { shell } = &args.command {
let mut cmd = Cli::command();
generate(*shell, &mut cmd, "marlin", &mut io::stdout());
return Ok(());
}
let cfg = config::Config::load()?;
/* ── config & automatic backup ───────────────────────────────── */
let cfg = config::Config::load()?; // DB path etc.
// Backup before any non-init, non-backup/restore command
match &args.command {
Commands::Init | Commands::Backup | Commands::Restore { .. } => {}
_ => match db::backup(&cfg.db_path) {
@@ -44,18 +47,29 @@ fn main() -> Result<()> {
},
}
// Open (and migrate) the DB
/* ── open DB (runs migrations if needed) ─────────────────────── */
let mut conn = db::open(&cfg.db_path)?;
// Dispatch all commands
/* ── command dispatch ────────────────────────────────────────── */
match args.command {
Commands::Completions { .. } => {}
Commands::Completions { .. } => {} // already handled
Commands::Init => {
info!("Database initialised at {}", cfg.db_path.display());
// Always (re-)scan the current directory so even an existing DB
// picks up newly created files in the working tree.
let cwd = env::current_dir().context("getting current directory")?;
let count = scan::scan_directory(&mut conn, &cwd)
.context("initial scan failed")?;
info!("Initial scan complete indexed/updated {} files", count);
}
Commands::Scan { paths } => {
let scan_paths = if paths.is_empty() {
vec![std::env::current_dir()?]
vec![env::current_dir()?]
} else {
paths
};
@@ -63,26 +77,21 @@ fn main() -> Result<()> {
scan::scan_directory(&mut conn, &p)?;
}
}
Commands::Tag { pattern, tag_path } => {
apply_tag(&conn, &pattern, &tag_path)?;
}
Commands::Attr { action } => match action {
Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?,
Commands::Attr { action } => match action {
cli::AttrCmd::Set { pattern, key, value } => {
attr_set(&conn, &pattern, &key, &value)?;
}
cli::AttrCmd::Ls { path } => {
attr_ls(&conn, &path)?;
attr_set(&conn, &pattern, &key, &value)?
}
cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?,
},
Commands::Search { query, exec } => {
run_search(&conn, &query, exec)?;
}
Commands::Backup => {
Commands::Search { query, exec } => run_search(&conn, &query, exec)?,
Commands::Backup => {
let path = db::backup(&cfg.db_path)?;
println!("Backup created: {}", path.display());
}
Commands::Restore { backup_path } => {
drop(conn);
Commands::Restore { backup_path } => {
drop(conn); // close handle before overwrite
db::restore(&backup_path, &cfg.db_path)
.with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?;
println!("Restored DB from {}", backup_path.display());
@@ -90,20 +99,24 @@ fn main() -> Result<()> {
.with_context(|| format!("Could not open restored DB at {}", cfg.db_path.display()))?;
info!("Successfully opened restored database.");
}
Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?,
Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?,
Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?,
/* passthrough sub-modules that still stub out their logic */
Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?,
Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?,
Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?,
Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?,
Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?,
Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?,
Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?,
Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?,
Commands::Annotate(an_cmd) => cli::annotate::run(&an_cmd, &mut conn, args.format)?,
Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?,
Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?,
Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?,
Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?,
}
Ok(())
}
/* ───────────────────────── helpers & sub-routines ────────────────── */
/// Apply a hierarchical tag to all files matching the glob pattern.
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
// ensure_tag_path returns the deepest-node ID
@@ -114,13 +127,15 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu
let mut current = Some(leaf_tag_id);
while let Some(id) = current {
tag_ids.push(id);
current = conn
.query_row(
"SELECT parent_id FROM tags WHERE id = ?1",
params![id],
|r| r.get::<_, Option<i64>>(0),
)
.optional()?;
current = match conn.query_row(
"SELECT parent_id FROM tags WHERE id = ?1",
params![id],
|r| r.get::<_, Option<i64>>(0),
) {
Ok(parent_id) => parent_id,
Err(rusqlite::Error::QueryReturnedNoRows) => None,
Err(e) => return Err(e.into()),
};
}
let expanded = shellexpand::tilde(pattern).into_owned();
@@ -128,9 +143,10 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu
.with_context(|| format!("Invalid glob pattern `{}`", expanded))?;
let root = determine_scan_root(&expanded);
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut stmt_insert =
conn.prepare("INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)")?;
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut stmt_insert = conn.prepare(
"INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
)?;
let mut count = 0;
for entry in WalkDir::new(&root)
@@ -148,7 +164,6 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu
match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) {
Ok(file_id) => {
// insert every segment tag
let mut newly = false;
for &tid in &tag_ids {
if stmt_insert.execute(params![file_id, tid])? > 0 {
@@ -236,7 +251,8 @@ fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> {
let mut stmt = conn.prepare(
"SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key",
)?;
for row in stmt.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))? {
for row in stmt.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?
{
let (k, v) = row?;
println!("{k} = {v}");
}
@@ -244,8 +260,8 @@ fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> {
}
/// Build and run an FTS5 search query, with optional exec.
/// “tag:foo/bar” → tags_text:foo AND tags_text:bar
/// “attr:key=value” → attrs_text:key=value
/// “tag:foo/bar” → tags_text:foo AND tags_text:bar
/// “attr:k=v” → attrs_text:k AND attrs_text:v
fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
let mut fts_query_parts = Vec::new();
let parts = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
@@ -261,8 +277,15 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>
fts_query_parts.push(format!("tags_text:{}", escape_fts_query_term(seg)));
}
} else if let Some(attr) = part.strip_prefix("attr:") {
// keep whole key=value together
fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(attr)));
let mut kv = attr.splitn(2, '=');
let key = kv.next().unwrap();
if let Some(value) = kv.next() {
fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(key)));
fts_query_parts.push("AND".into());
fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(value)));
} else {
fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(key)));
}
} else {
fts_query_parts.push(escape_fts_query_term(&part));
}
@@ -347,7 +370,11 @@ fn determine_scan_root(pattern: &str) -> PathBuf {
let wildcard_pos = pattern.find(|c| c == '*' || c == '?' || c == '[').unwrap_or(pattern.len());
let prefix = &pattern[..wildcard_pos];
let mut root = PathBuf::from(prefix);
while root.as_os_str().to_string_lossy().contains(|c| ['*', '?', '['].contains(&c)) {
while root
.as_os_str()
.to_string_lossy()
.contains(|c| ['*', '?', '['].contains(&c))
{
if let Some(parent) = root.parent() {
root = parent.to_path_buf();
} else {

View File

@@ -0,0 +1,240 @@
// Test script to validate hierarchical tag FTS fix
// This script demonstrates how the fix works with a simple test case
use rusqlite::{Connection, params};
use std::path::Path;
use std::fs;
use anyhow::Result;
fn main() -> Result<()> {
// Create a test database in a temporary location
let db_path = Path::new("/tmp/marlin_test.db");
if db_path.exists() {
fs::remove_file(db_path)?;
}
println!("Creating test database at {:?}", db_path);
// Initialize database with our schema and migrations
let conn = Connection::open(db_path)?;
// Apply schema (simplified version of what's in the migrations)
println!("Applying schema...");
conn.execute_batch(
"PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
CREATE TABLE files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
size INTEGER,
mtime INTEGER,
hash TEXT
);
CREATE TABLE tags (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
UNIQUE(name, parent_id)
);
CREATE TABLE file_tags (
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY(file_id, tag_id)
);
CREATE TABLE attributes (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
key TEXT NOT NULL,
value TEXT,
UNIQUE(file_id, key)
);
CREATE VIRTUAL TABLE files_fts
USING fts5(
path,
tags_text,
attrs_text,
content='',
tokenize=\"unicode61 remove_diacritics 2\"
);"
)?;
// Apply our fixed triggers
println!("Applying fixed FTS triggers...");
conn.execute_batch(
"CREATE TRIGGER files_fts_ai_file
AFTER INSERT ON files
BEGIN
INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
VALUES (
NEW.id,
NEW.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path as tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = NEW.id
UNION
SELECT t.name as tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = NEW.id)
);
END;
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path as tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name as tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;"
)?;
// Insert test data
println!("Inserting test data...");
// Insert a test file
conn.execute(
"INSERT INTO files (id, path) VALUES (1, '/test/document.md')",
[],
)?;
// Create hierarchical tags: project/md
println!("Creating hierarchical tags: project/md");
// Insert parent tag 'project'
conn.execute(
"INSERT INTO tags (id, name, parent_id) VALUES (1, 'project', NULL)",
[],
)?;
// Insert child tag 'md' under 'project'
conn.execute(
"INSERT INTO tags (id, name, parent_id) VALUES (2, 'md', 1)",
[],
)?;
// Tag the file with the 'md' tag (which is under 'project')
conn.execute(
"INSERT INTO file_tags (file_id, tag_id) VALUES (1, 2)",
[],
)?;
// Check what's in the FTS index
println!("\nChecking FTS index content:");
let mut stmt = conn.prepare("SELECT rowid, path, tags_text, attrs_text FROM files_fts")?;
let rows = stmt.query_map([], |row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, String>(2)?,
row.get::<_, String>(3)?,
))
})?;
for row in rows {
let (id, path, tags, attrs) = row?;
println!("ID: {}, Path: {}, Tags: '{}', Attrs: '{}'", id, path, tags, attrs);
}
// Test searching for the full hierarchical tag path
println!("\nTesting search for 'project/md':");
let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project/md'")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
let mut found = false;
for row in rows {
found = true;
println!("Found file: {}", row?);
}
if !found {
println!("No files found with tag 'project/md'");
}
// Test searching for just the parent tag
println!("\nTesting search for just 'project':");
let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project'")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
let mut found = false;
for row in rows {
found = true;
println!("Found file: {}", row?);
}
if !found {
println!("No files found with tag 'project'");
}
// Test searching for just the child tag
println!("\nTesting search for just 'md':");
let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'md'")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
let mut found = false;
for row in rows {
found = true;
println!("Found file: {}", row?);
}
if !found {
println!("No files found with tag 'md'");
}
println!("\nTest completed successfully!");
Ok(())
}

Binary file not shown.

View File

@@ -1 +1 @@
/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs
/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs

103
tests/e2e.rs Normal file
View File

@@ -0,0 +1,103 @@
//! End-to-end smoke-tests for the marlin binary.
//!
//! Run with `cargo test --test e2e` or let CI invoke `cargo test`.
use assert_cmd::prelude::*;
use predicates::prelude::*;
use std::{fs, path::PathBuf, process::Command};
use tempfile::tempdir;
/// Location of the `marlin` executable under test.
///
/// The path comes from the `CARGO_BIN_EXE_marlin` variable, which is read at
/// compile time via `env!`.
fn marlin_bin() -> PathBuf {
    env!("CARGO_BIN_EXE_marlin").into()
}
/// Create the demo directory tree used by the end-to-end test below `root`.
///
/// Creates five directories (`Projects/Alpha`, `Projects/Beta`, `Projects/Gamma`,
/// `Logs`, `Reports`) and six small files with fixed contents. `root` itself is
/// created if it does not exist.
///
/// # Panics
/// Panics, naming the offending path, if a directory or file cannot be created.
fn spawn_demo_tree(root: &std::path::Path) {
    const DIRS: [&str; 5] = [
        "Projects/Alpha",
        "Projects/Beta",
        "Projects/Gamma",
        "Logs",
        "Reports",
    ];
    // (relative path, file contents)
    const FILES: [(&str, &str); 6] = [
        ("Projects/Alpha/draft1.md", "- [ ] TODO foo\n"),
        ("Projects/Alpha/draft2.md", "- [x] TODO foo\n"),
        ("Projects/Beta/final.md", "done\n"),
        ("Projects/Gamma/TODO.txt", "TODO bar\n"),
        ("Logs/app.log", "ERROR omg\n"),
        ("Reports/Q1.pdf", "PDF\n"),
    ];

    for dir in DIRS {
        let path = root.join(dir);
        fs::create_dir_all(&path)
            .unwrap_or_else(|e| panic!("creating directory {}: {}", path.display(), e));
    }
    for (rel, contents) in FILES {
        let path = root.join(rel);
        fs::write(&path, contents)
            .unwrap_or_else(|e| panic!("writing file {}: {}", path.display(), e));
    }
}
/// Execute `cmd` and require a successful exit status.
///
/// Returns the `Assert` handle so callers can chain further checks on the output.
fn run(cmd: &mut Command) -> assert_cmd::assert::Assert {
    let outcome = cmd.assert();
    outcome.success()
}
/// End-to-end walk through the CLI: init → tag/attr → search → link/backlinks →
/// backup/restore, all driven through the real `marlin` binary.
#[test]
fn full_cli_flow() -> Result<(), Box<dyn std::error::Error>> {
    // 1. sandbox
    let tmp = tempdir()?;

    // Keep the test away from the developer's real index: step 6 deletes the DB file.
    // `dirs::data_dir()` honours XDG_DATA_HOME on Linux, and the spawned `marlin`
    // processes inherit this environment.
    // NOTE(review): assumes marlin resolves its default DB location through the same
    // XDG-aware lookup this test uses below — confirm against `config::Config::load`.
    // On platforms where the variable is ignored the behaviour is unchanged.
    std::env::set_var("XDG_DATA_HOME", tmp.path().join("xdg-data"));

    let demo_dir = tmp.path().join("marlin_demo");
    spawn_demo_tree(&demo_dir);

    // 2. init (auto-scan cwd)
    run(Command::new(marlin_bin())
        .current_dir(&demo_dir)
        .arg("init"));

    // 3. tag & attr
    run(Command::new(marlin_bin())
        .arg("tag")
        .arg(format!("{}/Projects/**/*.md", demo_dir.display()))
        .arg("project/md"));

    run(Command::new(marlin_bin())
        .arg("attr")
        .arg("set")
        .arg(format!("{}/Reports/*.pdf", demo_dir.display()))
        .arg("reviewed")
        .arg("yes"));

    // 4. search expectations
    run(Command::new(marlin_bin()).arg("search").arg("TODO"))
        .stdout(predicate::str::contains("TODO.txt"));

    run(Command::new(marlin_bin()).arg("search").arg("attr:reviewed=yes"))
        .stdout(predicate::str::contains("Q1.pdf"));

    // 5. link & backlinks
    let foo = demo_dir.join("foo.txt");
    let bar = demo_dir.join("bar.txt");
    fs::write(&foo, "")?;
    fs::write(&bar, "")?;
    run(Command::new(marlin_bin()).arg("scan").arg(&demo_dir));

    run(Command::new(marlin_bin())
        .arg("link").arg("add")
        .arg(&foo).arg(&bar));

    run(Command::new(marlin_bin())
        .arg("link").arg("backlinks").arg(&bar))
        .stdout(predicate::str::contains("foo.txt"));

    // 6. backup / restore round-trip
    let backup_out = Command::new(marlin_bin()).arg("backup").output()?;
    assert!(
        backup_out.status.success(),
        "`marlin backup` failed: {}",
        String::from_utf8_lossy(&backup_out.stderr)
    );
    let backup_stdout = String::from_utf8(backup_out.stdout)?;
    // Take everything after the "Backup created: " marker so that a backup path
    // containing spaces is not cut off at the last whitespace.
    let backup_file = backup_stdout
        .lines()
        .find_map(|line| line.split_once("Backup created: ").map(|(_, path)| path.trim()))
        .ok_or("`marlin backup` did not report the backup path")?;

    // wipe DB file
    let data_dir = dirs::data_dir().ok_or("no data directory available")?;
    fs::remove_file(data_dir.join("marlin/index.db"))?;

    run(Command::new(marlin_bin()).arg("restore").arg(backup_file));

    // sanity: search still works
    run(Command::new(marlin_bin()).arg("search").arg("TODO"))
        .stdout(predicate::str::contains("TODO.txt"));

    Ok(())
}

68
tests/test.md Normal file
View File

@@ -0,0 +1,68 @@
# Testing
Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
---
## 0 Prepare once
```bash
# Run once (or add to ~/.bashrc) so debug + release artefacts land
# in the same predictable place. Speeds up future builds.
export CARGO_TARGET_DIR=target
```
---
## 1 Build the new binary
```bash
git pull # grab the latest commit
cargo build --release
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
* `cargo build --release` builds the optimised binary.
* `install …` copies it into your `$PATH` so `marlin` on the CLI is the fresh one.
---
## 2 Run the smoke-test suite
```bash
# Runs the end-to-end test we added in tests/e2e.rs
cargo test --test e2e -- --nocapture
```
* `--test e2e` compiles and runs **only** `tests/e2e.rs`; other unit-tests are skipped (add them later if you like).
* `--nocapture` streams stdout/stderr so you can watch each CLI step in real time.
* Exit-code **0** ➜ everything passed.
Any non-zero exit or a red ✗ line means a step failed; the assertion's diff will show the command and its output.
---
## 3 (Optionally) run all tests
```bash
cargo test --all -- --nocapture
```
This will execute:
* unit tests in `src/**`
* every file in `tests/`
* doc-tests
If you wire **“cargo test --all”** into CI (GitHub Actions, GitLab, etc.), pushes that break a workflow will be rejected automatically.
---
### One-liner helper (copy/paste)
```bash
git pull && cargo build --release &&
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
cargo test --test e2e -- --nocapture
```
Stick that in a shell alias (`alias marlin-ci='…'`) and you've got a 5-second upgrade-and-verify loop.