diff --git a/Cargo.lock b/Cargo.lock index 1426346..edbd8f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,22 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +[[package]] +name = "assert_cmd" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66" +dependencies = [ + "anstyle", + "bstr", + "doc-comment", + "libc", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -106,6 +122,17 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", + "regex-automata 0.4.9", + "serde", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -163,6 +190,15 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_complete" +version = "4.5.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91d3baa3bcd889d60e6ef28874126a0b384fd225ab83aa6d8a801c519194ce1" +dependencies = [ + "clap", +] + [[package]] name = "clap_derive" version = "4.5.32" @@ -193,6 +229,12 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "directories" version = "5.0.1" @@ -202,6 +244,15 @@ dependencies = [ "dirs-sys 0.4.1", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" @@ -235,6 +286,22 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "errno" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -247,6 +314,21 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" +dependencies = [ + "num-traits", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -255,7 +337,19 @@ checksum = 
"335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -318,6 +412,12 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "js-sys" version = "0.3.77" @@ -361,6 +461,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "log" version = "0.4.27" @@ -372,13 +478,19 @@ name = "marlin" version = "0.1.0" dependencies = [ "anyhow", + "assert_cmd", "chrono", "clap", + "clap_complete", "directories", + "dirs 5.0.1", "glob", + "predicates", "rusqlite", + "serde_json", "shellexpand", "shlex", + "tempfile", "tracing", "tracing-subscriber", "walkdir", @@ -399,6 +511,12 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -448,6 +566,36 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "predicates" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" + +[[package]] +name = "predicates-tree" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -466,13 +614,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "redox_users" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror 1.0.69", ] @@ -483,7 +637,7 @@ version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror 2.0.12", ] @@ -546,12 +700,31 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + [[package]] name = "rustversion" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + [[package]] name = "same-file" version = "1.0.6" @@ -561,6 +734,38 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -576,7 +781,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" dependencies = [ - "dirs", + "dirs 6.0.0", ] [[package]] @@ -608,6 +813,25 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + [[package]] name = "thiserror" version = "1.0.69" @@ -749,6 +973,15 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -765,6 +998,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -1052,6 +1294,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "zerocopy" version = "0.8.25" diff --git a/Cargo.toml b/Cargo.toml index 438a4a3..6e3c586 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,4 +15,17 @@ walkdir = "2.5" shlex = "1.3" chrono = "0.4" shellexpand = "3.1" +clap_complete = "4.1" +serde_json = { version = "1", optional = true } # <-- NEW +[dev-dependencies] +assert_cmd = "2" +predicates = "3" +tempfile = "3" +dirs = "5" # cross-platform data dir helper + +[features] +# The CLI prints JSON only when this feature is enabled. +# Having the feature listed silences the `unexpected cfg` lint even +# when you don’t turn it on. +json = ["serde_json"] \ No newline at end of file diff --git a/README.md b/README.md index ffc35ff..55b467c 100644 --- a/README.md +++ b/README.md @@ -2,21 +2,24 @@ # Marlin -**Marlin** is a lightweight, metadata-driven file indexer that runs 100 % on your computer. It scans folders, stores paths and file stats in SQLite, lets you attach hierarchical **tags** and **custom attributes**, takes automatic snapshots, and offers instant full-text search via FTS5. -*No cloud, no telemetry – your data never leaves the machine.* +**Marlin** is a lightweight, metadata-driven file indexer that runs **100 % on your computer**. +It scans folders, stores paths and file stats in SQLite, lets you attach hierarchical **tags** and **custom attributes**, keeps timestamped **snapshots**, and offers instant full-text search via FTS5. 
+_No cloud, no telemetry – your data never leaves the machine._ --- ## Feature highlights -| Area | What you get | -| -------------- | --------------------------------------------------------------------------------- | -| **Safety** | Timestamped backups (`marlin backup`) and one-command restore (`marlin restore`) | -| **Resilience** | Versioned, idempotent schema migrations – zero-downtime upgrades | -| **Indexing** | Fast multi-path scanner with SQLite WAL concurrency | -| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) | -| **Search** | Prefix-aware FTS5 across paths, tags, and attributes; optional `--exec` per match | -| **DX / Logs** | Structured tracing (`RUST_LOG=debug`) for every operation | +| Area | What you get | +| ------------------- | ----------------------------------------------------------------------------------------------------- | +| **Safety** | Timestamped backups (`marlin backup`) and one-command restore (`marlin restore`) | +| **Resilience** | Versioned, idempotent schema migrations – zero-downtime upgrades | +| **Indexing** | Fast multi-path scanner with SQLite WAL concurrency | +| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) | +| **Relations** | Typed file ↔ file links (`marlin link`) with backlinks viewer | +| **Collections / Views** | Named playlists (`marlin coll`) & saved searches (`marlin view`) for instant recall | +| **Search** | Prefix-aware FTS5 across paths, tags, attrs & links; optional `--exec` per match
(grep-style context snippets coming Q3) | | **DX / Logs** | Structured tracing (`RUST_LOG=debug`) for every operation |

---

@@ -26,11 +29,11 @@
 ┌──────────────┐   marlin scan    ┌─────────────┐
 │  your files  │ ─────────────────────▶│   SQLite    │
 │ (any folder) │                  │ files/tags  │
-└──────────────┘  tag / attr      │ attrs / FTS │
-        ▲        search / exec    └──────┬──────┘
+└──────────────┘ tag / attr / link │ attrs / FTS │
+        ▲        search / exec     └──────┬──────┘
         └────────── backup / restore ▼ timestamped snapshots
-```
+```

---

| Requirement        | Why                           |
| ------------------ | ----------------------------- |
-| **Rust** ≥ 1.77   | Build toolchain (`rustup.rs`) |
+| **Rust ≥ 1.77**   | Build toolchain (`rustup.rs`) |
| C build essentials | Builds bundled SQLite (Linux) |

macOS & Windows users: let the Rust installer pull the matching build tools.

@@ -48,32 +51,83 @@ macOS & Windows users: let the Rust installer pull the matching build tools.

## Build & install

```bash
-git clone https://github.com/yourname/marlin.git
-cd marlin
+git clone https://github.com/PR0M3TH3AN/Marlin.git
+cd Marlin
 cargo build --release
-sudo install -Dm755 target/release/marlin /usr/local/bin/marlin # optional
+
+# (Optional) install into your PATH
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```

---

## Quick start

+For a concise walkthrough—including **links, collections and views**—see
+[**Quick start & Demo**](marlin_demo.md).
+
+---
+
+## Testing
+
+Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
+
+### 0 Prepare once
+
```bash
-marlin init                                        # create DB (idempotent)
-marlin scan ~/Pictures ~/Documents                 # index files
-marlin tag "~/Pictures/**/*.jpg" photos/trip-2024  # add tag
-marlin attr set "~/Documents/**/*.pdf" reviewed yes
-marlin search reviewed --exec "xdg-open {}"        # open matches
-marlin backup                                      # snapshot DB
+# Put build artefacts in one place (faster incremental builds)
+export CARGO_TARGET_DIR=target
```

+### 1 Build the new binary
+
+```bash
+git pull
+cargo build --release
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
+```
+
+### 2 Run the smoke-test suite
+
+```bash
+cargo test --test e2e -- --nocapture
+```
+
+*Streams CLI output live; exit-code 0 = all good.*
+
+### 3 (Optionally) run **all** tests
+
+```bash
+cargo test --all -- --nocapture
+```
+
+This now covers:
+
+* unit tests in `src/**`
+* positive & negative integration suites (`tests/pos.rs`, `tests/neg.rs`)
+* doc-tests
+
+#### One-liner helper
+
+```bash
+git pull && cargo build --release &&
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
+cargo test --test e2e -- --nocapture
+```
+
+Alias it as `marlin-ci` for a 5-second upgrade-and-verify loop.
+
+---
+
### Database location

-* **Linux**   `~/.local/share/marlin/index.db`
-* **macOS**   `~/Library/Application Support/marlin/index.db`
-* **Windows** `%APPDATA%\marlin\index.db`
+| OS          | Default path                                     |
+| ----------- | ------------------------------------------------ |
+| **Linux**   | `~/.local/share/marlin/index.db`                 |
+| **macOS**   | `~/Library/Application Support/marlin/index.db`  |
+| **Windows** | `%APPDATA%\marlin\index.db`                      |

-Override with:
+Override:

```bash
export MARLIN_DB_PATH=/path/to/custom.db
```

@@ -86,190 +140,57 @@ export MARLIN_DB_PATH=/path/to/custom.db

```text
marlin <COMMAND> [ARGS]
-init                         create / migrate database
-scan <paths>...              walk directories & index files
-tag "<pattern>" <tag>        add hierarchical tag
-attr set|ls …                manage custom attributes
-search <query> [--exec CMD]  FTS query, optionally run CMD on each hit
-backup                       create timestamped snapshot in backups/
-restore <snapshot>           replace DB with snapshot
+init                              create / migrate DB **and perform an initial scan of the cwd**
+scan <paths>...                   walk directories & (re)index files
+tag "<pattern>" <tag>             add hierarchical tag
+attr set <pattern> <key> <value>  set or update custom attribute
+attr ls <path>                    list attributes
+link add|rm|list|backlinks        manage typed file-to-file relations
+coll create|add|list              manage named collections (“playlists”)
+view save|list|exec               save and run smart views (saved queries)
+search <query> [--exec CMD]       FTS5 query; optionally run CMD per hit
+backup                            create timestamped snapshot in `backups/`
+restore <snapshot>                replace DB with snapshot
+completions <shell>               generate shell completions
```

-### Attribute subcommands
+### Attribute sub-commands

-| Command    | Example                                          |
-| ---------- | ------------------------------------------------ |
-| `attr set` | `marlin attr set "~/Docs/**/*.pdf" reviewed yes` |
-| `attr ls`  | `marlin attr ls ~/Docs/report.pdf`               |
+| Command     | Example                                           |
+| ----------- | ------------------------------------------------- |
+| `attr set`  | `marlin attr set "~/Docs/**/*.pdf" reviewed yes`  |
+| `attr ls`   | `marlin attr ls ~/Docs/report.pdf`                |
+| JSON output | `marlin --format=json attr ls ~/Docs/report.pdf`  |

---

## Backups & restore

-*Create snapshot*
-
```bash
marlin backup
# → ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```

-*Restore snapshot*
-
```bash
marlin restore ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```

-Marlin also takes an **automatic safety backup before every schema migration**.
+> Marlin also creates an **automatic safety backup before every non-`init` command.**
+> *Auto-prune (`backup --prune <N>`) lands in Q2.*

---

## Upgrading

```bash
-cargo install --path . --force   # rebuild & replace installed binary
+cargo install --path . --force   # rebuild & replace installed binary
```

-The versioned migration system preserves your data across upgrades.
-
----
-
-## Roadmap
-
-See [`ROADMAP.md`](./ROADMAP.md) for the full development plan.
-
----
-
-## Five-Minute Quickstart
-
-Paste & run each block in your terminal.
-
----
-
-### 0 Prepare & build
-
-```bash
-# Clone or cd into your Marlin repo
-cd ~/Documents/GitHub/Marlin
-
-# Build the release binary
-cargo build --release
-```
-
----
-
-### 1 Install on your PATH
-
-```bash
-sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
-```
-
-> Now `marlin` is available everywhere.
-
----
-
-### 2 Prepare a clean demo directory
-
-```bash
-rm -rf ~/marlin_demo
-mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
-
-printf "Alpha draft\n"  > ~/marlin_demo/Projects/Alpha/draft.txt
-printf "Beta notes\n"   > ~/marlin_demo/Projects/Beta/notes.md
-printf "Receipt PDF\n"  > ~/marlin_demo/Docs/receipt.pdf
-printf "fake jpg\n"     > ~/marlin_demo/Media/Photos/vacation.jpg
-```
-
----
-
-### 3 Initialize & index files
-
-```bash
-# Use --verbose if you want full debug traces:
-marlin init
-marlin scan ~/marlin_demo
-
-# or, to see every path tested:
-marlin --verbose init
-marlin --verbose scan ~/marlin_demo
-```
-
-> **Tip:** Rerun `marlin scan` after you add/remove/modify files; only changed files get re-indexed.
- ---- - -### 4 Attach tags & attributes - -```bash -# Tag everything under “Alpha” -marlin tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha - -# Mark all PDFs as reviewed -marlin attr set "~/marlin_demo/**/*.pdf" reviewed yes - -# (or with debug) -marlin --verbose tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha -marlin --verbose attr set "~/marlin_demo/**/*.pdf" reviewed yes -``` - ---- - -### 5 Search your index - -```bash -# By tag or filename -marlin search alpha - -# Combined terms (AND across path+attrs) -marlin search "reviewed AND pdf" - -# Run a command on each hit -marlin search reviewed --exec "echo HIT → {}" - -# If things aren’t matching, add --verbose to see the underlying FTS query: -marlin --verbose search "reviewed AND pdf" -``` - -> `{}` in `--exec` is replaced with each file’s path. - ---- - -### 6 Backup & restore - -```bash -# Snapshot and store its name -snap=$(marlin backup | awk '{print $NF}') - -# Simulate data loss -rm ~/.local/share/marlin/index.db - -# Restore instantly -marlin restore "$snap" - -# Verify your files still show up -marlin search reviewed -``` - -> Backups live under `~/.local/share/marlin/backups` by default. - -##### What you just exercised - -| Command | Purpose | -| ----------------- | ----------------------------------------- | -| `marlin init` | Create / upgrade the SQLite database | -| `marlin scan` | Walk directories and (re)index files | -| `marlin tag` | Attach hierarchical tags | -| `marlin attr set` | Add/overwrite custom key-value attributes | -| `marlin search` | FTS5 search across path / tags / attrs | -| `--exec` | Pipe hits into any shell command | -| `marlin backup` | Timestamped snapshot of the DB | -| `marlin restore` | Replace live DB with a chosen snapshot | - -That’s the complete surface area of Marlin today—feel free to play around or -point the scanner at real folders. - +Versioned migrations preserve your data across upgrades. --- ## License -MIT – see `LICENSE` +MIT – see [`LICENSE`](LICENSE). + diff --git a/bar.txt b/bar.txt new file mode 100644 index 0000000..e69de29 diff --git a/foo.txt b/foo.txt new file mode 100644 index 0000000..e69de29 diff --git a/marlin_demo.md b/marlin_demo.md new file mode 100644 index 0000000..28c2b14 --- /dev/null +++ b/marlin_demo.md @@ -0,0 +1,183 @@ +# Marlin Demo 🚀 + +Below is a **“hello-world” walk-through** that matches the current `main` +branch (auto-scan on `marlin init`, no more forced-migration chatter, cleaner +build). Everything runs offline on a throw-away directory under `~/marlin_demo`. + +--- + +## 0 Build & install Marlin + +```bash +# inside the repo +export CARGO_TARGET_DIR=target # <-- speeds up future builds (once) +cargo build --release # build the new binary +sudo install -Dm755 target/release/marlin /usr/local/bin/marlin +# (cargo install --path . 
--locked --force works too)
+```
+
+---
+
+## 1 Create the demo tree
+
+```bash
+rm -rf ~/marlin_demo
+mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta,Gamma},Logs,Reports,Scripts,Media/Photos}
+# (zsh users: quote the pattern or enable braceexpand first)
+
+# ── Projects ───────────────────────────────────────────────────
+cat <<EOF > ~/marlin_demo/Projects/Alpha/draft1.md
+# Alpha draft 1
+- [ ] TODO: outline architecture
+- [ ] TODO: write tests
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Alpha/draft2.md
+# Alpha draft 2
+- [x] TODO: outline architecture
+- [ ] TODO: implement feature X
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Beta/notes.md
+Beta meeting notes:
+
+- decided on roadmap
+- ACTION: follow-up with design team
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Beta/final.md
+# Beta Final
+All tasks complete. Ready to ship!
+EOF
+cat <<EOF > ~/marlin_demo/Projects/Gamma/TODO.txt
+Gamma tasks:
+TODO: refactor module Y
+EOF
+
+# ── Logs & Reports ─────────────────────────────────────────────
+echo "2025-05-15 12:00:00 INFO Starting app"  > ~/marlin_demo/Logs/app.log
+echo "2025-05-15 12:01:00 ERROR Oops, crash" >> ~/marlin_demo/Logs/app.log
+echo "2025-05-15 00:00:00 INFO System check OK" > ~/marlin_demo/Logs/system.log
+printf "Q1 financials\n" > ~/marlin_demo/Reports/Q1_report.pdf
+
+# ── Scripts & Media ────────────────────────────────────────────
+cat <<'EOF' > ~/marlin_demo/Scripts/deploy.sh
+#!/usr/bin/env bash
+echo "Deploying version $1…"
+EOF
+chmod +x ~/marlin_demo/Scripts/deploy.sh
+echo "JPEGDATA" > ~/marlin_demo/Media/Photos/event.jpg
+```
+
+---
+
+## 2 Initialise **and** index (one step)
+
+```bash
+cd ~/marlin_demo   # run init from the folder you want indexed
+marlin init        # • creates or migrates DB
+                   # • runs *first* full scan of this directory
+```
+
+Add more directories later with `marlin scan <dir>`.
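+
+As a quick sanity check, here is a hypothetical session (it assumes the demo tree above, and that Marlin indexes path text, as the README's search examples show):
+
+```bash
+marlin search draft    # should hit Projects/Alpha/draft1.md & draft2.md by path
+marlin search Gamma    # path match on Projects/Gamma/TODO.txt
+```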
+
+---
+
+## 3 Tagging examples
+
+```bash
+# Tag all project markdown as ‘project/md’
+marlin tag '~/marlin_demo/Projects/**/*.md' project/md
+
+# Tag your logs
+marlin tag '~/marlin_demo/Logs/**/*.log' logs/app
+
+# Tag everything under Beta as ‘project/beta’
+marlin tag '~/marlin_demo/Projects/Beta/**/*' project/beta
+```
+
+---
+
+## 4 Set custom attributes
+
+```bash
+marlin attr set '~/marlin_demo/Projects/Beta/final.md' status complete
+marlin attr set '~/marlin_demo/Reports/*.pdf' reviewed yes
+```
+
+---
+
+## 5 Play with search / exec hooks
+
+```bash
+marlin search TODO
+marlin search tag:project/md
+marlin search 'tag:logs/app AND ERROR'
+marlin search 'attr:status=complete'
+marlin search 'attr:reviewed=yes AND pdf'
+marlin search 'attr:reviewed=yes' --exec 'xdg-open {}'
+marlin --format=json search 'attr:status=complete'   # machine-readable output
+```
+
+---
+
+## 6 Verbose mode
+
+```bash
+marlin --verbose scan ~/marlin_demo   # watch debug logs stream by
+```
+
+---
+
+## 7 Snapshot & restore
+
+```bash
+snap=$(marlin backup | awk '{print $NF}')
+rm ~/.local/share/marlin/index.db     # simulate disaster
+marlin restore "$snap"
+marlin search TODO                    # still works
+```
+
+*(Reminder: Marlin also makes an **auto-backup** before every non-`init`
+command, so manual snapshots are extra insurance.)*
+
+---
+
+## 8 Linking demo
+
+```bash
+touch ~/marlin_demo/foo.txt ~/marlin_demo/bar.txt
+marlin scan ~/marlin_demo              # index the new files
+
+foo=~/marlin_demo/foo.txt
+bar=~/marlin_demo/bar.txt
+
+marlin link add "$foo" "$bar" --type references   # create typed link
+marlin link list "$foo"                           # outgoing links from foo
+marlin link backlinks "$bar"                      # incoming links to bar
+```
+
+---
+
+## 9 Collections & smart views
+
+```bash
+# Collection
+marlin coll create SetA
+marlin coll add SetA '~/marlin_demo/Projects/**/*.md'
+marlin coll list SetA
+
+# Saved view (smart folder)
+marlin view save tasks 'attr:status=complete OR TODO'
+marlin view exec tasks
+```
+
+---
+
+### Recap
+
+* `cargo build --release` + `sudo install …` is still the build path.
+* **`marlin init`** scans the **current working directory** on first run.
+* Scan again only when you add *new* directories (`marlin scan …`).
+* Auto-backups happen before every command; manual `marlin backup` gives you extra restore points.
+
+Happy organising!
diff --git a/roadmap.md b/roadmap.md
index 9032721..c45e256 100644
--- a/roadmap.md
+++ b/roadmap.md
@@ -1,40 +1,59 @@
-Here’s the updated roadmap with each new feature slotted in where its dependencies are best met:
+# Marlin Roadmap 2025 → 2026 📜

-| Phase | Focus | Why now? | Key deliverables |
-| -------------------------- | -------------------------------------- | --------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
-| **1. 2025-Q2 – “Bedrock”** | Migrations + CI baseline + core schema | We’ve stabilized migrations; now add foundational tables for links, groups, views | • CI: `cargo test` + `cargo sqlx migrate run --dry-run`
• New migrations: | +This document outlines the **official delivery plan** for Marlin over the next four quarters. +Every work-item below is *time-boxed, testable,* and traceable back to an end-user benefit. -* `links(src_file,dst_file,link_type)` -* `collections(name)` + `collection_files` -* `views(name,query)`
• CLI stubs for `marlin link` / `unlink` / `list-links` / `backlinks`, `marlin coll` and `marlin view` | - \| **2. 2025-Q2** | Leaner FTS maintenance | Per-row triggers don’t scale past \~100 k files | • Replace per-row triggers with a “dirty” flag + periodic rebuild
• Benchmark end-to-end on 100 k files | - \| **2.1 2025-Q2** | Dirty-row FTS + CI | Prep for both scale and live-watcher—avoid full rebuilds on every change | • `scan --dirty` reindexes only changed files
• CI coverage for dirty-scan edge cases | - \| **2.2 2025-Q2** | Live file watching | Offer true “working-dir” mode—auto-scan on FS events | • `marlin watch [dir]` via `notify` crate
• Incremental scan on create/modify/delete/rename | - \| **2.3 2025-Q2** | Self-pruning backups | Instant protection and bounded storage—no manual snapshot cleanup | • `marlin backup --prune ` flag
• Post-scan hook to prune to latest 10
• Daily prune automation (cron or CI) | - \| **3. 2025-Q3** | FTS5 content indexing & annotations | Full-text search over file bodies + per-file notes/highlights | • Add `files.content` column + migration
• Extend `files_fts` to include `content`
• New `annotations` table + FTS triggers
• CLI: `marlin annotate add|list` | - \| **4. 2025-Q3** | Content hashing, dedup & versioning | Detect duplicates, track history, enable diffs | • Populate `files.hash` with SHA-256
• `scan --rehash` option
• CLI: `marlin version diff ` | - \| **5. 2025-Q3** | Tag aliases/canonicals & semantic/AI enhancements | Control tag sprawl and lay groundwork for AI-driven suggestions | • Enforce `canonical_id` on `tags` + `tag alias add|ls|rm` CLI
• Create `embeddings` table
• `scan --embed` to generate vectors
• CLI: `marlin tag suggest`, `marlin summary `, `marlin similarity scan` | - \| **6. 2025-Q4** | Search DSL v2 & Smart Views | More powerful query grammar + reusable “virtual folders” | • Replace ad-hoc parser with a `nom`-based grammar
• CLI: `marlin view save|list|exec` | - \| **7. 2025-Q4** | Attribute templates, states, tasks & timeline | Structured metadata unlocks workflows, reminders & temporal context | • `templates` + `template_fields` tables + validation
• CLI: -* `marlin state set|transitions add|state log` -* `marlin task scan|task list` -* `marlin remind set ""` -* `marlin event add ""`, `marlin timeline` | - \| **8. 2026-Q1** | Dolphin read-only plugin | Surface metadata, links, annotations in native file manager | • Qt sidebar showing tags, attributes, links, annotations | - \| **9. 2026-Q1** | Full edit UI | After proving read-only stable, add in-place editing | • Tag editor, collection & view managers, state/task/event dialogs | - \| **10. 2026-Q2** | Multi-device sync | Final frontier: optional sync/replication layer | • Choose sync backend (rqlite / Litestream / bespoke)
• Support read-only mounts for remote indexes | +> **Legend** +> ✅ = item added/clarified in the latest planning round +> Δ = new sub-deliverable (wasn’t in the previous version) --- -### Current sprint (ends **2025-06-01**) +## 1 Bird’s-eye Table -1. FTS rebuild prototype (dirty-rows) – measure on 50 k files -2. `backup --prune` implementation + auto-prune hook -3. Integration tests for tag/attr workflows on Windows via GitHub Actions -4. **New:** basic `links`, `collections`, `views` migrations + CLI stubs +| Phase / Sprint | Timeline | Focus & Rationale | Key Deliverables (Δ = new) | | | +| ----------------------------------------------- | ------------------------- | ------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------- | ------------------------------------------------------------------------------------------------------------------ | +| **Sprint α – Bedrock & Metadata Domains** | **2025-Q2 (now → 6 Jun)** | Stabilise schema & CI; land first metadata domains with discoverability. | Δ CI: `cargo test` + SQL dry-run
Δ Unit tests (`determine_scan_root`, `escape_fts`)
Δ Coverage: e2e `attr --format=json`
Δ Refactor: move `naive_substring_search` to shared util
Migrations: `links`, `collections`, `views`
CLI stubs: `link`, `coll`, `view`
`marlin demo` walkthrough | | | +| **Epic 1 – Scale & Reliability** | 2025-Q2 | Keep scans fast; bullet-proof CI at 100 k files. | Δ Dirty-flag column + `scan --dirty`
Benchmarks: full vs dirty scan (100 k)
Replace per-row triggers with periodic rebuild
CI edge-case tests | | | +| **Epic 2 – Live Mode & Self-Pruning Backups** | 2025-Q2 | Continuous indexing & hygiene—Marlin “just works”. | Δ `marlin watch [dir]` (notify/FSEvents)
Δ `backup --prune <N>` + auto-prune post-scan
Daily / PR-merge prune in CI | | | +| **Phase 3 – Content FTS & Annotations** | 2025-Q3 | Index file bodies, grep-style context, inline notes. | `files.content` + migration
Extend `files_fts` (context snippets `-C`)
`annotations` table + triggers
CLI `annotate add\|list` | | | +| **Phase 4 – Versioning & Deduplication** | 2025-Q3 | History, diffs & duplicate detection. | `files.hash` (SHA-256)
`scan --rehash` refresh
CLI `version diff <file>` | | | | **Phase 5 – Tag Aliases & Semantic Booster** | 2025-Q3 | Tame tag sprawl; seed AI-powered suggestions. | `canonical_id` on `tags`; CLI `tag alias …`
`embeddings` table + `scan --embed`
CLI `tag suggest`, `similarity scan`, `summary <file>` | | | +| **Phase 6 – Search DSL v2 & Smart Views** | 2025-Q4 | Robust grammar + virtual folders. | Replace parser with **`nom`** grammar (`AND`, `OR`, `()` …)
CLI `view save\|list\|exec` with aliases & paging | | | +| **Phase 7 – Structured Workflows** | 2025-Q4 | First-class task / state / reminder / event life-cycles. | ✅ State engine (`files.state`, `state_changes`)
CLI `state set\|transitions add\|log`
✅ Task extractor (`tasks` table) + CLI
`templates` + validation
CLI `remind …`, `event …`, `timeline` | | | +| **Phase 8 – Lightweight Integrations** | 2026-Q1 | Surface Marlin in editors / terminal. | VS Code & TUI extension (tags / attrs / links / notes) | | | +| **Phase 9 – Dolphin Sidebar Plugin (MVP)** | 2026-Q1 | Read-only Qt sidebar for Linux file managers. | Qt plug-in: tags, attrs, links, annotations | | | +| **Phase 10 – Full Edit UI & Multi-Device Sync** | 2026-Q2 | In-place metadata editor & optional sync layer. | GUI editors (tags, views, tasks, reminders, events)
Pick/implement sync backend (rqlite, Litestream, …) | | | -**Development principles remain**: +--- -* Local-first, offline-capable -* Ship code = ship migrations -* Instrumentation first (trace spans & timings on all new commands) +## 2 Narrative & Dependencies + +1. **Lock down core schema & demo** *(Sprint α).* + Developers get immediate feedback via the `marlin demo` command while CI ensures migrations never regress. + +2. **Scale & Live Mode** *(Epics 1-2).* + Dirty scanning, file-watching and auto-pruned backups guarantee snappy, hands-off operation even on six-figure corpora. + +3. **Richer Search** *(Phases 3-6).* + Body-content FTS + grep-style snippets lay the groundwork; `nom` grammar then elevates power-user queries and smart views. + +4. **Workflow Layers** *(Phase 7).* + State transitions, tasks and reminders turn Marlin from a passive index into an active workflow engine. + +5. **UX Expansions** *(Phases 8-10).* + Start lightweight (VS Code / TUI), graduate to a read-only Dolphin plug-in, then ship full editing & sync for multi-device teams. + +Every outer milestone depends only on the completion of the rows above it, **so shipping discipline in early sprints de-risks the headline features down the line.** + +--- + +## 3 Next Steps + +* **Sprint α kickoff:** break deliverables into stories, estimate, assign. +* **Add roadmap as `docs/ROADMAP.md`** (this file). +* Wire a **Checklist issue** on GitHub: one task per Δ bullet for instant tracking. + +--- + +*Last updated · 2025-05-16* diff --git a/src/cli.rs b/src/cli.rs index b444222..bcb2bde 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,15 +1,36 @@ // src/cli.rs -use std::path::PathBuf; -use clap::{Parser, Subcommand}; +pub mod link; +pub mod coll; +pub mod view; +pub mod state; +pub mod task; +pub mod remind; +pub mod annotate; +pub mod version; +pub mod event; + +use clap::{Parser, Subcommand, ValueEnum}; +use clap_complete::Shell; + +/// Output format for commands. 
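+/// Selected with the global `--format` flag declared on `Cli` below;
+/// `text` is the default.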
+#[derive(ValueEnum, Clone, Copy, Debug)]
+pub enum Format {
+    Text,
+    Json,
+}

 /// Marlin – metadata-driven file explorer (CLI utilities)
 #[derive(Parser, Debug)]
-#[command(author, version, about)]
+#[command(author, version, about, propagate_version = true)]
 pub struct Cli {
     /// Enable debug logging and extra output
     #[arg(long)]
     pub verbose: bool,

+    /// Output format (text or JSON)
+    #[arg(long, default_value = "text", value_enum, global = true)]
+    pub format: Format,
+
     #[command(subcommand)]
     pub command: Commands,
 }

@@ -21,12 +42,15 @@ pub enum Commands {

     /// Scan one or more directories and populate the file index
     Scan {
-        paths: Vec<PathBuf>,
+        /// Directories to scan (defaults to cwd)
+        paths: Vec<std::path::PathBuf>,
     },

     /// Tag files matching a glob pattern (hierarchical tags use `/`)
     Tag {
+        /// Glob or path pattern
         pattern: String,
+        /// Hierarchical tag name (`foo/bar`)
         tag_path: String,
     },

@@ -46,14 +70,58 @@ pub enum Commands {
     /// Create a timestamped backup of the database
     Backup,

-    /// Restore from a backup file (over-writes current DB)
+    /// Restore from a backup file (overwrites current DB)
     Restore {
-        backup_path: PathBuf,
+        backup_path: std::path::PathBuf,
     },
+
+    /// Generate shell completions (hidden)
+    #[command(hide = true)]
+    Completions {
+        /// Which shell to generate for
+        #[arg(value_enum)]
+        shell: Shell,
+    },
+
+    /// File-to-file links
+    #[command(subcommand)]
+    Link(link::LinkCmd),
+
+    /// Collections (groups) of files
+    #[command(subcommand)]
+    Coll(coll::CollCmd),
+
+    /// Smart views (saved queries)
+    #[command(subcommand)]
+    View(view::ViewCmd),
+
+    /// Workflow states on files
+    #[command(subcommand)]
+    State(state::StateCmd),
+
+    /// TODO/tasks management
+    #[command(subcommand)]
+    Task(task::TaskCmd),
+
+    /// Reminders on files
+    #[command(subcommand)]
+    Remind(remind::RemindCmd),
+
+    /// File annotations and highlights
+    #[command(subcommand)]
+    Annotate(annotate::AnnotateCmd),
+
+    /// Version diffs
+    #[command(subcommand)]
+    Version(version::VersionCmd),
+
+    /// Calendar events & timelines
+    #[command(subcommand)]
+    Event(event::EventCmd),
 }

 #[derive(Subcommand, Debug)]
 pub enum AttrCmd {
     Set { pattern: String, key: String, value: String },
-    Ls  { path: PathBuf },
+    Ls  { path: std::path::PathBuf },
 }
diff --git a/src/cli/annotate.rs b/src/cli/annotate.rs
new file mode 100644
index 0000000..50db9d5
--- /dev/null
+++ b/src/cli/annotate.rs
@@ -0,0 +1,28 @@
+// src/cli/annotate.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum AnnotateCmd {
+    Add (ArgsAdd),
+    List(ArgsList),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsAdd {
+    pub file: String,
+    pub note: String,
+    #[arg(long)] pub range: Option<String>,
+    #[arg(long)] pub highlight: bool,
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsList { pub file_pattern: String }
+
+pub fn run(cmd: &AnnotateCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+    match cmd {
+        AnnotateCmd::Add(a)  => todo!("annotate add {:?}", a),
+        AnnotateCmd::List(a) => todo!("annotate list {:?}", a),
+    }
+}
diff --git a/src/cli/coll.rs b/src/cli/coll.rs
new file mode 100644
index 0000000..76a40f7
--- /dev/null
+++ b/src/cli/coll.rs
@@ -0,0 +1,108 @@
+//! `marlin coll …` – named collections of files (simple “playlists”).
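+//!
+//! Hypothetical session (mirrors the `marlin_demo.md` walkthrough):
+//!
+//! ```text
+//! marlin coll create SetA
+//! marlin coll add SetA '~/marlin_demo/Projects/**/*.md'
+//! marlin coll list SetA
+//! ```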
+
+use clap::{Args, Subcommand};
+use rusqlite::Connection;
+
+use crate::{
+    cli::Format,
+    db,
+};
+
+#[derive(Subcommand, Debug)]
+pub enum CollCmd {
+    /// Create an empty collection
+    Create(CreateArgs),
+    /// Add files (glob) to a collection
+    Add(AddArgs),
+    /// List files inside a collection
+    List(ListArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct CreateArgs {
+    pub name: String,
+}
+
+#[derive(Args, Debug)]
+pub struct AddArgs {
+    pub name: String,
+    pub file_pattern: String,
+}
+
+#[derive(Args, Debug)]
+pub struct ListArgs {
+    pub name: String,
+}
+
+/// Look-up an existing collection **without** implicitly creating it.
+///
+/// Returns the collection ID or an error if it doesn’t exist.
+fn lookup_collection_id(conn: &Connection, name: &str) -> anyhow::Result<i64> {
+    conn.query_row(
+        "SELECT id FROM collections WHERE name = ?1",
+        [name],
+        |r| r.get(0),
+    )
+    .map_err(|_| anyhow::anyhow!("collection not found: {}", name))
+}
+
+pub fn run(cmd: &CollCmd, conn: &mut Connection, fmt: Format) -> anyhow::Result<()> {
+    match cmd {
+        /* ── coll create ──────────────────────────────────────────── */
+        CollCmd::Create(a) => {
+            db::ensure_collection(conn, &a.name)?;
+            if matches!(fmt, Format::Text) {
+                println!("Created collection '{}'", a.name);
+            }
+        }
+
+        /* ── coll add ─────────────────────────────────────────────── */
+        CollCmd::Add(a) => {
+            // Fail if the target collection does not yet exist
+            let coll_id = lookup_collection_id(conn, &a.name)?;
+
+            let like = a.file_pattern.replace('*', "%");
+            let mut stmt = conn.prepare("SELECT id FROM files WHERE path LIKE ?1")?;
+            let ids: Vec<i64> = stmt
+                .query_map([&like], |r| r.get::<_, i64>(0))?
+                .collect::<Result<Vec<_>, _>>()?;
+
+            for fid in &ids {
+                db::add_file_to_collection(conn, coll_id, *fid)?;
+            }
+
+            match fmt {
+                Format::Text => println!("Added {} file(s) → '{}'", ids.len(), a.name),
+                Format::Json => {
+                    #[cfg(feature = "json")]
+                    {
+                        println!(
+                            "{{\"collection\":\"{}\",\"added\":{}}}",
+                            a.name,
+                            ids.len()
+                        );
+                    }
+                }
+            }
+        }
+
+        /* ── coll list ────────────────────────────────────────────── */
+        CollCmd::List(a) => {
+            let files = db::list_collection(conn, &a.name)?;
+            match fmt {
+                Format::Text => {
+                    for f in files {
+                        println!("{f}");
+                    }
+                }
+                Format::Json => {
+                    #[cfg(feature = "json")]
+                    {
+                        println!("{}", serde_json::to_string(&files)?);
+                    }
+                }
+            }
+        }
+    }
+    Ok(())
+}
diff --git a/src/cli/commands.yaml b/src/cli/commands.yaml
new file mode 100644
index 0000000..19ea663
--- /dev/null
+++ b/src/cli/commands.yaml
@@ -0,0 +1,81 @@
+# cli/commands.yaml
+# Philosophy: one canonical spec stops drift between docs & code.
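+# Shape (informal convention, inferred from the entries below): every top-level
+# key is a command noun; each entry under `actions` is a sub-command with
+# positional `args` and optional `flags`.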
+link:
+  description: "Manage typed relationships between files"
+  actions:
+    add:
+      args: [from, to]
+      flags: ["--type"]
+    rm:
+      args: [from, to]
+      flags: ["--type"]
+    list:
+      args: [pattern]
+      flags: ["--direction", "--type"]
+    backlinks:
+      args: [pattern]
+
+coll:
+  description: "Manage named collections of files"
+  actions:
+    create:
+      args: [name]
+    add:
+      args: [name, file_pattern]
+    list:
+      args: [name]
+
+view:
+  description: "Save and use smart views (saved queries)"
+  actions:
+    save:
+      args: [view_name, query]
+    list: {}
+    exec:
+      args: [view_name]
+
+state:
+  description: "Track workflow states on files"
+  actions:
+    set:
+      args: [file_pattern, new_state]
+    transitions-add:
+      args: [from_state, to_state]
+    log:
+      args: [file_pattern]
+
+task:
+  description: "Extract TODOs and manage tasks"
+  actions:
+    scan:
+      args: [directory]
+    list:
+      flags: ["--due-today"]
+
+remind:
+  description: "Attach reminders to files"
+  actions:
+    set:
+      args: [file_pattern, timestamp, message]
+
+annotate:
+  description: "Add notes or highlights to files"
+  actions:
+    add:
+      args: [file, note]
+      flags: ["--range", "--highlight"]
+    list:
+      args: [file_pattern]
+
+version:
+  description: "Versioning and diffs"
+  actions:
+    diff:
+      args: [file]
+
+event:
+  description: "Link files to dates/events"
+  actions:
+    add:
+      args: [file, date, description]
+    timeline: {}
diff --git a/src/cli/event.rs b/src/cli/event.rs
new file mode 100644
index 0000000..6988be6
--- /dev/null
+++ b/src/cli/event.rs
@@ -0,0 +1,24 @@
+// src/cli/event.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum EventCmd {
+    Add (ArgsAdd),
+    Timeline,
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsAdd {
+    pub file: String,
+    pub date: String,
+    pub description: String,
+}
+
+pub fn run(cmd: &EventCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+    match cmd {
+        EventCmd::Add(a)   => todo!("event add {:?}", a),
+        EventCmd::Timeline => todo!("event timeline"),
+    }
+}
diff --git a/src/cli/link.rs b/src/cli/link.rs
new file mode 100644
index 0000000..16c23c1
--- /dev/null
+++ b/src/cli/link.rs
@@ -0,0 +1,155 @@
+// src/cli/link.rs
+
+use crate::db;
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum LinkCmd {
+    Add(LinkArgs),
+    Rm (LinkArgs),
+    List(ListArgs),
+    Backlinks(BacklinksArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct LinkArgs {
+    pub from: String,
+    pub to: String,
+    #[arg(long)]
+    pub r#type: Option<String>,
+}
+
+#[derive(Args, Debug)]
+pub struct ListArgs {
+    pub pattern: String,
+    #[arg(long)]
+    pub direction: Option<String>,
+    #[arg(long)]
+    pub r#type: Option<String>,
+}
+
+#[derive(Args, Debug)]
+pub struct BacklinksArgs {
+    pub pattern: String,
+}
+
+pub fn run(cmd: &LinkCmd, conn: &mut Connection, format: Format) -> anyhow::Result<()> {
+    match cmd {
+        LinkCmd::Add(args) => {
+            let src_id = db::file_id(conn, &args.from)?;
+            let dst_id = db::file_id(conn, &args.to)?;
+            db::add_link(conn, src_id, dst_id, args.r#type.as_deref())?;
+            match format {
+                Format::Text => {
+                    if let Some(t) = &args.r#type {
+                        println!("Linked '{}' → '{}' [type='{}']", args.from, args.to, t);
+                    } else {
+                        println!("Linked '{}' → '{}'", args.from, args.to);
+                    }
+                }
+                Format::Json => {
+                    let typ = args
+                        .r#type
+                        .as_ref()
+                        .map(|s| format!("\"{}\"", s))
+                        .unwrap_or_else(|| "null".into());
+                    println!(
+                        "{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
+                        args.from, args.to, typ
+                    );
+                }
+            }
+        }
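+        // `Rm` mirrors `Add`: resolve both endpoints to file IDs, then delete
+        // the matching link row (optionally constrained by `--type`).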
+        LinkCmd::Rm(args) => {
+            let src_id = db::file_id(conn, &args.from)?;
+            let dst_id = db::file_id(conn, &args.to)?;
+            db::remove_link(conn, src_id, dst_id, args.r#type.as_deref())?;
+            match format {
+                Format::Text => {
+                    if let Some(t) = &args.r#type {
+                        println!("Removed link '{}' → '{}' [type='{}']", args.from, args.to, t);
+                    } else {
+                        println!("Removed link '{}' → '{}'", args.from, args.to);
+                    }
+                }
+                Format::Json => {
+                    let typ = args
+                        .r#type
+                        .as_ref()
+                        .map(|s| format!("\"{}\"", s))
+                        .unwrap_or_else(|| "null".into());
+                    println!(
+                        "{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
+                        args.from, args.to, typ
+                    );
+                }
+            }
+        }
+        LinkCmd::List(args) => {
+            let results = db::list_links(
+                conn,
+                &args.pattern,
+                args.direction.as_deref(),
+                args.r#type.as_deref(),
+            )?;
+            match format {
+                Format::Json => {
+                    let items: Vec<String> = results
+                        .into_iter()
+                        .map(|(src, dst, t)| {
+                            let typ = t
+                                .as_ref()
+                                .map(|s| format!("\"{}\"", s))
+                                .unwrap_or_else(|| "null".into());
+                            format!(
+                                "{{\"from\":\"{}\",\"to\":\"{}\",\"type\":{}}}",
+                                src, dst, typ
+                            )
+                        })
+                        .collect();
+                    println!("[{}]", items.join(","));
+                }
+                Format::Text => {
+                    for (src, dst, t) in results {
+                        if let Some(t) = t {
+                            println!("{} → {} [type='{}']", src, dst, t);
+                        } else {
+                            println!("{} → {}", src, dst);
+                        }
+                    }
+                }
+            }
+        }
+        LinkCmd::Backlinks(args) => {
+            let results = db::find_backlinks(conn, &args.pattern)?;
+            match format {
+                Format::Json => {
+                    let items: Vec<String> = results
+                        .into_iter()
+                        .map(|(src, t)| {
+                            let typ = t
+                                .as_ref()
+                                .map(|s| format!("\"{}\"", s))
+                                .unwrap_or_else(|| "null".into());
+                            format!("{{\"from\":\"{}\",\"type\":{}}}", src, typ)
+                        })
+                        .collect();
+                    println!("[{}]", items.join(","));
+                }
+                Format::Text => {
+                    for (src, t) in results {
+                        if let Some(t) = t {
+                            println!("{} [type='{}']", src, t);
+                        } else {
+                            println!("{}", src);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/cli/remind.rs b/src/cli/remind.rs
new file mode 100644
index 0000000..99dac34
--- /dev/null
+++ b/src/cli/remind.rs
@@ -0,0 +1,22 @@
+// src/cli/remind.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum RemindCmd {
+    Set(ArgsSet),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsSet {
+    pub file_pattern: String,
+    pub timestamp: String,
+    pub message: String,
+}
+
+pub fn run(cmd: &RemindCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+    match cmd {
+        RemindCmd::Set(a) => todo!("remind set {:?}", a),
+    }
+}
diff --git a/src/cli/state.rs b/src/cli/state.rs
new file mode 100644
index 0000000..7ac3628
--- /dev/null
+++ b/src/cli/state.rs
@@ -0,0 +1,26 @@
+// src/cli/state.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum StateCmd {
+    Set(ArgsSet),
+    TransitionsAdd(ArgsTrans),
+    Log(ArgsLog),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsSet   { pub file_pattern: String, pub new_state: String }
+#[derive(Args, Debug)]
+pub struct ArgsTrans { pub from_state: String, pub to_state: String }
+#[derive(Args, Debug)]
+pub struct ArgsLog   { pub file_pattern: String }
+
+pub fn run(cmd: &StateCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+    match cmd {
+        StateCmd::Set(a)            => todo!("state set {:?}", a),
+        StateCmd::TransitionsAdd(a) => todo!("state transitions-add {:?}", a),
+        StateCmd::Log(a)            => todo!("state log {:?}", a),
+    }
+}
diff --git a/src/cli/task.rs b/src/cli/task.rs
new file mode 100644
index 0000000..57f9d4c
--- /dev/null
+++ b/src/cli/task.rs
@@ -0,0 +1,22 @@
+// src/cli/task.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum TaskCmd {
+    Scan(ArgsScan),
+    List(ArgsList),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsScan { pub directory: String }
+#[derive(Args, Debug)]
+pub struct ArgsList { #[arg(long)] pub due_today: bool }
+
+pub fn run(cmd: &TaskCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+    match cmd {
+        TaskCmd::Scan(a) => todo!("task scan {:?}", a),
+        TaskCmd::List(a) => todo!("task list {:?}", a),
+    }
+}
diff --git a/src/cli/version.rs b/src/cli/version.rs
new file mode 100644
index 0000000..0c5bf26
--- /dev/null
+++ b/src/cli/version.rs
@@ -0,0 +1,18 @@
+// src/cli/version.rs
+use clap::{Subcommand, Args};
+use rusqlite::Connection;
+use crate::cli::Format;
+
+#[derive(Subcommand, Debug)]
+pub enum VersionCmd {
+    Diff(ArgsDiff),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsDiff { pub file: String }
+
+pub fn run(cmd: &VersionCmd, _conn: &mut Connection, _format: Format) -> anyhow::Result<()> {
+    match cmd {
+        VersionCmd::Diff(a) => todo!("version diff {:?}", a),
+    }
+}
diff --git a/src/cli/view.rs b/src/cli/view.rs
new file mode 100644
index 0000000..7f17ad7
--- /dev/null
+++ b/src/cli/view.rs
@@ -0,0 +1,168 @@
+//! `marlin view …` – save & use “smart folders” (named queries).
+
+use std::fs;
+
+use anyhow::Result;
+use clap::{Args, Subcommand};
+use rusqlite::Connection;
+
+use crate::{cli::Format, db};
+
+#[derive(Subcommand, Debug)]
+pub enum ViewCmd {
+    /// Save (or update) a view
+    Save(ArgsSave),
+    /// List all saved views
+    List,
+    /// Execute a view (print matching paths)
+    Exec(ArgsExec),
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsSave {
+    pub view_name: String,
+    pub query: String,
+}
+
+#[derive(Args, Debug)]
+pub struct ArgsExec {
+    pub view_name: String,
+}
+
+pub fn run(cmd: &ViewCmd, conn: &mut Connection, fmt: Format) -> anyhow::Result<()> {
+    match cmd {
+        /* ── view save ───────────────────────────────────────────── */
+        ViewCmd::Save(a) => {
+            db::save_view(conn, &a.view_name, &a.query)?;
+            if matches!(fmt, Format::Text) {
+                println!("Saved view '{}' = {}", a.view_name, a.query);
+            }
+        }
+
+        /* ── view list ───────────────────────────────────────────── */
+        ViewCmd::List => {
+            let views = db::list_views(conn)?;
+            match fmt {
+                Format::Text => {
+                    for (name, q) in views {
+                        println!("{name}: {q}");
+                    }
+                }
+                Format::Json => {
+                    #[cfg(feature = "json")]
+                    {
+                        println!("{}", serde_json::to_string(&views)?);
+                    }
+                }
+            }
+        }
+
+        /* ── view exec ───────────────────────────────────────────── */
+        ViewCmd::Exec(a) => {
+            let raw = db::view_query(conn, &a.view_name)?;
+
+            // Re-use the tiny parser from marlin search
+            let fts_expr = build_fts_match(&raw);
+
+            let mut stmt = conn.prepare(
+                r#"
+                SELECT f.path
+                  FROM files_fts
+                  JOIN files f ON f.rowid = files_fts.rowid
+                 WHERE files_fts MATCH ?1
+                 ORDER BY rank
+                "#,
+            )?;
+            let mut paths: Vec<String> = stmt
+                .query_map([fts_expr], |r| r.get::<_, String>(0))?
+                .collect::<Result<Vec<_>, _>>()?;
+
+            /* ── NEW: graceful fallback when FTS finds nothing ───── */
+            if paths.is_empty() && !raw.contains(':') {
+                paths = naive_search(conn, &raw)?;
+            }
+
+            if paths.is_empty() && matches!(fmt, Format::Text) {
+                eprintln!("(view '{}' has no matches)", a.view_name);
+            } else {
+                for p in paths {
+                    println!("{p}");
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+/* ─── naive substring path/content search (≤ 64 kB files) ───────── */
+
+fn naive_search(conn: &Connection, term: &str) -> Result<Vec<String>> {
+    let term_lc = term.to_lowercase();
+    let mut stmt = conn.prepare("SELECT path FROM files")?;
+    let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
+
+    let mut hits = Vec::new();
+    for p in rows {
+        let p = p?;
+        /* path match */
+        if p.to_lowercase().contains(&term_lc) {
+            hits.push(p);
+            continue;
+        }
+        /* small-file content match */
+        if let Ok(meta) = fs::metadata(&p) {
+            if meta.len() > 64_000 {
+                continue;
+            }
+        }
+        if let Ok(content) = fs::read_to_string(&p) {
+            if content.to_lowercase().contains(&term_lc) {
+                hits.push(p);
+            }
+        }
+    }
+    Ok(hits)
+}
+
+/* ─── minimal copy of search-string → FTS5 translator ───────────── */
+
+fn build_fts_match(raw_query: &str) -> String {
+    use shlex;
+    let mut parts = Vec::new();
+    let toks = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
+    for tok in toks {
+        if ["AND", "OR", "NOT"].contains(&tok.as_str()) {
+            parts.push(tok);
+        } else if let Some(tag) = tok.strip_prefix("tag:") {
+            for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
+                if i > 0 {
+                    parts.push("AND".into());
+                }
+                parts.push(format!("tags_text:{}", escape(seg)));
+            }
+        } else if let Some(attr) = tok.strip_prefix("attr:") {
+            let mut kv = attr.splitn(2, '=');
+            let key = kv.next().unwrap();
+            if let Some(val) = kv.next() {
+                parts.push(format!("attrs_text:{}", escape(key)));
+                parts.push("AND".into());
+                parts.push(format!("attrs_text:{}", escape(val)));
+            } else {
+                parts.push(format!("attrs_text:{}", escape(key)));
+            }
+        } else {
+            parts.push(escape(&tok));
+        }
+    }
+    parts.join(" ")
+}
+
+fn escape(term: &str) -> String {
+    if term.contains(|c: char| c.is_whitespace() || "-:()\"".contains(c))
+        || ["AND", "OR", "NOT", "NEAR"].contains(&term.to_uppercase().as_str())
+    {
+        format!("\"{}\"", term.replace('"', "\"\""))
+    } else {
+        term.to_string()
+    }
+}
diff --git a/src/config.rs b/src/config.rs
index 0cd4bc4..d0292a1 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,7 +1,10 @@
-use std::path::{Path, PathBuf};
-
 use anyhow::Result;
 use directories::ProjectDirs;
+use std::{
+    collections::hash_map::DefaultHasher,
+    hash::{Hash, Hasher},
+    path::{Path, PathBuf},
+};
 
 /// Runtime configuration (currently just the DB path).
 #[derive(Debug, Clone)]
@@ -10,22 +13,39 @@ pub struct Config {
 }
 
 impl Config {
-    /// Resolve configuration from environment or XDG directories.
+    /// Resolve configuration from environment or derive one per-workspace.
+    ///
+    /// Priority:
+    /// 1. `MARLIN_DB_PATH` env-var (explicit override)
+    /// 2. *Workspace-local* file under XDG data dir
+    ///    (`~/.local/share/marlin/index_<hash>.db`)
+    /// 3.
Fall back to a workspace-relative `index_<hash>.db` when no XDG dir is available
     pub fn load() -> Result<Self> {
-        let db_path = std::env::var_os("MARLIN_DB_PATH")
-            .map(PathBuf::from)
-            .or_else(|| {
-                ProjectDirs::from("io", "Marlin", "marlin")
-                    .map(|dirs| dirs.data_dir().join("index.db"))
-            })
-            .unwrap_or_else(|| Path::new("index.db").to_path_buf());
+        // 1) explicit override
+        if let Some(val) = std::env::var_os("MARLIN_DB_PATH") {
+            let p = PathBuf::from(val);
+            std::fs::create_dir_all(p.parent().expect("has parent"))?;
+            return Ok(Self { db_path: p });
+        }
 
-        std::fs::create_dir_all(
-            db_path
-                .parent()
-                .expect("db_path should always have a parent directory"),
-        )?;
+        // 2) derive per-workspace DB name from CWD hash
+        let cwd = std::env::current_dir()?;
+        let mut h = DefaultHasher::new();
+        cwd.hash(&mut h);
+        let digest = h.finish(); // 64-bit
+        let file_name = format!("index_{digest:016x}.db");
 
-        Ok(Self { db_path })
+        if let Some(dirs) = ProjectDirs::from("io", "Marlin", "marlin") {
+            let dir = dirs.data_dir();
+            std::fs::create_dir_all(dir)?;
+            return Ok(Self {
+                db_path: dir.join(file_name),
+            });
+        }
+
+        // 3) very last resort – workspace-relative DB
+        Ok(Self {
+            db_path: Path::new(&file_name).to_path_buf(),
+        })
     }
 }
diff --git a/src/db/migrations/0003_create_links_collections_views.sql b/src/db/migrations/0003_create_links_collections_views.sql
new file mode 100644
index 0000000..7ffca89
--- /dev/null
+++ b/src/db/migrations/0003_create_links_collections_views.sql
@@ -0,0 +1,28 @@
+PRAGMA foreign_keys = ON;
+
+-- File-to-file links
+CREATE TABLE IF NOT EXISTS links (
+    id           INTEGER PRIMARY KEY,
+    src_file_id  INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+    dst_file_id  INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+    type         TEXT,
+    UNIQUE(src_file_id, dst_file_id, type)
+);
+
+-- Named collections
+CREATE TABLE IF NOT EXISTS collections (
+    id    INTEGER PRIMARY KEY,
+    name  TEXT NOT NULL UNIQUE
+);
+CREATE TABLE IF NOT EXISTS collection_files (
+    collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
+    file_id       INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+    PRIMARY KEY(collection_id, file_id)
+);
+
+-- Saved views
+CREATE TABLE IF NOT EXISTS views (
+    id    INTEGER PRIMARY KEY,
+    name  TEXT NOT NULL UNIQUE,
+    query TEXT NOT NULL
+);
diff --git a/src/db/migrations/0004_fix_hierarchical_tags_fts.sql b/src/db/migrations/0004_fix_hierarchical_tags_fts.sql
new file mode 100644
index 0000000..273079e
--- /dev/null
+++ b/src/db/migrations/0004_fix_hierarchical_tags_fts.sql
@@ -0,0 +1,289 @@
+-- src/db/migrations/0004_fix_hierarchical_tags_fts.sql
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+
+-- Force drop all FTS triggers to ensure they're recreated even if migration is already recorded
+DROP TRIGGER IF EXISTS files_fts_ai_file;
+DROP TRIGGER IF EXISTS files_fts_au_file;
+DROP TRIGGER IF EXISTS files_fts_ad_file;
+DROP TRIGGER IF EXISTS file_tags_fts_ai;
+DROP TRIGGER IF EXISTS file_tags_fts_ad;
+DROP TRIGGER IF EXISTS attributes_fts_ai;
+DROP TRIGGER IF EXISTS attributes_fts_au;
+DROP TRIGGER IF EXISTS attributes_fts_ad;
+
+-- Create a new trigger for file insertion that uses recursive CTE for full tag paths
+CREATE TRIGGER files_fts_ai_file
+AFTER INSERT ON files
+BEGIN
+  INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
+  VALUES (
+    NEW.id,
+    NEW.path,
+    (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
+       FROM (
+         WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
+           SELECT t.id, t.name, t.parent_id, t.name
+             FROM tags t
+            WHERE
t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = NEW.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = NEW.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = NEW.id) + ); +END; + +-- Recreate the file path update trigger +CREATE TRIGGER files_fts_au_file +AFTER UPDATE OF path ON files +BEGIN + UPDATE files_fts + SET path = NEW.path + WHERE rowid = NEW.id; +END; + +-- Recreate the file deletion trigger +CREATE TRIGGER files_fts_ad_file +AFTER DELETE ON files +BEGIN + DELETE FROM files_fts WHERE rowid = OLD.id; +END; + +-- Create new trigger for tag insertion that uses recursive CTE for full tag paths +CREATE TRIGGER file_tags_fts_ai +AFTER INSERT ON file_tags +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; +END; + +-- Create new trigger for tag deletion that uses recursive CTE for full tag paths +CREATE TRIGGER file_tags_fts_ad +AFTER DELETE ON file_tags +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = OLD.file_id; +END; + +-- Create new triggers for attribute operations that use recursive CTE for full tag paths +CREATE TRIGGER attributes_fts_ai +AFTER INSERT ON attributes +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = 
tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; +END; + +CREATE TRIGGER attributes_fts_au +AFTER UPDATE OF value ON attributes +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; +END; + +CREATE TRIGGER attributes_fts_ad +AFTER DELETE ON attributes +BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = OLD.file_id; +END; + +-- Update all existing FTS entries with the new tag-path format +INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) +SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path AS tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name AS tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) +FROM files f; diff --git a/src/db/mod.rs b/src/db/mod.rs index 2e8286b..6a13a43 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,9 +1,12 @@ -// src/db/mod.rs +//! Central DB helper – connection bootstrap, migrations **and** most +//! data-access helpers (tags, links, collections, saved views, …). 
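+//!
+//! Every helper below takes a plain `&rusqlite::Connection` and returns
+//! `anyhow::Result`, so call-sites in the CLI layer can chain them with `?`.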
+
 use std::{
     fs,
     path::{Path, PathBuf},
 };
+use std::result::Result as StdResult;
 
 use anyhow::{Context, Result};
 use chrono::Local;
 use rusqlite::{
@@ -12,16 +15,20 @@ use rusqlite::{
     Connection,
     OpenFlags,
     OptionalExtension,
+    TransactionBehavior,
 };
-use tracing::{debug, info};
+use tracing::{debug, info, warn};
+
+/* ─── embedded migrations ─────────────────────────────────────────── */
 
-/// Embed every numbered migration file here.
 const MIGRATIONS: &[(&str, &str)] = &[
     ("0001_initial_schema.sql",          include_str!("migrations/0001_initial_schema.sql")),
     ("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
+    ("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
+    ("0004_fix_hierarchical_tags_fts.sql",      include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
 ];
 
-/* ─── connection bootstrap ──────────────────────────────────────────── */
+/* ─── connection bootstrap ────────────────────────────────────────── */
 
 pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
     let db_path_ref = db_path.as_ref();
@@ -31,16 +38,18 @@ pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
     conn.pragma_update(None, "journal_mode", "WAL")?;
     conn.pragma_update(None, "foreign_keys", "ON")?;
 
-    // Apply migrations (drops & recreates all FTS triggers)
-    apply_migrations(&mut conn)?;
+    // Wait up to 30 s for a competing writer before giving up
+    conn.busy_timeout(std::time::Duration::from_secs(30))?; // ← tweaked
 
+    apply_migrations(&mut conn)?;
     Ok(conn)
 }
 
-/* ─── migration runner ──────────────────────────────────────────────── */
+
+/* ─── migration runner ────────────────────────────────────────────── */
 
 fn apply_migrations(conn: &mut Connection) -> Result<()> {
-    // Ensure schema_version table
+    // Ensure schema_version bookkeeping table exists
     conn.execute_batch(
         "CREATE TABLE IF NOT EXISTS schema_version (
             version     INTEGER PRIMARY KEY,
@@ -48,10 +57,11 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
         );",
     )?;
 
-    // Legacy patch (ignore if exists)
+    // Legacy patch – ignore errors if column already exists
     let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);
 
-    let tx = conn.transaction()?;
+    // Grab the write-lock up-front so migrations can run uninterrupted
+    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
 
     for (fname, sql) in MIGRATIONS {
         let version: i64 = fname
@@ -74,13 +84,8 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
         }
 
         info!("applying migration {}", fname);
-        println!(
-            "\nSQL SCRIPT FOR MIGRATION: {}\nBEGIN SQL >>>\n{}\n<<< END SQL\n",
-            fname, sql
-        );
-
         tx.execute_batch(sql)
-            .with_context(|| format!("could not apply migration {}", fname))?;
+            .with_context(|| format!("could not apply migration {fname}"))?;
 
         tx.execute(
             "INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
@@ -89,10 +94,31 @@ fn apply_migrations(conn: &mut Connection) -> Result<()> {
     }
 
     tx.commit()?;
+
+    // sanity – warn if any embedded migration got skipped
+    let mut missing = Vec::new();
+    for (fname, _) in MIGRATIONS {
+        let v: i64 = fname.split('_').next().unwrap().parse().unwrap();
+        let ok: bool = conn
+            .query_row(
+                "SELECT 1 FROM schema_version WHERE version = ?1",
+                [v],
+                |_| Ok(true),
+            )
+            .optional()?
+            .unwrap_or(false);
+        if !ok {
+            missing.push(v);
+        }
+    }
+    if !missing.is_empty() {
+        warn!("migrations not applied: {:?}", missing);
+    }
+
     Ok(())
 }
 
-/* ─── helpers ───────────────────────────────────────────────────────── */
+/* ─── tag helpers ─────────────────────────────────────────────────── */
 
 pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
     let mut parent: Option<i64> = None;
@@ -104,7 +130,7 @@ pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
         let id: i64 = conn.query_row(
             "SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)",
             params![segment, parent],
-            |row| row.get(0),
+            |r| r.get(0),
         )?;
         parent = Some(id);
     }
@@ -116,6 +142,8 @@ pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
         .map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
 }
 
+/* ─── attributes ──────────────────────────────────────────────────── */
+
 pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
     conn.execute(
         r#"
@@ -128,7 +156,161 @@ pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> R
     Ok(())
 }
 
-/* ─── backup / restore ──────────────────────────────────────────────── */
+/* ─── links ───────────────────────────────────────────────────────── */
+
+pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+    conn.execute(
+        "INSERT INTO links(src_file_id, dst_file_id, type)
+         VALUES (?1, ?2, ?3)
+         ON CONFLICT(src_file_id, dst_file_id, type) DO NOTHING",
+        params![src_file_id, dst_file_id, link_type],
+    )?;
+    Ok(())
+}
+
+pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+    conn.execute(
+        "DELETE FROM links
+          WHERE src_file_id = ?1
+            AND dst_file_id = ?2
+            AND (type IS ?3 OR type = ?3)",
+        params![src_file_id, dst_file_id, link_type],
+    )?;
+    Ok(())
+}
+
+pub fn list_links(
+    conn: &Connection,
+    pattern: &str,
+    direction: Option<&str>,
+    link_type: Option<&str>,
+) -> Result<Vec<(String, String, Option<String>)>> {
+    let like_pattern = pattern.replace('*', "%");
+
+    // Files matching pattern
+    let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
+    let rows = stmt
+        .query_map(params![like_pattern], |r| Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)))?
+        .collect::<StdResult<Vec<_>, _>>()?;
+
+    let mut out = Vec::new();
+    for (fid, fpath) in rows {
+        let (src_col, dst_col) = match direction {
+            Some("in") => ("dst_file_id", "src_file_id"),
+            _          => ("src_file_id", "dst_file_id"),
+        };
+
+        let sql = format!(
+            "SELECT f2.path, l.type
+               FROM links l
+               JOIN files f2 ON f2.id = l.{dst_col}
+              WHERE l.{src_col} = ?1
+                AND (?2 IS NULL OR l.type = ?2)",
+        );
+
+        let mut stmt2 = conn.prepare(&sql)?;
+        let links = stmt2
+            .query_map(params![fid, link_type], |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)))?
+            .collect::<StdResult<Vec<_>, _>>()?;
+
+        for (other, typ) in links {
+            out.push((fpath.clone(), other, typ));
+        }
+    }
+    Ok(out)
+}
+
+pub fn find_backlinks(
+    conn: &Connection,
+    pattern: &str,
+) -> Result<Vec<(String, Option<String>)>> {
+    let like = pattern.replace('*', "%");
+
+    let mut stmt = conn.prepare(
+        "SELECT f1.path, l.type
+           FROM links l
+           JOIN files f1 ON f1.id = l.src_file_id
+           JOIN files f2 ON f2.id = l.dst_file_id
+          WHERE f2.path LIKE ?1",
+    )?;
+
+    let rows = stmt.query_map([like], |r| {
+        Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
+    })?;
+
+    let out = rows.collect::<StdResult<Vec<_>, _>>()?; // rusqlite → anyhow via `?`
+    Ok(out)
+}
+
+/* ─── NEW: collections helpers ────────────────────────────────────── */
+
+pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> {
+    conn.execute(
+        "INSERT OR IGNORE INTO collections(name) VALUES (?1)",
+        params![name],
+    )?;
+    conn.query_row(
+        "SELECT id FROM collections WHERE name = ?1",
+        params![name],
+        |r| r.get(0),
+    )
+    .context("collection lookup failed")
+}
+
+pub fn add_file_to_collection(conn: &Connection, coll_id: i64, file_id: i64) -> Result<()> {
+    conn.execute(
+        "INSERT OR IGNORE INTO collection_files(collection_id, file_id)
+         VALUES (?1, ?2)",
+        params![coll_id, file_id],
+    )?;
+    Ok(())
+}
+
+pub fn list_collection(conn: &Connection, name: &str) -> Result<Vec<String>> {
+    let mut stmt = conn.prepare(
+        r#"SELECT f.path
+             FROM collections c
+             JOIN collection_files cf ON cf.collection_id = c.id
+             JOIN files f             ON f.id             = cf.file_id
+            WHERE c.name = ?1
+            ORDER BY f.path"#,
+    )?;
+
+    let rows = stmt.query_map([name], |r| r.get::<_, String>(0))?;
+    let list = rows.collect::<StdResult<Vec<_>, _>>()?;
+    Ok(list)
+}
+
+/* ─── NEW: saved views (smart folders) ────────────────────────────── */
+
+pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
+    conn.execute(
+        "INSERT INTO views(name, query)
+         VALUES (?1, ?2)
+         ON CONFLICT(name) DO UPDATE SET query = excluded.query",
+        params![name, query],
+    )?;
+    Ok(())
+}
+
+pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> {
+    let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?;
+
+    let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
+    let list = rows.collect::<StdResult<Vec<_>, _>>()?;
+    Ok(list)
+}
+
+pub fn view_query(conn: &Connection, name: &str) -> Result<String> {
+    conn.query_row(
+        "SELECT query FROM views WHERE name = ?1",
+        [name],
+        |r| r.get::<_, String>(0),
+    )
+    .context(format!("no view called '{name}'"))
+}
+
+/* ─── backup / restore helpers ────────────────────────────────────── */
 
 pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
     let src = db_path.as_ref();
@@ -153,3 +335,15 @@ pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
     fs::copy(&backup_path, &live_db_path)?;
     Ok(())
 }
+
+/* ─── tests ───────────────────────────────────────────────────────── */
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn migrations_apply_in_memory() {
+        open(":memory:").expect("all migrations apply");
+    }
+}
diff --git a/src/logging.rs b/src/logging.rs
index a0141ed..514fa0d 100644
--- a/src/logging.rs
+++ b/src/logging.rs
@@ -5,9 +5,13 @@ use tracing_subscriber::{fmt, EnvFilter};
 /// Reads `RUST_LOG` for filtering, falls back to `info`.
 pub fn init() {
     let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+
+    // All tracing output (INFO, WARN, ERROR …) now goes to *stderr* so the
+    // integration tests can assert on warnings / errors reliably.
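+    //
+    // Quick sanity check (hypothetical session):
+    //
+    //     $ RUST_LOG=debug marlin search TODO 2>debug.log
+    //
+    // keeps stdout limited to the matching paths; every diagnostic line
+    // lands in `debug.log`.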
fmt() - .with_target(false) - .with_level(true) - .with_env_filter(filter) + .with_target(false) // hide module targets + .with_level(true) // include log level + .with_env_filter(filter) // respect RUST_LOG + .with_writer(std::io::stderr) // <-- NEW: send to stderr .init(); } diff --git a/src/main.rs b/src/main.rs index 46fe75b..984fbec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,6 @@ // src/main.rs +#![deny(warnings)] + mod cli; mod config; mod db; @@ -6,46 +8,74 @@ mod logging; mod scan; use anyhow::{Context, Result}; -use clap::Parser; +use clap::{CommandFactory, Parser}; +use clap_complete::generate; use glob::Pattern; use rusqlite::params; use shellexpand; use shlex; -use std::{env, path::PathBuf, process::Command}; +use std::{ + env, + fs, + io, + path::{Path, PathBuf}, + process::Command, +}; use tracing::{debug, error, info}; use walkdir::WalkDir; -use cli::{AttrCmd, Cli, Commands}; +use cli::{Cli, Commands}; fn main() -> Result<()> { - // Parse CLI and bootstrap logging + /* ── CLI parsing & logging ────────────────────────────────────── */ + let args = Cli::parse(); if args.verbose { - // switch on debug‐level logs env::set_var("RUST_LOG", "debug"); } logging::init(); - let cfg = config::Config::load()?; + /* ── shell-completion shortcut ───────────────────────────────── */ - // Backup before any non-init, non-backup/restore command - if !matches!(args.command, Commands::Init | Commands::Backup | Commands::Restore { .. }) { - match db::backup(&cfg.db_path) { - Ok(path) => info!("Pre-command auto-backup created at {}", path.display()), - Err(e) => error!("Failed to create pre-command auto-backup: {}", e), - } + if let Commands::Completions { shell } = &args.command { + let mut cmd = Cli::command(); + generate(*shell, &mut cmd, "marlin", &mut io::stdout()); + return Ok(()); } - // Open (and migrate) the DB + /* ── config & automatic backup ───────────────────────────────── */ + + let cfg = config::Config::load()?; // DB path, etc. + + match &args.command { + Commands::Init | Commands::Backup | Commands::Restore { .. } => {} + _ => match db::backup(&cfg.db_path) { + Ok(path) => info!("Pre-command auto-backup created at {}", path.display()), + Err(e) => error!("Failed to create pre-command auto-backup: {e}"), + }, + } + + /* ── open DB (runs migrations if needed) ─────────────────────── */ + let mut conn = db::open(&cfg.db_path)?; + /* ── command dispatch ────────────────────────────────────────── */ + match args.command { + Commands::Completions { .. } => {} // already handled + Commands::Init => { info!("Database initialised at {}", cfg.db_path.display()); + + // Always (re-)scan the current directory so even an existing DB + // picks up newly created files in the working tree. + let cwd = env::current_dir().context("getting current directory")?; + let count = scan::scan_directory(&mut conn, &cwd) + .context("initial scan failed")?; + info!("Initial scan complete – indexed/updated {count} files"); } Commands::Scan { paths } => { - // if none given, default to current dir let scan_paths = if paths.is_empty() { vec![env::current_dir()?] 
} else { @@ -56,22 +86,16 @@ fn main() -> Result<()> { } } - Commands::Tag { pattern, tag_path } => { - apply_tag(&conn, &pattern, &tag_path)?; - } + Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?, Commands::Attr { action } => match action { - AttrCmd::Set { pattern, key, value } => { - attr_set(&conn, &pattern, &key, &value)?; - } - AttrCmd::Ls { path } => { - attr_ls(&conn, &path)?; + cli::AttrCmd::Set { pattern, key, value } => { + attr_set(&conn, &pattern, &key, &value)? } + cli::AttrCmd::Ls { path } => attr_ls(&conn, &path)?, }, - Commands::Search { query, exec } => { - run_search(&conn, &query, exec)?; - } + Commands::Search { query, exec } => run_search(&conn, &query, exec)?, Commands::Backup => { let path = db::backup(&cfg.db_path)?; @@ -79,48 +103,89 @@ fn main() -> Result<()> { } Commands::Restore { backup_path } => { - drop(conn); - db::restore(&backup_path, &cfg.db_path) - .with_context(|| format!("Failed to restore DB from {}", backup_path.display()))?; - println!("Restored DB file from {}", backup_path.display()); - db::open(&cfg.db_path) - .with_context(|| format!("Could not open restored DB at {}", cfg.db_path.display()))?; - info!("Successfully opened and processed restored database."); + drop(conn); // close handle before overwrite + db::restore(&backup_path, &cfg.db_path).with_context(|| { + format!("Failed to restore DB from {}", backup_path.display()) + })?; + println!("Restored DB from {}", backup_path.display()); + db::open(&cfg.db_path).with_context(|| { + format!("Could not open restored DB at {}", cfg.db_path.display()) + })?; + info!("Successfully opened restored database."); } + + /* passthrough sub-modules that still stub out their logic */ + Commands::Link(link_cmd) => cli::link::run(&link_cmd, &mut conn, args.format)?, + Commands::Coll(coll_cmd) => cli::coll::run(&coll_cmd, &mut conn, args.format)?, + Commands::View(view_cmd) => cli::view::run(&view_cmd, &mut conn, args.format)?, + Commands::State(state_cmd) => cli::state::run(&state_cmd, &mut conn, args.format)?, + Commands::Task(task_cmd) => cli::task::run(&task_cmd, &mut conn, args.format)?, + Commands::Remind(rm_cmd) => cli::remind::run(&rm_cmd, &mut conn, args.format)?, + Commands::Annotate(an_cmd) => cli::annotate::run(&an_cmd, &mut conn, args.format)?, + Commands::Version(v_cmd) => cli::version::run(&v_cmd, &mut conn, args.format)?, + Commands::Event(e_cmd) => cli::event::run(&e_cmd, &mut conn, args.format)?, } Ok(()) } +/* ───────────────────────── helpers & sub-routines ───────────────── */ + +/* ---------- TAGS ---------- */ + /// Apply a hierarchical tag to all files matching the glob pattern. 
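+/// Every ancestor of the leaf tag is attached as well (tagging
+/// `project/md` also records `project`), so a search on either segment
+/// will match.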
 fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
-    let tag_id = db::ensure_tag_path(conn, tag_path)?;
+    // ensure_tag_path returns the deepest-node ID
+    let leaf_tag_id = db::ensure_tag_path(conn, tag_path)?;
+
+    // collect that tag and all its ancestors
+    let mut tag_ids = Vec::new();
+    let mut current = Some(leaf_tag_id);
+    while let Some(id) = current {
+        tag_ids.push(id);
+        current = match conn.query_row(
+            "SELECT parent_id FROM tags WHERE id = ?1",
+            params![id],
+            |r| r.get::<_, Option<i64>>(0),
+        ) {
+            Ok(parent_id) => parent_id,
+            Err(rusqlite::Error::QueryReturnedNoRows) => None,
+            Err(e) => return Err(e.into()),
+        };
+    }
+
     let expanded = shellexpand::tilde(pattern).into_owned();
-    let pat = Pattern::new(&expanded)
-        .with_context(|| format!("Invalid glob pattern `{}`", expanded))?;
+    let pat = Pattern::new(&expanded)
+        .with_context(|| format!("Invalid glob pattern `{expanded}`"))?;
     let root = determine_scan_root(&expanded);
 
-    let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
-    let mut stmt_insert =
-        conn.prepare("INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)")?;
+    let mut stmt_file   = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
+    let mut stmt_insert = conn.prepare(
+        "INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
+    )?;
 
     let mut count = 0;
-    for entry in WalkDir::new(&root).into_iter().filter_map(Result::ok).filter(|e| e.file_type().is_file()) {
+    for entry in WalkDir::new(&root)
+        .into_iter()
+        .filter_map(Result::ok)
+        .filter(|e| e.file_type().is_file())
+    {
         let path_str = entry.path().to_string_lossy();
-        debug!("testing path: {}", path_str);
         if !pat.matches(&path_str) {
-            debug!("  → no match");
             continue;
         }
-        debug!("  → matched");
 
         match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) {
             Ok(file_id) => {
-                if stmt_insert.execute(params![file_id, tag_id])? > 0 {
+                let mut newly = false;
+                for &tid in &tag_ids {
+                    if stmt_insert.execute(params![file_id, tid])? > 0 {
+                        newly = true;
+                    }
+                }
+                if newly {
                     info!(file = %path_str, tag = tag_path, "tagged");
                     count += 1;
-                } else {
-                    debug!(file = %path_str, tag = tag_path, "already tagged");
                 }
             }
             Err(rusqlite::Error::QueryReturnedNoRows) => {
@@ -132,42 +197,39 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Resu
             }
         }
     }
 
-    if count > 0 {
-        info!("Applied tag '{}' to {} file(s).", tag_path, count);
-    } else {
-        info!("No new files were tagged with '{}' (no matches or already tagged).", tag_path);
-    }
+    info!(
+        "Applied tag '{}' to {} file(s).",
+        tag_path, count
+    );
     Ok(())
 }
 
+/* ---------- ATTRIBUTES ---------- */
+
 /// Set a key=value attribute on all files matching the glob pattern.
-fn attr_set( - conn: &rusqlite::Connection, - pattern: &str, - key: &str, - value: &str, -) -> Result<()> { +fn attr_set(conn: &rusqlite::Connection, pattern: &str, key: &str, value: &str) -> Result<()> { let expanded = shellexpand::tilde(pattern).into_owned(); - let pat = Pattern::new(&expanded) - .with_context(|| format!("Invalid glob pattern `{}`", expanded))?; + let pat = Pattern::new(&expanded) + .with_context(|| format!("Invalid glob pattern `{expanded}`"))?; let root = determine_scan_root(&expanded); let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?; let mut count = 0; - for entry in WalkDir::new(&root).into_iter().filter_map(Result::ok).filter(|e| e.file_type().is_file()) { + for entry in WalkDir::new(&root) + .into_iter() + .filter_map(Result::ok) + .filter(|e| e.file_type().is_file()) + { let path_str = entry.path().to_string_lossy(); - debug!("testing attr path: {}", path_str); if !pat.matches(&path_str) { - debug!(" → no match"); continue; } - debug!(" → matched"); match stmt_file.query_row(params![path_str.as_ref()], |r| r.get::<_, i64>(0)) { Ok(file_id) => { db::upsert_attr(conn, file_id, key, value)?; - info!(file = %path_str, key = key, value = value, "attr set"); + info!(file = %path_str, key, value, "attr set"); count += 1; } Err(rusqlite::Error::QueryReturnedNoRows) => { @@ -179,45 +241,64 @@ fn attr_set( } } - if count > 0 { - info!("Attribute '{}: {}' set on {} file(s).", key, value, count); - } else { - info!("No attributes set (no matches or not indexed)."); - } + info!( + "Attribute '{}={}' set on {} file(s).", + key, value, count + ); Ok(()) } /// List attributes for a given file path. -fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> { +fn attr_ls(conn: &rusqlite::Connection, path: &Path) -> Result<()> { let file_id = db::file_id(conn, &path.to_string_lossy())?; - let mut stmt = conn.prepare( - "SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key", - )?; - for row in stmt.query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))? { + let mut stmt = + conn.prepare("SELECT key, value FROM attributes WHERE file_id = ?1 ORDER BY key")?; + for row in stmt + .query_map([file_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))? + { let (k, v) = row?; println!("{k} = {v}"); } Ok(()) } -/// Build and run an FTS5 search query, with optional exec. +/* ---------- SEARCH ---------- */ + +/// Run an FTS5 search, optionally piping each hit through `exec`. +/// Falls back to a simple substring scan (path + ≤64 kB file contents) +/// when the FTS index yields no rows. 
 fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>) -> Result<()> {
-    let mut fts_query_parts = Vec::new();
-    let parts = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
-    for part in parts {
-        if ["AND", "OR", "NOT"].contains(&part.as_str()) {
-            fts_query_parts.push(part);
-        } else if let Some(tag) = part.strip_prefix("tag:") {
-            fts_query_parts.push(format!("tags_text:{}", escape_fts_query_term(tag)));
-        } else if let Some(attr) = part.strip_prefix("attr:") {
-            fts_query_parts.push(format!("attrs_text:{}", escape_fts_query_term(attr)));
+    // Build the FTS MATCH expression
+    let mut parts = Vec::new();
+    let toks = shlex::split(raw_query).unwrap_or_else(|| vec![raw_query.to_string()]);
+    for tok in toks {
+        if ["AND", "OR", "NOT"].contains(&tok.as_str()) {
+            parts.push(tok);
+        } else if let Some(tag) = tok.strip_prefix("tag:") {
+            for (i, seg) in tag.split('/').filter(|s| !s.is_empty()).enumerate() {
+                if i > 0 {
+                    parts.push("AND".into());
+                }
+                parts.push(format!("tags_text:{}", escape_fts(seg)));
+            }
+        } else if let Some(attr) = tok.strip_prefix("attr:") {
+            let mut kv = attr.splitn(2, '=');
+            let key = kv.next().unwrap();
+            if let Some(val) = kv.next() {
+                parts.push(format!("attrs_text:{}", escape_fts(key)));
+                parts.push("AND".into());
+                parts.push(format!("attrs_text:{}", escape_fts(val)));
+            } else {
+                parts.push(format!("attrs_text:{}", escape_fts(key)));
+            }
         } else {
-            fts_query_parts.push(escape_fts_query_term(&part));
+            parts.push(escape_fts(&tok));
         }
     }
-    let fts_expr = fts_query_parts.join(" ");
-    debug!("Constructed FTS MATCH expression: {}", fts_expr);
+    let fts_expr = parts.join(" ");
+    debug!("FTS MATCH expression: {fts_expr}");
 
+    // ---------- primary FTS query ----------
     let mut stmt = conn.prepare(
         r#"
         SELECT f.path
@@ -227,51 +308,27 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>
         ORDER BY rank
         "#,
     )?;
-    let hits: Vec<String> = stmt
-        .query_map(params![fts_expr], |row| row.get(0))?
+    let mut hits: Vec<String> = stmt
+        .query_map(params![fts_expr], |r| r.get::<_, String>(0))?
         .filter_map(Result::ok)
         .collect();
 
+    // ---------- graceful fallback ----------
+    if hits.is_empty() && !raw_query.contains(':') {
+        hits = naive_substring_search(conn, raw_query)?;
+    }
+
+    // ---------- output / exec ----------
     if let Some(cmd_tpl) = exec {
-        let mut ran_without_placeholder = false;
-        if hits.is_empty() && !cmd_tpl.contains("{}") {
-            if let Some(mut parts) = shlex::split(&cmd_tpl) {
-                if !parts.is_empty() {
-                    let prog = parts.remove(0);
-                    let status = Command::new(&prog).args(&parts).status()?;
-                    if !status.success() {
-                        error!(command=%cmd_tpl, code=?status.code(), "command failed");
-                    }
-                }
-            }
-            ran_without_placeholder = true;
-        }
-        if !ran_without_placeholder {
-            for path in hits {
-                let quoted = shlex::try_quote(&path).unwrap_or(path.clone().into());
-                let cmd_final = if cmd_tpl.contains("{}") {
-                    cmd_tpl.replace("{}", &quoted)
-                } else {
-                    format!("{} {}", cmd_tpl, &quoted)
-                };
-                if let Some(mut parts) = shlex::split(&cmd_final) {
-                    if parts.is_empty() {
-                        continue;
-                    }
-                    let prog = parts.remove(0);
-                    let status = Command::new(&prog).args(&parts).status()?;
-                    if !status.success() {
-                        error!(file=%path, command=%cmd_final, code=?status.code(), "command failed");
-                    }
-                }
-            }
-        }
+        run_exec(&hits, &cmd_tpl)?;
     } else {
         if hits.is_empty() {
-            eprintln!("No matches for query: `{}` (FTS expression: `{}`)", raw_query, fts_expr);
+            eprintln!(
+                "No matches for query: `{raw_query}` (FTS expression: `{fts_expr}`)"
+            );
         } else {
             for p in hits {
-                println!("{}", p);
+                println!("{p}");
            }
         }
     }
@@ -279,10 +336,81 @@ fn run_search(conn: &rusqlite::Connection, raw_query: &str, exec: Option<String>
     Ok(())
 }
 
-/// Quote terms for FTS when needed.
-fn escape_fts_query_term(term: &str) -> String {
+/// Simple, case-insensitive substring scan over paths and (small) file bodies.
+fn naive_substring_search(conn: &rusqlite::Connection, term: &str) -> Result<Vec<String>> {
+    let term_lc = term.to_lowercase();
+
+    let mut stmt = conn.prepare("SELECT path FROM files")?;
+    let rows = stmt.query_map([], |r| r.get::<_, String>(0))?;
+
+    let mut out = Vec::new();
+    for p in rows {
+        let p = p?;
+        if p.to_lowercase().contains(&term_lc) {
+            out.push(p.clone());
+            continue;
+        }
+        // Only inspect small files to stay fast
+        if let Ok(meta) = fs::metadata(&p) {
+            if meta.len() > 64_000 {
+                continue;
+            }
+        }
+        if let Ok(content) = fs::read_to_string(&p) {
+            if content.to_lowercase().contains(&term_lc) {
+                out.push(p);
+            }
+        }
+    }
+    Ok(out)
+}
+
+/// Helper: run an external command template on every hit.
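+/// `{}` in the template is replaced by the shell-quoted hit; a template
+/// without `{}` has the quoted path appended instead (so a template of
+/// `wc -l` runs `wc -l <path>` once per hit).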
+fn run_exec(paths: &[String], cmd_tpl: &str) -> Result<()> {
+    let mut ran_without_placeholder = false;
+
+    if paths.is_empty() && !cmd_tpl.contains("{}") {
+        if let Some(mut parts) = shlex::split(cmd_tpl) {
+            if !parts.is_empty() {
+                let prog = parts.remove(0);
+                let status = Command::new(&prog).args(&parts).status()?;
+                if !status.success() {
+                    error!(command = %cmd_tpl, code = ?status.code(), "command failed");
+                }
+            }
+        }
+        ran_without_placeholder = true;
+    }
+
+    if !ran_without_placeholder {
+        for p in paths {
+            let quoted = shlex::try_quote(p).unwrap_or_else(|_| p.into());
+            let final_cmd = if cmd_tpl.contains("{}") {
+                cmd_tpl.replace("{}", &quoted)
+            } else {
+                format!("{cmd_tpl} {quoted}")
+            };
+            if let Some(mut parts) = shlex::split(&final_cmd) {
+                if parts.is_empty() {
+                    continue;
+                }
+                let prog = parts.remove(0);
+                let status = Command::new(&prog).args(&parts).status()?;
+                if !status.success() {
+                    error!(file = %p, command = %final_cmd, code = ?status.code(), "command failed");
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+/* ---------- misc helpers ---------- */
+
+fn escape_fts(term: &str) -> String {
     if term.contains(|c: char| c.is_whitespace() || "-:()\"".contains(c))
-        || ["AND","OR","NOT","NEAR"].contains(&term.to_uppercase().as_str())
+        || ["AND", "OR", "NOT", "NEAR"]
+            .contains(&term.to_uppercase().as_str())
     {
         format!("\"{}\"", term.replace('"', "\"\""))
     } else {
@@ -292,16 +420,22 @@ fn escape_fts_query_term(term: &str) -> String {
 
 /// Determine a filesystem root to limit recursive walking.
 fn determine_scan_root(pattern: &str) -> PathBuf {
-    let wildcard_pos = pattern.find(|c| c == '*' || c == '?' || c == '[').unwrap_or(pattern.len());
-    let prefix = &pattern[..wildcard_pos];
-    let mut root = PathBuf::from(prefix);
-    while root.as_os_str().to_string_lossy().contains(|c| ['*','?','['].contains(&c)) {
-        if let Some(parent) = root.parent() {
-            root = parent.to_path_buf();
-        } else {
-            root = PathBuf::from(".");
-            break;
-        }
+    let first_wild = pattern
+        .find(|c| matches!(c, '*' | '?' | '['))
+        .unwrap_or(pattern.len());
+    let mut root = PathBuf::from(&pattern[..first_wild]);
+
+    while root
+        .as_os_str()
+        .to_string_lossy()
+        .contains(|c| matches!(c, '*' | '?'
| '[')) + { + root = root.parent().map(Path::to_path_buf).unwrap_or_default(); + } + + if root.as_os_str().is_empty() { + PathBuf::from(".") + } else { + root } - root } diff --git a/src/test_hierarchical_tags.rs b/src/test_hierarchical_tags.rs new file mode 100644 index 0000000..5c36911 --- /dev/null +++ b/src/test_hierarchical_tags.rs @@ -0,0 +1,240 @@ +// Test script to validate hierarchical tag FTS fix +// This script demonstrates how the fix works with a simple test case + +use rusqlite::{Connection, params}; +use std::path::Path; +use std::fs; +use anyhow::Result; + +fn main() -> Result<()> { + // Create a test database in a temporary location + let db_path = Path::new("/tmp/marlin_test.db"); + if db_path.exists() { + fs::remove_file(db_path)?; + } + + println!("Creating test database at {:?}", db_path); + + // Initialize database with our schema and migrations + let conn = Connection::open(db_path)?; + + // Apply schema (simplified version of what's in the migrations) + println!("Applying schema..."); + conn.execute_batch( + "PRAGMA foreign_keys = ON; + PRAGMA journal_mode = WAL; + + CREATE TABLE files ( + id INTEGER PRIMARY KEY, + path TEXT NOT NULL UNIQUE, + size INTEGER, + mtime INTEGER, + hash TEXT + ); + + CREATE TABLE tags ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE, + canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL, + UNIQUE(name, parent_id) + ); + + CREATE TABLE file_tags ( + file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE, + tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE, + PRIMARY KEY(file_id, tag_id) + ); + + CREATE TABLE attributes ( + id INTEGER PRIMARY KEY, + file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE, + key TEXT NOT NULL, + value TEXT, + UNIQUE(file_id, key) + ); + + CREATE VIRTUAL TABLE files_fts + USING fts5( + path, + tags_text, + attrs_text, + content='', + tokenize=\"unicode61 remove_diacritics 2\" + );" + )?; + + // Apply our fixed triggers + println!("Applying fixed FTS triggers..."); + conn.execute_batch( + "CREATE TRIGGER files_fts_ai_file + AFTER INSERT ON files + BEGIN + INSERT INTO files_fts(rowid, path, tags_text, attrs_text) + VALUES ( + NEW.id, + NEW.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path as tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = NEW.id + + UNION + + SELECT t.name as tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = NEW.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = NEW.id) + ); + END; + + CREATE TRIGGER file_tags_fts_ai + AFTER INSERT ON file_tags + BEGIN + INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text) + SELECT f.id, f.path, + (SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '') + FROM ( + WITH RECURSIVE tag_tree(id, name, parent_id, path) AS ( + SELECT t.id, t.name, t.parent_id, t.name + FROM tags t + WHERE t.parent_id IS NULL + + UNION ALL + + SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name + FROM tags t + JOIN tag_tree tt ON t.parent_id = tt.id + ) + SELECT DISTINCT tag_tree.path as 
tag_path + FROM file_tags ft + JOIN tag_tree ON ft.tag_id = tag_tree.id + WHERE ft.file_id = f.id + + UNION + + SELECT t.name as tag_path + FROM file_tags ft + JOIN tags t ON ft.tag_id = t.id + WHERE ft.file_id = f.id AND t.parent_id IS NULL + )), + (SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '') + FROM attributes a + WHERE a.file_id = f.id) + FROM files f + WHERE f.id = NEW.file_id; + END;" + )?; + + // Insert test data + println!("Inserting test data..."); + + // Insert a test file + conn.execute( + "INSERT INTO files (id, path) VALUES (1, '/test/document.md')", + [], + )?; + + // Create hierarchical tags: project/md + println!("Creating hierarchical tags: project/md"); + + // Insert parent tag 'project' + conn.execute( + "INSERT INTO tags (id, name, parent_id) VALUES (1, 'project', NULL)", + [], + )?; + + // Insert child tag 'md' under 'project' + conn.execute( + "INSERT INTO tags (id, name, parent_id) VALUES (2, 'md', 1)", + [], + )?; + + // Tag the file with the 'md' tag (which is under 'project') + conn.execute( + "INSERT INTO file_tags (file_id, tag_id) VALUES (1, 2)", + [], + )?; + + // Check what's in the FTS index + println!("\nChecking FTS index content:"); + let mut stmt = conn.prepare("SELECT rowid, path, tags_text, attrs_text FROM files_fts")?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, String>(3)?, + )) + })?; + + for row in rows { + let (id, path, tags, attrs) = row?; + println!("ID: {}, Path: {}, Tags: '{}', Attrs: '{}'", id, path, tags, attrs); + } + + // Test searching for the full hierarchical tag path + println!("\nTesting search for 'project/md':"); + let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project/md'")?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + + let mut found = false; + for row in rows { + found = true; + println!("Found file: {}", row?); + } + + if !found { + println!("No files found with tag 'project/md'"); + } + + // Test searching for just the parent tag + println!("\nTesting search for just 'project':"); + let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'project'")?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + + let mut found = false; + for row in rows { + found = true; + println!("Found file: {}", row?); + } + + if !found { + println!("No files found with tag 'project'"); + } + + // Test searching for just the child tag + println!("\nTesting search for just 'md':"); + let mut stmt = conn.prepare("SELECT f.path FROM files_fts JOIN files f ON f.id = files_fts.rowid WHERE files_fts MATCH 'md'")?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + + let mut found = false; + for row in rows { + found = true; + println!("Found file: {}", row?); + } + + if !found { + println!("No files found with tag 'md'"); + } + + println!("\nTest completed successfully!"); + Ok(()) +} diff --git a/target/release/marlin b/target/release/marlin index dffd7de..a571766 100755 Binary files a/target/release/marlin and b/target/release/marlin differ diff --git a/target/release/marlin.d b/target/release/marlin.d index 8d05671..391de91 100644 --- a/target/release/marlin.d +++ b/target/release/marlin.d @@ -1 +1 @@ -/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs 
/home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs +/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/src/cli.rs /home/user/Documents/GitHub/Marlin/src/config.rs /home/user/Documents/GitHub/Marlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/src/logging.rs /home/user/Documents/GitHub/Marlin/src/main.rs /home/user/Documents/GitHub/Marlin/src/scan.rs diff --git a/tests/e2e.rs b/tests/e2e.rs new file mode 100644 index 0000000..64fc8dc --- /dev/null +++ b/tests/e2e.rs @@ -0,0 +1,121 @@ +//! End-to-end “happy path” smoke-tests for the `marlin` binary. +//! +//! Run with `cargo test --test e2e` (CI does) or `cargo test`. + +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::{fs, path::PathBuf, process::Command}; +use tempfile::tempdir; + +/// Absolute path to the freshly-built `marlin` binary. +fn marlin_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_marlin")) +} + +/// Create the demo directory structure and seed files. +fn spawn_demo_tree(root: &PathBuf) { + fs::create_dir_all(root.join("Projects/Alpha")).unwrap(); + fs::create_dir_all(root.join("Projects/Beta")).unwrap(); + fs::create_dir_all(root.join("Projects/Gamma")).unwrap(); + fs::create_dir_all(root.join("Logs")).unwrap(); + fs::create_dir_all(root.join("Reports")).unwrap(); + + fs::write(root.join("Projects/Alpha/draft1.md"), "- [ ] TODO foo\n").unwrap(); + fs::write(root.join("Projects/Alpha/draft2.md"), "- [x] TODO foo\n").unwrap(); + fs::write(root.join("Projects/Beta/final.md"), "done\n").unwrap(); + fs::write(root.join("Projects/Gamma/TODO.txt"), "TODO bar\n").unwrap(); + fs::write(root.join("Logs/app.log"), "ERROR omg\n").unwrap(); + fs::write(root.join("Reports/Q1.pdf"), "PDF\n").unwrap(); +} + +/// Shorthand for “run and must succeed”. 
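+/// (`assert_cmd` panics with the captured stdout/stderr when the exit
+/// status is non-zero, which is what fails the test.)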
+fn ok(cmd: &mut Command) -> assert_cmd::assert::Assert { + cmd.assert().success() +} + +#[test] +fn full_cli_flow() -> Result<(), Box> { + /* ── 1 ░ sandbox ───────────────────────────────────────────── */ + + let tmp = tempdir()?; // wiped on drop + let demo_dir = tmp.path().join("marlin_demo"); + spawn_demo_tree(&demo_dir); + + let db_path = demo_dir.join("index.db"); + + // Helper to spawn a fresh `marlin` Command with the DB env-var set + let marlin = || { + let mut c = Command::new(marlin_bin()); + c.env("MARLIN_DB_PATH", &db_path); + c + }; + + /* ── 2 ░ init ( auto-scan cwd ) ───────────────────────────── */ + + ok(marlin() + .current_dir(&demo_dir) + .arg("init")); + + /* ── 3 ░ tag & attr demos ─────────────────────────────────── */ + + ok(marlin() + .arg("tag") + .arg(format!("{}/Projects/**/*.md", demo_dir.display())) + .arg("project/md")); + + ok(marlin() + .arg("attr") + .arg("set") + .arg(format!("{}/Reports/*.pdf", demo_dir.display())) + .arg("reviewed") + .arg("yes")); + + /* ── 4 ░ quick search sanity checks ───────────────────────── */ + + marlin() + .arg("search").arg("TODO") + .assert() + .stdout(predicate::str::contains("TODO.txt")); + + marlin() + .arg("search").arg("attr:reviewed=yes") + .assert() + .stdout(predicate::str::contains("Q1.pdf")); + + /* ── 5 ░ link flow & backlinks ────────────────────────────── */ + + let foo = demo_dir.join("foo.txt"); + let bar = demo_dir.join("bar.txt"); + fs::write(&foo, "")?; + fs::write(&bar, "")?; + + ok(marlin().arg("scan").arg(&demo_dir)); + + ok(marlin() + .arg("link").arg("add") + .arg(&foo).arg(&bar)); + + marlin() + .arg("link").arg("backlinks").arg(&bar) + .assert() + .stdout(predicate::str::contains("foo.txt")); + + /* ── 6 ░ backup → delete DB → restore ────────────────────── */ + + let backup_path = String::from_utf8( + marlin().arg("backup").output()?.stdout + )?; + let backup_file = backup_path.split_whitespace().last().unwrap(); + + fs::remove_file(&db_path)?; // simulate corruption + ok(marlin().arg("restore").arg(backup_file)); // restore + + // Search must still work afterwards + marlin() + .arg("search").arg("TODO") + .assert() + .stdout(predicate::str::contains("TODO.txt")); + + Ok(()) +} + diff --git a/tests/neg.rs b/tests/neg.rs new file mode 100644 index 0000000..89af7f1 --- /dev/null +++ b/tests/neg.rs @@ -0,0 +1,81 @@ +//! Negative-path integration tests (“should fail / warn”). 
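+//!
+//! Each test drives the compiled `marlin` binary through `assert_cmd`
+//! and asserts on the exit status plus a fragment of stderr.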
+ +use predicates::str; +use tempfile::tempdir; + +mod util; +use util::marlin; + +/* ───────────────────────── LINKS ─────────────────────────────── */ + +#[test] +fn link_non_indexed_should_fail() { + let tmp = tempdir().unwrap(); + + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + std::fs::write(tmp.path().join("foo.txt"), "").unwrap(); + std::fs::write(tmp.path().join("bar.txt"), "").unwrap(); + + marlin(&tmp) + .current_dir(tmp.path()) + .args([ + "link", "add", + &tmp.path().join("foo.txt").to_string_lossy(), + &tmp.path().join("bar.txt").to_string_lossy() + ]) + .assert() + .failure() + .stderr(str::contains("file not indexed")); +} + +/* ───────────────────────── ATTR ─────────────────────────────── */ + +#[test] +fn attr_set_on_non_indexed_file_should_warn() { + let tmp = tempdir().unwrap(); + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + let ghost = tmp.path().join("ghost.txt"); + std::fs::write(&ghost, "").unwrap(); + + marlin(&tmp) + .args(["attr","set", + &ghost.to_string_lossy(),"foo","bar"]) + .assert() + .success() // exits 0 + .stderr(str::contains("not indexed")); +} + +/* ───────────────────── COLLECTIONS ───────────────────────────── */ + +#[test] +fn coll_add_unknown_collection_should_fail() { + let tmp = tempdir().unwrap(); + let file = tmp.path().join("doc.txt"); + std::fs::write(&file, "").unwrap(); + + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + marlin(&tmp) + .args(["coll","add","nope",&file.to_string_lossy()]) + .assert() + .failure(); +} + +/* ───────────────────── RESTORE (bad file) ───────────────────── */ + +#[test] +fn restore_with_nonexistent_backup_should_fail() { + let tmp = tempdir().unwrap(); + + // create an empty DB first + marlin(&tmp).arg("init").assert().success(); + + marlin(&tmp) + .args(["restore", "/definitely/not/here.db"]) + .assert() + .failure() + .stderr(str::contains("Failed to restore")); +} + diff --git a/tests/pos.rs b/tests/pos.rs new file mode 100644 index 0000000..1d00659 --- /dev/null +++ b/tests/pos.rs @@ -0,0 +1,171 @@ +//! Positive-path integration checks for every sub-command +//! that already has real logic behind it. 
+ +mod util; +use util::marlin; + +use predicates::{prelude::*, str}; // brings `PredicateBooleanExt::and` +use std::fs; +use tempfile::tempdir; + +/* ─────────────────────────── TAG ─────────────────────────────── */ + +#[test] +fn tag_should_add_hierarchical_tag_and_search_finds_it() { + let tmp = tempdir().unwrap(); + let file = tmp.path().join("foo.md"); + fs::write(&file, "# test\n").unwrap(); + + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + marlin(&tmp) + .args(["tag", file.to_str().unwrap(), "project/md"]) + .assert().success(); + + marlin(&tmp) + .args(["search", "tag:project/md"]) + .assert() + .success() + .stdout(str::contains("foo.md")); +} + +/* ─────────────────────────── ATTR ────────────────────────────── */ + +#[test] +fn attr_set_then_ls_roundtrip() { + let tmp = tempdir().unwrap(); + let file = tmp.path().join("report.pdf"); + fs::write(&file, "%PDF-1.4\n").unwrap(); + + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + marlin(&tmp) + .args(["attr", "set", file.to_str().unwrap(), "reviewed", "yes"]) + .assert().success(); + + marlin(&tmp) + .args(["attr", "ls", file.to_str().unwrap()]) + .assert() + .success() + .stdout(str::contains("reviewed = yes")); +} + +/* ─────────────────────── COLLECTIONS ────────────────────────── */ + +#[test] +fn coll_create_add_and_list() { + let tmp = tempdir().unwrap(); + + let a = tmp.path().join("a.txt"); + let b = tmp.path().join("b.txt"); + fs::write(&a, "").unwrap(); + fs::write(&b, "").unwrap(); + + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + marlin(&tmp).args(["coll", "create", "Set"]).assert().success(); + for f in [&a, &b] { + marlin(&tmp).args(["coll", "add", "Set", f.to_str().unwrap()]).assert().success(); + } + + marlin(&tmp) + .args(["coll", "list", "Set"]) + .assert() + .success() + .stdout(str::contains("a.txt").and(str::contains("b.txt"))); +} + +/* ─────────────────────────── VIEWS ───────────────────────────── */ + +#[test] +fn view_save_list_and_exec() { + let tmp = tempdir().unwrap(); + + let todo = tmp.path().join("TODO.txt"); + fs::write(&todo, "remember the milk\n").unwrap(); + + marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success(); + + // save & list + marlin(&tmp).args(["view", "save", "tasks", "milk"]).assert().success(); + marlin(&tmp) + .args(["view", "list"]) + .assert() + .success() + .stdout(str::contains("tasks: milk")); + + // exec + marlin(&tmp) + .args(["view", "exec", "tasks"]) + .assert() + .success() + .stdout(str::contains("TODO.txt")); +} + +/* ─────────────────────────── LINKS ───────────────────────────── */ + +#[test] +fn link_add_rm_and_list() { + let tmp = tempdir().unwrap(); + + let foo = tmp.path().join("foo.txt"); + let bar = tmp.path().join("bar.txt"); + fs::write(&foo, "").unwrap(); + fs::write(&bar, "").unwrap(); + + // handy closure + let mc = || marlin(&tmp); + + mc().current_dir(tmp.path()).arg("init").assert().success(); + mc().args(["scan", tmp.path().to_str().unwrap()]).assert().success(); + + // add + mc().args(["link", "add", foo.to_str().unwrap(), bar.to_str().unwrap()]) + .assert().success(); + + // list (outgoing default) + mc().args(["link", "list", foo.to_str().unwrap()]) + .assert().success() + .stdout(str::contains("foo.txt").and(str::contains("bar.txt"))); + + // remove + mc().args(["link", "rm", foo.to_str().unwrap(), bar.to_str().unwrap()]) + .assert().success(); + + // list now empty + mc().args(["link", "list", foo.to_str().unwrap()]) + .assert().success() + 
.stdout(str::is_empty());
+}
+
+/* ─────────────────────── SCAN (multi-path) ───────────────────── */
+
+#[test]
+fn scan_with_multiple_paths_indexes_all() {
+    let tmp = tempdir().unwrap();
+
+    let dir_a = tmp.path().join("A");
+    let dir_b = tmp.path().join("B");
+    std::fs::create_dir_all(&dir_a).unwrap();
+    std::fs::create_dir_all(&dir_b).unwrap();
+    let f1 = dir_a.join("one.txt");
+    let f2 = dir_b.join("two.txt");
+    fs::write(&f1, "").unwrap();
+    fs::write(&f2, "").unwrap();
+
+    marlin(&tmp).current_dir(tmp.path()).arg("init").assert().success();
+
+    // multi-path scan
+    marlin(&tmp)
+        .args(["scan", dir_a.to_str().unwrap(), dir_b.to_str().unwrap()])
+        .assert().success();
+
+    // both files findable
+    for term in ["one.txt", "two.txt"] {
+        marlin(&tmp).args(["search", term])
+            .assert()
+            .success()
+            .stdout(str::contains(term));
+    }
+}
diff --git a/tests/test.md b/tests/test.md
new file mode 100644
index 0000000..bc1abc1
--- /dev/null
+++ b/tests/test.md
@@ -0,0 +1,68 @@
+# Testing
+
+Below is a **repeatable 3-step flow** you can use **every time you pull fresh code**.
+
+---
+
+## 0 Prepare once
+
+```bash
+# Run once (or add to ~/.bashrc) so debug + release artefacts land
+# in the same predictable place. Speeds up future builds.
+export CARGO_TARGET_DIR=target
+```
+
+---
+
+## 1 Build the new binary
+
+```bash
+git pull                # grab the latest commit
+cargo build --release
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
+```
+
+* `cargo build --release` – builds the optimised binary.
+* `install …` – copies it into your `$PATH` so `marlin` on the CLI is the fresh one.
+
+---
+
+## 2 Run the smoke-test suite
+
+```bash
+# Runs the end-to-end test we added in tests/e2e.rs
+cargo test --test e2e -- --nocapture
+```
+
+* `--test e2e` – compiles and runs **only** `tests/e2e.rs`; other unit-tests are skipped (add them later if you like).
+* `--nocapture` – streams stdout/stderr so you can watch each CLI step in real time.
+* Exit-code **0** ➜ everything passed.
+  Any non-zero exit or a red ✗ line means a step failed; the assert’s diff will show the command and its output.
+
+---
+
+## 3 (Optionally) run all tests
+
+```bash
+cargo test --all -- --nocapture
+```
+
+This will execute:
+
+* unit tests in `src/**`
+* every file in `tests/`
+* doc-tests
+
+If you wire **“cargo test --all”** into CI (GitHub Actions, GitLab, etc.), pushes that break the build or a test will be rejected automatically.
+
+---
+
+### One-liner helper (copy/paste)
+
+```bash
+git pull && cargo build --release &&
+sudo install -Dm755 target/release/marlin /usr/local/bin/marlin &&
+cargo test --all -- --nocapture
+```
+
+Stick that in a shell alias (`alias marlin-ci='…'`) and you’ve got a 5-second upgrade-and-verify loop.
diff --git a/tests/util.rs b/tests/util.rs
new file mode 100644
index 0000000..5f19ffb
--- /dev/null
+++ b/tests/util.rs
@@ -0,0 +1,23 @@
+//! tests/util.rs
+//! Small helpers shared across integration tests.
+
+use std::path::{Path, PathBuf};
+use tempfile::TempDir;
+use assert_cmd::Command;
+/// Absolute path to the freshly-built `marlin` binary.
+pub fn bin() -> PathBuf {
+    PathBuf::from(env!("CARGO_BIN_EXE_marlin"))
+}
+
+/// Build a `Command` for `marlin` whose `MARLIN_DB_PATH` is
+/// `<tmp>/index.db`.
+///
+/// Each call yields a brand-new `Command`, so callers can freely add
+/// arguments, change the working directory, etc., without affecting
+/// other invocations.
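+///
+/// e.g. `marlin(&tmp).args(["search", "TODO"]).assert().success();`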
+pub fn marlin(tmp: &TempDir) -> Command { + let db_path: &Path = &tmp.path().join("index.db"); + let mut cmd = Command::new(bin()); + cmd.env("MARLIN_DB_PATH", db_path); + cmd +}