This commit is contained in:
thePR0M3TH3AN
2025-05-14 17:51:17 -04:00
parent 9d3e0ffef7
commit fe8aa18803
14 changed files with 646 additions and 328 deletions

5
.gitignore vendored
View File

@@ -36,3 +36,8 @@ test.db
# === Environment variables and secrets ===
.env
.env.*
# === Other ===
repo-context.txt
saved_config.yaml

215
Cargo.lock generated
View File

@@ -23,6 +23,21 @@ dependencies = [
"memchr",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.18"
@@ -79,12 +94,24 @@ version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "bitflags"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "cc"
version = "1.2.22"
@@ -100,6 +127,20 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "clap"
version = "4.5.38"
@@ -146,6 +187,12 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "directories"
version = "5.0.1"
@@ -220,12 +267,46 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "iana-time-zone"
version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@@ -270,10 +351,12 @@ name = "marlin"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"directories",
"glob",
"rusqlite",
"shlex",
"tracing",
"tracing-subscriber",
"walkdir",
@@ -304,6 +387,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -421,6 +513,12 @@ dependencies = [
"smallvec",
]
[[package]]
name = "rustversion"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
[[package]]
name = "same-file"
version = "1.0.6"
@@ -605,6 +703,64 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "winapi"
version = "0.3.9"
@@ -636,6 +792,65 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
[[package]]
name = "windows-result"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.48.0"

View File

@@ -4,11 +4,14 @@ version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0"
clap = { version = "4.5.2", features = ["derive"] }
directories = "5.0"
anyhow = "1"
clap = { version = "4", features = ["derive"] }
directories = "5"
glob = "0.3"
rusqlite = { version = "0.31.0", features = ["bundled"] }
rusqlite = { version = "0.31", features = ["bundled", "backup"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
walkdir = "2.5"
shlex = "1.3"
chrono = "0.4"

241
README.md
View File

@@ -1,75 +1,51 @@
![Marlin Logo](https://raw.githubusercontent.com/PR0M3TH3AN/Marlin/refs/heads/main/assets/png/marlin_logo.png?token=GHSAT0AAAAAADDJQCM7EIFN3NMAIUGVOUQO2BE7YQA)
![Marlin Logo](https://raw.githubusercontent.com/PR0M3TH3AN/Marlin/refs/heads/main/assets/png/marlin_logo.png)
# Marlin
**Marlin** is a lightweight, metadata-driven file indexer you run on your own
machine. It scans folders, stores paths and basic stats in a local SQLite
database, and lets you tag files from the command line.
**Marlin** is a lightweight, metadata-driven file indexer that runs entirely on
your computer.
It scans folders, stores paths and file stats in SQLite, lets you add
hierarchical **tags** and **custom attributes**, takes automatic snapshots, and
offers instant full-text search with FTS5.
Nothing ever leaves your machine.
Nothing leaves your computer.
---
This repo contains the **Sprint-0 foundation**:
## Feature highlights
* XDG-aware config — no hard-coded paths
* Embedded SQLite migrations (WAL mode)
* Fast directory scanner (now accepts *multiple* paths in one call)
* Simple tagging tool
* Human-readable logging via `tracing`
| Area | What you get |
|----------------|---------------------------------------------------------------------------------|
| **Safety** | Timestamped backups `marlin backup` and one-command restore `marlin restore` |
| **Upgrades** | Automatic schema migrations + dynamic column adds |
| **Indexing** | Fast multi-path scanner (WAL mode) |
| **Metadata** | Hierarchical tags (`project/alpha`) & key-value attributes (`reviewed=yes`) |
| **Search** | Prefix-aware FTS5, optional `--exec` action per hit |
| **DX / Logs** | Readable tracing (`RUST_LOG=debug …`) |
---
## How it works
```text
┌──────────────┐ scan dir(s) ┌─────────────┐
│ your files │ ───────────────▶│ SQLite │
└──────────────┘index.db
▲ tag <glob> <tag> │ files tags
└────────────────────────┴─────────────┘
┌──────────────┐ marlin scan ┌─────────────┐
│ your files │ ─────────────────────▶│ SQLite │
│ (any folder) │ files/tags
└──────────────┘ tag / attr │ attrs / FTS
▲ search / exec └──────┬──────┘
└────────── backup / restore ▼
timestamped snapshots
```
1. `marlin scan <PATHS>...` walks each directory tree, gathers size and
modification time, then upserts rows into **`files`**.
2. `marlin tag "<glob>" <tag>` looks up each matching file row and inserts
junction rows into **`file_tags`**. New tag names are created on the fly.
3. You can open the DB yourself
(`sqlite3 ~/.local/share/marlin/index.db`) while search and GUI features
are still under construction.
---
## Prerequisites
| What | Why |
| ---------------- | --------------------------------------------------- |
| Requirement | Why |
| ------------------ | -------------------------------------- |
| **Rust** ≥ 1.77 | Build toolchain (`rustup.rs`) |
| Build essentials | `gcc`, `make`, etc. for `rusqlite`'s bundled SQLite |
| C build essentials | `gcc`, `make`, etc. for bundled SQLite |
<details><summary>Platform notes</summary>
### Windows
`rustup-init.exe` installs MSVC build tools automatically.
### macOS
```bash
xcode-select --install # command-line tools
```
### Linux (Debian / Ubuntu)
```bash
sudo apt install build-essential
```
or on Fedora / RHEL
```bash
sudo dnf groupinstall "Development Tools"
```
</details>
*(Windows/macOS: let the Rust installer pull the matching build tools.)*
---
@@ -78,12 +54,8 @@ sudo dnf groupinstall "Development Tools"
```bash
git clone https://github.com/yourname/marlin.git
cd marlin
cargo build --release # produces target/release/marlin
```
Copy the release binary somewhere on your `PATH` (optional):
```bash
cargo build --release
# optional: add to PATH
sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
```
@@ -92,25 +64,21 @@ sudo install -Dm755 target/release/marlin /usr/local/bin/marlin
## Quick start
```bash
# 1 create or upgrade the database (idempotent)
marlin init
# 2 index all common folders in one shot
marlin scan ~/Pictures ~/Documents ~/Downloads ~/Music ~/Videos
# 3 add a tag to matching files
marlin tag "~/Pictures/**/*.jpg" vacation
marlin init # create DB
marlin scan ~/Pictures ~/Documents # index files
marlin tag "~/Pictures/**/*.jpg" photos/trip-2024 # add tag
marlin attr set "~/Documents/**/*.pdf" reviewed yes
marlin search reviewed --exec "xdg-open {}" # open hits
marlin backup # snapshot DB
```
The database path defaults to:
### Database location
```
~/.local/share/marlin/index.db # Linux
~/Library/Application Support/marlin # macOS
%APPDATA%\marlin\index.db # Windows
```
* **Linux** `~/.local/share/marlin/index.db`
* **macOS** `~/Library/Application Support/marlin/index.db`
* **Windows** `%APPDATA%\marlin\index.db`
Override with:
Override:
```bash
export MARLIN_DB_PATH=/path/to/custom.db
@@ -121,83 +89,96 @@ export MARLIN_DB_PATH=/path/to/custom.db
## CLI reference
```text
USAGE:
marlin <COMMAND> [ARGS]
marlin <COMMAND> [ARGS]
COMMANDS:
init Create (or upgrade) the SQLite database
scan <PATHS>... Walk one or more directories recursively
tag "<glob>" <tag> Apply <tag> to all files matched
FLAGS:
-h, --help Show this help
-V, --version Show version info
init create / migrate database
scan <PATHS>... walk directories & index files
tag "<glob>" <tag_path> add hierarchical tag
attr set|ls … manage custom attributes
search <query> [--exec CMD] FTS query, optionally run CMD on each hit
backup create timestamped snapshot in backups/
restore <snapshot.db> replace DB with snapshot
```
| Command | Notes |
| --------------------------- | ----------------------------------------------------------------------------------------------------- |
| `marlin init` | Safe to run repeatedly; applies pending migrations. |
| `marlin scan <PATHS>...` | Accepts any number of absolute/relative paths. Directories you can't read are skipped with a warning. |
| `marlin tag "<glob>" <tag>` | Quote the glob so your shell doesn't expand it. Uses `glob` crate rules (`**` for recursive matches). |
### Attribute subcommands
| Command | Example |
| ---------- | ------------------------------------------------ |
| `attr set` | `marlin attr set "~/Docs/**/*.pdf" reviewed yes` |
| `attr ls` | `marlin attr ls ~/Docs/report.pdf` |
---
## Backups & restore
* **Create snapshot**
```bash
marlin backup
# → ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```
* **Restore snapshot**
```bash
marlin restore ~/.local/share/marlin/backups/backup_2025-05-14_22-15-30.db
```
Marlin automatically takes a safety backup before any schema migration.
---
## Upgrading to a new build
During development you'll be editing source files frequently. Two common ways
to run the updated program:
### 1. Run straight from the project directory
```bash
cargo run --release -- scan ~/Pictures
cargo install --path . --force # rebuild & overwrite installed binary
```
*Cargo recompiles what changed and runs the fresh binary located in
`target/release/marlin`.*
### 2. Replace the global copy
If you previously installed Marlin (e.g. into `~/.cargo/bin/` or `/usr/local/bin/`),
overwrite it:
```bash
cargo install --path . --force
```
Now `which marlin` should print the new location, and multi-path scan works:
```bash
marlin scan ~/Pictures ~/Documents …
```
If the CLI still shows the old single-path usage (`Usage: marlin scan <PATH>`),
you're invoking an outdated executable — check your `PATH` and reinstall.
---
## Development tips
* Tight loop: `cargo watch -x 'run -- scan ~/Pictures'`
* Debug logs: `RUST_LOG=debug marlin scan ~/Pictures`
* Lint: `cargo clippy --all-targets --all-features -D warnings`
* Tests: `cargo test`
Backups + dynamic migrations mean your data is preserved across upgrades.
---
## Roadmap
| Milestone | Coming soon |
| --------- | --------------------------------------------------------------- |
| **M1** | Hierarchical tags • attributes table • `tags://` virtual folder |
| **M2** | Sync service • change log diff viewer |
| **M3** | Natural-language search • visual query builder |
| **M4** | Plug-in marketplace • mobile companion (view-only) |
| Milestone | Focus |
| --------- | -------------------------------------------------- |
| **M1** | `tags://` virtual folder • attribute search DSL |
| **M2** | Real-time sync service • change-log diff viewer |
| **M3** | Natural-language query builder |
| **M4** | Plug-in marketplace • mobile (read-only) companion |
---
## Five-minute tutorial
```bash
# 0. Playground
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
echo "Alpha draft" > ~/marlin_demo/Projects/Alpha/draft.txt
echo "Receipt PDF" > ~/marlin_demo/Docs/receipt.pdf
echo "fake jpg" > ~/marlin_demo/Media/Photos/vacation.jpg
# 1. Init & scan
marlin init
marlin scan ~/marlin_demo
# 2. Tags & attributes
marlin tag "~/marlin_demo/Projects/Alpha/**/*" project/alpha
marlin attr set "~/marlin_demo/**/*.pdf" reviewed yes
# 3. Search
marlin search alpha
marlin search reviewed --exec "echo Found: {}"
# 4. Snapshot & restore
marlin backup
marlin restore ~/.local/share/marlin/backups/backup_YYYY-MM-DD_HH-MM-SS.db
```
---
## License
Released under the **MIT License** — see `LICENSE` for full text.
MIT — see `LICENSE`

View File

@@ -1,3 +1,4 @@
// src/cli.rs
use std::path::PathBuf;
use clap::{Parser, Subcommand};
@@ -16,22 +17,40 @@ pub enum Commands {
Init,
/// Scan one or more directories and populate the file index
///
/// Example:
/// marlin scan ~/Pictures ~/Documents ~/Downloads
Scan {
/// One or more directories to walk
paths: Vec<PathBuf>,
},
/// Tag files matching a glob pattern
///
/// Example:
/// marlin tag "~/Pictures/**/*.jpg" vacation
/// Tag files matching a glob pattern (hierarchical tags use `/`)
Tag {
/// Glob pattern (quote to avoid shell expansion)
pattern: String,
/// Tag name
tag: String,
tag_path: String,
},
/// Manage custom attributes
Attr {
#[command(subcommand)]
action: AttrCmd,
},
/// Full-text search; `--exec CMD` runs CMD on each hit (`{}` placeholder)
Search {
query: String,
#[arg(long)]
exec: Option<String>,
},
/// Create a timestamped backup of the database
Backup,
/// Restore from a backup file (over-writes current DB)
Restore {
backup_path: PathBuf,
},
}
#[derive(Subcommand, Debug)]
pub enum AttrCmd {
Set { pattern: String, key: String, value: String },
Ls { path: PathBuf },
}

View File

@@ -1,5 +1,6 @@
PRAGMA foreign_keys = ON;
-- ─── core tables ───────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
@@ -9,7 +10,9 @@ CREATE TABLE IF NOT EXISTS files (
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
name TEXT NOT NULL UNIQUE,
parent_id INTEGER REFERENCES tags(id),
canonical_id INTEGER REFERENCES tags(id)
);
CREATE TABLE IF NOT EXISTS file_tags (
@@ -18,5 +21,41 @@ CREATE TABLE IF NOT EXISTS file_tags (
PRIMARY KEY (file_id, tag_id)
);
CREATE TABLE IF NOT EXISTS attributes (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
key TEXT NOT NULL,
value TEXT
);
-- optional free-form JSON metadata
CREATE TABLE IF NOT EXISTS json_meta (
file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
data TEXT -- arbitrary JSON blob
);
-- ─── full-text search ──────────────────────────────────────────────────
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
USING fts5(
path,
content='files', content_rowid='id',
prefix='2 3 4 5 6 7 8 9 10'
);
CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts(rowid, path) VALUES (new.id, new.path);
END;
CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE ON files BEGIN
UPDATE files_fts SET path = new.path WHERE rowid = new.id;
END;
CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN
DELETE FROM files_fts WHERE rowid = old.id;
END;
-- ─── version table for incremental migrations ─────────────────────────
CREATE TABLE IF NOT EXISTS schema_version (version INTEGER PRIMARY KEY);
-- ─── useful indexes ────────────────────────────────────────────────────
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_file_tags_tag_id ON file_tags(tag_id);
CREATE INDEX IF NOT EXISTS idx_attr_file_key ON attributes(file_id, key);

View File

@@ -1,28 +1,118 @@
use std::path::Path;
// src/db/mod.rs
use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::Result;
use rusqlite::{params, Connection};
use chrono::Local;
use rusqlite::{
backup::{Backup, StepResult},
params, Connection, OpenFlags,
};
const MIGRATIONS_SQL: &str = include_str!("migrations.sql");
/// Open (or create) the SQLite database and run embedded migrations.
/// Open (or create) the DB, apply migrations, add any missing columns,
/// and rebuild the FTS index if needed.
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
let mut conn = Connection::open(db_path)?;
let conn = Connection::open(&db_path)?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.execute_batch(MIGRATIONS_SQL)?;
// example of dynamic column addition: files.hash TEXT
ensure_column(&conn, "files", "hash", "TEXT")?;
// ensure FTS picks up tokenizer / prefix changes
conn.execute("INSERT INTO files_fts(files_fts) VALUES('rebuild')", [])?;
Ok(conn)
}
/// Ensure a tag exists, returning its id.
pub fn ensure_tag(conn: &Connection, tag: &str) -> Result<i64> {
/// Add a column if it does not already exist.
fn ensure_column(conn: &Connection, table: &str, col: &str, ddl_type: &str) -> Result<()> {
// PRAGMA table_info returns rows with (cid, name, type, ...)
let mut exists = false;
let mut stmt = conn.prepare(&format!("PRAGMA table_info({table});"))?;
let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
for name in rows.flatten() {
if name == col {
exists = true;
break;
}
}
if !exists {
conn.execute(
"INSERT OR IGNORE INTO tags(name) VALUES (?1)",
params![tag],
&format!("ALTER TABLE {table} ADD COLUMN {col} {ddl_type};"),
[],
)?;
}
Ok(())
}
/// Ensure a (possibly hierarchical) tag exists and return the leaf tag id.
pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
let mut parent: Option<i64> = None;
for segment in path.split('/').filter(|s| !s.is_empty()) {
conn.execute(
"INSERT OR IGNORE INTO tags(name, parent_id) VALUES (?1, ?2)",
params![segment, parent],
)?;
let id: i64 = conn.query_row(
"SELECT id FROM tags WHERE name = ?1",
params![tag],
"SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)",
params![segment, parent],
|row| row.get(0),
)?;
Ok(id)
parent = Some(id);
}
parent.ok_or_else(|| anyhow::anyhow!("empty tag path"))
}
/// Look up `files.id` by absolute path.
pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
conn.query_row("SELECT id FROM files WHERE path = ?1", [path], |r| r.get(0))
.map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
}
/// Insert or update an attribute.
pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
conn.execute(
r#"
INSERT INTO attributes(file_id, key, value)
VALUES (?1, ?2, ?3)
ON CONFLICT(file_id, key) DO UPDATE SET value = excluded.value
"#,
params![file_id, key, value],
)?;
Ok(())
}
/// Create a **consistent snapshot** of the DB and return the backup path.
pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
let src = db_path.as_ref();
let dir = src
.parent()
.ok_or_else(|| anyhow::anyhow!("invalid DB path"))?
.join("backups");
fs::create_dir_all(&dir)?;
let stamp = Local::now().format("%Y-%m-%d_%H-%M-%S");
let dst = dir.join(format!("backup_{stamp}.db"));
// open connections: src read-only, dst writable
let src_conn = Connection::open_with_flags(src, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
let mut dst_conn = Connection::open(&dst)?;
// run online backup
let mut bk = Backup::new(&src_conn, &mut dst_conn)?;
while let StepResult::More = bk.step(100)? {}
// Backup finalised when `bk` is dropped.
Ok(dst)
}
/// Replace the live DB file with a snapshot (caller must have closed handles).
pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
fs::copy(&backup_path, &live_db_path)?;
Ok(())
}

View File

@@ -1,3 +1,4 @@
// src/main.rs
mod cli;
mod config;
mod db;
@@ -6,7 +7,7 @@ mod scan;
use anyhow::Result;
use clap::Parser;
use cli::{Cli, Commands};
use cli::{AttrCmd, Cli, Commands};
use glob::glob;
use rusqlite::params;
use tracing::{error, info};
@@ -16,6 +17,13 @@ fn main() -> Result<()> {
let args = Cli::parse();
let cfg = config::Config::load()?;
// snapshot unless doing an explicit backup / restore
if !matches!(args.command, Commands::Backup | Commands::Restore { .. }) {
let _ = db::backup(&cfg.db_path);
}
// open database (runs migrations / dynamic column adds)
let mut conn = db::open(&cfg.db_path)?;
match args.command {
@@ -27,22 +35,41 @@ fn main() -> Result<()> {
if paths.is_empty() {
anyhow::bail!("At least one directory must be supplied to `scan`");
}
for path in paths {
scan::scan_directory(&mut conn, &path)?;
for p in paths {
scan::scan_directory(&mut conn, &p)?;
}
}
Commands::Tag { pattern, tag } => {
apply_tag(&conn, &pattern, &tag)?;
Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?,
Commands::Attr { action } => match action {
// borrow the Strings so attr_set gets &str
AttrCmd::Set { pattern, key, value } => {
attr_set(&conn, &pattern, &key, &value)?
}
AttrCmd::Ls { path } => attr_ls(&conn, &path)?,
},
Commands::Search { query, exec } => run_search(&conn, &query, exec)?,
Commands::Backup => {
let path = db::backup(&cfg.db_path)?;
println!("Backup created: {}", path.display());
}
Commands::Restore { backup_path } => {
drop(conn); // close handle
db::restore(&backup_path, &cfg.db_path)?;
println!("Restored from {}", backup_path.display());
}
}
Ok(())
}
/// Apply `tag` to every file that matches `pattern`.
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()> {
let tag_id = db::ensure_tag(conn, tag)?;
/* ─── tagging ────────────────────────────────────────────────────────── */
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
let tag_id = db::ensure_tag_path(conn, tag_path)?;
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut stmt_insert =
conn.prepare("INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)")?;
@@ -55,7 +82,7 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()
stmt_file.query_row(params![path_str], |row| row.get::<_, i64>(0))
{
stmt_insert.execute(params![file_id, tag_id])?;
info!(file = %path_str, tag = tag, "tagged");
info!(file = %path_str, tag = tag_path, "tagged");
} else {
error!(file = %path_str, "file not in index run `marlin scan` first");
}
@@ -65,3 +92,84 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()
}
Ok(())
}
/* ─── attributes ─────────────────────────────────────────────────────── */
fn attr_set(conn: &rusqlite::Connection, pattern: &str, key: &str, value: &str) -> Result<()> {
for entry in glob(pattern)? {
match entry {
Ok(path) => {
let path_str = path.to_string_lossy();
let file_id = db::file_id(conn, &path_str)?;
db::upsert_attr(conn, file_id, key, value)?;
info!(file = %path_str, key = key, value = value, "attr set");
}
Err(e) => error!(error = %e, "glob error"),
}
}
Ok(())
}
fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> {
let file_id = db::file_id(conn, &path.to_string_lossy())?;
let mut stmt = conn.prepare("SELECT key, value FROM attributes WHERE file_id = ?1")?;
let rows = stmt.query_map([file_id], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
for row in rows {
let (k, v) = row?;
println!("{k} = {v}");
}
Ok(())
}
/* ─── search helpers ─────────────────────────────────────────────────── */
fn run_search(conn: &rusqlite::Connection, raw: &str, exec: Option<String>) -> Result<()> {
let hits = search(conn, raw)?;
if hits.is_empty() && exec.is_none() {
eprintln!("No matches for `{}`", raw);
return Ok(());
}
if let Some(cmd_tpl) = exec {
for path in hits {
let cmd_final = if cmd_tpl.contains("{}") {
cmd_tpl.replace("{}", &path)
} else {
format!("{cmd_tpl} \"{path}\"")
};
let mut parts = cmd_final.splitn(2, ' ');
let prog = parts.next().unwrap();
let args = parts.next().unwrap_or("");
let status = std::process::Command::new(prog)
.args(shlex::split(args).unwrap_or_default())
.status()?;
if !status.success() {
error!(file = %path, "command failed");
}
}
} else {
for p in hits {
println!("{p}");
}
}
Ok(())
}
fn search(conn: &rusqlite::Connection, raw: &str) -> Result<Vec<String>> {
let q = if raw.split_ascii_whitespace().count() == 1
&& !raw.contains(&['"', '\'', ':', '*', '(', ')', '~', '+', '-'][..])
{
format!("{raw}*")
} else {
raw.to_string()
};
let mut stmt = conn.prepare(
r#"
SELECT f.path FROM files_fts
JOIN files f ON f.rowid = files_fts.rowid
WHERE files_fts MATCH ?1
"#,
)?;
let rows = stmt.query_map([&q], |row| row.get::<_, String>(0))?;
Ok(rows.filter_map(Result::ok).collect())
}

View File

@@ -1,3 +1,4 @@
// src/scan.rs (unchanged except tiny doc tweak)
use std::fs;
use std::path::Path;
@@ -7,6 +8,7 @@ use tracing::{debug, info};
use walkdir::WalkDir;
/// Recursively walk `root` and upsert file metadata.
/// Triggers keep the FTS table in sync.
pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
let tx = conn.transaction()?;
let mut stmt = tx.prepare(
@@ -38,8 +40,8 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
debug!(file = %path_str, "indexed");
}
drop(stmt); // <- release borrow before commit
tx.commit()?; // can now move tx
drop(stmt);
tx.commit()?;
info!(indexed = count, "scan complete");
Ok(count)
}

View File

@@ -1,144 +0,0 @@
# Marlin Usage Tutorial
Below is a hands-on lab you can run in a throw-away directory.
It shows how Marlin's **tags** give you cross-folder "links" that a plain Bash
workflow can't match without resorting to symlinks or scratch scripts.
Everything uses *only the functionality that exists today* (`init / scan / tag`)
plus one `sqlite3` query for discovery.
---
## 0 . Prep
```bash
# make a playground so we don't touch real files
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
cd ~/marlin_demo
```
### Create a handful of files
```bash
echo "Alpha draft" > Projects/Alpha/draft.txt
echo "Alpha final" > Projects/Alpha/final.txt
echo "Beta summary" > Projects/Beta/summary.md
echo "Budget spreadsheet" > Docs/budget.ods
echo "Scan of receipt" > Docs/receipt.pdf
echo "fake JPEG header" > Media/Photos/vacation001.jpg
echo "fake JPEG header" > Media/Photos/vacation002.jpg
```
---
## 1 . Initialise & scan
```bash
marlin init
marlin scan ~/marlin_demo
```
*What happened?*
Marlin walked every file under `~/marlin_demo` and upserted rows into `files`.
---
## 2 . Tagging adding cross-folder metadata
### Tag Alpha project files
```bash
marlin tag "~/marlin_demo/Projects/Alpha/**/*.txt" project-alpha
```
### Tag everything Markdown or ODS as **docs**
```bash
marlin tag "~/marlin_demo/**/*.md" docs
marlin tag "~/marlin_demo/**/*.ods" docs
```
### Tag photos
```bash
marlin tag "~/marlin_demo/Media/Photos/**/*.jpg" photos
```
You can layer tags—`vacation001.jpg` now has both `photos` and (later) `trip-2024`
if you choose to add that.
---
## 3 . Discovering files with plain SQL
There's no `marlin search` command *yet*, but the DB is just SQLite:
```bash
sqlite3 ~/.local/share/marlin/index.db <<'SQL'
.headers on
.mode column
-- show all files tagged 'docs'
SELECT path
FROM files f
JOIN file_tags ft ON ft.file_id = f.id
JOIN tags t ON t.id = ft.tag_id
WHERE t.name = 'docs';
SQL
```
Expected output:
```
path
--------------------------------------------------------------
/home/user/marlin_demo/Projects/Beta/summary.md
/home/user/marlin_demo/Docs/budget.ods
```
Do the same for `project-alpha`:
```bash
sqlite3 ~/.local/share/marlin/index.db "
SELECT path FROM files
JOIN file_tags USING(file_id)
JOIN tags USING(tag_id)
WHERE tags.name = 'project-alpha';
"
```
---
## 4 . Why this beats a pure Bash approach
| Task | With Bash alone | With Marlin tags |
| -------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
| Gather every Alpha file (any extension) scattered across sub-folders | `find ~/Projects -path '*Alpha*'` (works) but blows up if naming scheme changes | One-time glob + `marlin tag ... project-alpha`, then just query by tag. |
| Re-classify files later | Mass-rename or new `find`/`grep` pipeline | `marlin tag` new glob or manual ad-hoc files; DB keeps history (future). |
| Combine orthogonal facets e.g. “docs AND project-alpha” | Complex `find` piped to `grep -F -f list.txt` or symlink forest | Future `marlin search docs AND project-alpha` (for now SQL query). |
| Persist metadata when files move | Must update symlinks / scripts | Scanner sees the move (once watcher lands); tags stay attached by inode/hash. |
Think of tags as **Git branches for files**—cheap, many-to-many, roam across
directories, and live in one place.
---
## 5 . Cleaning up
```bash
rm -rf ~/marlin_demo
sqlite3 ~/.local/share/marlin/index.db "DELETE FROM files; DELETE FROM tags; DELETE FROM file_tags;"
```
*(or simply delete the DB file to start fresh).*
---
### Recap
1. **Scan** every folder once.
2. **Tag** by glob to create semantic “links.”
3. **Query** the DB (today) or use future built-in search (soon).
Even with just these three commands, you get an index that answers questions
plain Bash would need an ever-growing tangle of `find`, `grep`, and symlinks to solve.

View File

@@ -1 +0,0 @@
885c93e5d7dcc32c

View File

@@ -1,2 +1,3 @@
{"$message_type":"diagnostic","message":"variable does not need to be mutable","code":{"code":"unused_mut","explanation":null},"level":"warning","spans":[{"file_name":"src/db/mod.rs","byte_start":283,"byte_end":291,"line_start":10,"line_end":10,"column_start":9,"column_end":17,"is_primary":true,"text":[{"text":" let mut conn = Connection::open(db_path)?;","highlight_start":9,"highlight_end":17}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[{"message":"`#[warn(unused_mut)]` on by default","code":null,"level":"note","spans":[],"children":[],"rendered":null},{"message":"remove this `mut`","code":null,"level":"help","spans":[{"file_name":"src/db/mod.rs","byte_start":283,"byte_end":287,"line_start":10,"line_end":10,"column_start":9,"column_end":13,"is_primary":true,"text":[{"text":" let mut conn = Connection::open(db_path)?;","highlight_start":9,"highlight_end":13}],"label":null,"suggested_replacement":"","suggestion_applicability":"MachineApplicable","expansion":null}],"children":[],"rendered":null}],"rendered":"\u001b[0m\u001b[1m\u001b[33mwarning\u001b[0m\u001b[0m\u001b[1m: variable does not need to be mutable\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/db/mod.rs:10:9\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m10\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\u001b[0m \u001b[0m\u001b[0m let mut conn = Connection::open(db_path)?;\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m----\u001b[0m\u001b[0m\u001b[1m\u001b[33m^^^^\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12mhelp: remove this 
`mut`\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m= \u001b[0m\u001b[0m\u001b[1mnote\u001b[0m\u001b[0m: `#[warn(unused_mut)]` on by default\u001b[0m\n\n"}
{"$message_type":"diagnostic","message":"1 warning emitted","code":null,"level":"warning","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[33mwarning\u001b[0m\u001b[0m\u001b[1m: 1 warning emitted\u001b[0m\n\n"}
{"$message_type":"diagnostic","message":"failed to resolve: use of unresolved module or unlinked crate `shlex`","code":{"code":"E0433","explanation":"An undeclared crate, module, or type was used.\n\nErroneous code example:\n\n```compile_fail,E0433\nlet map = HashMap::new();\n// error: failed to resolve: use of undeclared type `HashMap`\n```\n\nPlease verify you didn't misspell the type/module's name or that you didn't\nforget to import it:\n\n```\nuse std::collections::HashMap; // HashMap has been imported.\nlet map: HashMap<u32, u32> = HashMap::new(); // So it can be used!\n```\n\nIf you've expected to use a crate name:\n\n```compile_fail\nuse ferris_wheel::BigO;\n// error: failed to resolve: use of undeclared module or unlinked crate\n```\n\nMake sure the crate has been added as a dependency in `Cargo.toml`.\n\nTo use a module from your current crate, add the `crate::` prefix to the path.\n"},"level":"error","spans":[{"file_name":"src/main.rs","byte_start":1965,"byte_end":1970,"line_start":65,"line_end":65,"column_start":31,"column_end":36,"is_primary":true,"text":[{"text":" .args(shlex::split(args).unwrap_or_default())","highlight_start":31,"highlight_end":36}],"label":"use of unresolved module or unlinked crate `shlex`","suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[{"message":"if you wanted to use a crate named `shlex`, use `cargo add shlex` to add it to your `Cargo.toml`","code":null,"level":"help","spans":[],"children":[],"rendered":null}],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0433]\u001b[0m\u001b[0m\u001b[1m: failed to resolve: use of unresolved module or unlinked crate `shlex`\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/main.rs:65:31\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m65\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\u001b[0m \u001b[0m\u001b[0m 
.args(shlex::split(args).unwrap_or_default())\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9muse of unresolved module or unlinked crate `shlex`\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m= \u001b[0m\u001b[0m\u001b[1mhelp\u001b[0m\u001b[0m: if you wanted to use a crate named `shlex`, use `cargo add shlex` to add it to your `Cargo.toml`\u001b[0m\n\n"}
{"$message_type":"diagnostic","message":"aborting due to 1 previous error","code":null,"level":"error","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror\u001b[0m\u001b[0m\u001b[1m: aborting due to 1 previous error\u001b[0m\n\n"}
{"$message_type":"diagnostic","message":"For more information about this error, try `rustc --explain E0433`.","code":null,"level":"failure-note","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1mFor more information about this error, try `rustc --explain E0433`.\u001b[0m\n"}

Binary file not shown.