This commit is contained in:
thePR0M3TH3AN
2025-05-14 17:51:17 -04:00
parent 9d3e0ffef7
commit fe8aa18803
14 changed files with 646 additions and 328 deletions

View File

@@ -1,3 +1,4 @@
// src/cli.rs
use std::path::PathBuf;
use clap::{Parser, Subcommand};
@@ -16,22 +17,40 @@ pub enum Commands {
Init,
/// Scan one or more directories and populate the file index
///
/// Example:
/// marlin scan ~/Pictures ~/Documents ~/Downloads
Scan {
/// One or more directories to walk
paths: Vec<PathBuf>,
},
/// Tag files matching a glob pattern
///
/// Example:
/// marlin tag "~/Pictures/**/*.jpg" vacation
/// Tag files matching a glob pattern (hierarchical tags use `/`)
Tag {
/// Glob pattern (quote to avoid shell expansion)
pattern: String,
/// Tag name
tag: String,
tag_path: String,
},
/// Manage custom attributes
Attr {
#[command(subcommand)]
action: AttrCmd,
},
/// Full-text search; `--exec CMD` runs CMD on each hit (`{}` placeholder)
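///
/// Example:
///   marlin search draft --exec "xdg-open {}"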
Search {
query: String,
#[arg(long)]
exec: Option<String>,
},
/// Create a timestamped backup of the database
Backup,
/// Restore from a backup file (overwrites the current DB)
Restore {
backup_path: PathBuf,
},
}
#[derive(Subcommand, Debug)]
pub enum AttrCmd {
Set { pattern: String, key: String, value: String },
Ls { path: PathBuf },
}
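// A minimal sketch (hypothetical test, not part of this commit) of how the
// new surface parses; it assumes `Cli` carries the `command: Commands` field
// that `main.rs` matches on.
#[cfg(test)]
mod cli_tests {
    use super::*;

    #[test]
    fn tag_takes_pattern_and_tag_path() {
        let cli = Cli::parse_from(["marlin", "tag", "**/*.jpg", "photos/vacation"]);
        match cli.command {
            Commands::Tag { pattern, tag_path } => {
                assert_eq!(pattern, "**/*.jpg");
                assert_eq!(tag_path, "photos/vacation");
            }
            _ => panic!("expected `tag` to parse into Commands::Tag"),
        }
    }
}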

View File

@@ -1,15 +1,18 @@
PRAGMA foreign_keys = ON;
-- ─── core tables ───────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
size INTEGER,
mtime INTEGER
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
size INTEGER,
mtime INTEGER
);
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
parent_id INTEGER REFERENCES tags(id),
canonical_id INTEGER REFERENCES tags(id),
UNIQUE(name, parent_id) -- hierarchical tags: the same name may repeat under different parents
);
CREATE TABLE IF NOT EXISTS file_tags (
@@ -18,5 +21,41 @@ CREATE TABLE IF NOT EXISTS file_tags (
PRIMARY KEY (file_id, tag_id)
);
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_file_tags_tag_id ON file_tags(tag_id);
CREATE TABLE IF NOT EXISTS attributes (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
key TEXT NOT NULL,
value TEXT
);
-- optional free-form JSON metadata
CREATE TABLE IF NOT EXISTS json_meta (
file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
data TEXT -- arbitrary JSON blob
);
-- ─── full-text search ──────────────────────────────────────────────────
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
USING fts5(
path,
content='files', content_rowid='id',
prefix='2 3 4 5 6 7 8 9 10'
);
CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts(rowid, path) VALUES (new.id, new.path);
END;
-- external-content FTS5 tables must be updated via the special 'delete'
-- command; a plain UPDATE/DELETE on files_fts would desync the index
CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE ON files BEGIN
INSERT INTO files_fts(files_fts, rowid, path) VALUES('delete', old.id, old.path);
INSERT INTO files_fts(rowid, path) VALUES (new.id, new.path);
END;
CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN
INSERT INTO files_fts(files_fts, rowid, path) VALUES('delete', old.id, old.path);
END;
-- ─── version table for incremental migrations ─────────────────────────
CREATE TABLE IF NOT EXISTS schema_version (version INTEGER PRIMARY KEY);
-- ─── useful indexes ────────────────────────────────────────────────────
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_file_tags_tag_id ON file_tags(tag_id);
-- must be UNIQUE: upsert_attr's ON CONFLICT(file_id, key) needs a unique index
CREATE UNIQUE INDEX IF NOT EXISTS idx_attr_file_key ON attributes(file_id, key);

View File

@@ -1,28 +1,118 @@
use std::path::Path;
// src/db/mod.rs
use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::Result;
use rusqlite::{params, Connection};
use chrono::Local;
use rusqlite::{
    backup::{Backup, StepResult},
    params, Connection, OpenFlags, OptionalExtension,
};
const MIGRATIONS_SQL: &str = include_str!("migrations.sql");
/// Open (or create) the SQLite database and run embedded migrations.
/// Open (or create) the DB, apply migrations, add any missing columns,
/// and rebuild the FTS index if needed.
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
let mut conn = Connection::open(db_path)?;
let conn = Connection::open(&db_path)?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.execute_batch(MIGRATIONS_SQL)?;
// example of dynamic column addition: files.hash TEXT
ensure_column(&conn, "files", "hash", "TEXT")?;
// ensure FTS picks up tokenizer / prefix changes
conn.execute("INSERT INTO files_fts(files_fts) VALUES('rebuild')", [])?;
Ok(conn)
}
/// Ensure a tag exists, returning its id.
pub fn ensure_tag(conn: &Connection, tag: &str) -> Result<i64> {
conn.execute(
"INSERT OR IGNORE INTO tags(name) VALUES (?1)",
params![tag],
)?;
let id: i64 = conn.query_row(
"SELECT id FROM tags WHERE name = ?1",
params![tag],
|row| row.get(0),
)?;
Ok(id)
/// Add a column if it does not already exist.
fn ensure_column(conn: &Connection, table: &str, col: &str, ddl_type: &str) -> Result<()> {
// PRAGMA table_info returns rows with (cid, name, type, ...)
let mut exists = false;
let mut stmt = conn.prepare(&format!("PRAGMA table_info({table});"))?;
let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
for name in rows.flatten() {
if name == col {
exists = true;
break;
}
}
if !exists {
conn.execute(
&format!("ALTER TABLE {table} ADD COLUMN {col} {ddl_type};"),
[],
)?;
}
Ok(())
}
/// Ensure a (possibly hierarchical) tag exists and return the leaf tag id.
pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
    let mut parent: Option<i64> = None;
    for segment in path.split('/').filter(|s| !s.is_empty()) {
        // Look up first: with UNIQUE(name, parent_id), a blind
        // `INSERT OR IGNORE` would still duplicate root tags, because
        // NULL parent_ids compare as distinct in UNIQUE constraints.
        let existing: Option<i64> = conn
            .query_row(
                "SELECT id FROM tags WHERE name = ?1 AND parent_id IS ?2",
                params![segment, parent],
                |row| row.get(0),
            )
            .optional()?;
        let id = match existing {
            Some(id) => id,
            None => {
                conn.execute(
                    "INSERT INTO tags(name, parent_id) VALUES (?1, ?2)",
                    params![segment, parent],
                )?;
                conn.last_insert_rowid()
            }
        };
        parent = Some(id);
    }
    parent.ok_or_else(|| anyhow::anyhow!("empty tag path"))
}
/// Look up `files.id` by absolute path.
pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
conn.query_row("SELECT id FROM files WHERE path = ?1", [path], |r| r.get(0))
.map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
}
/// Insert or update an attribute.
pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
conn.execute(
r#"
INSERT INTO attributes(file_id, key, value)
VALUES (?1, ?2, ?3)
ON CONFLICT(file_id, key) DO UPDATE SET value = excluded.value
"#,
params![file_id, key, value],
)?;
Ok(())
}
/// Create a **consistent snapshot** of the DB and return the backup path.
pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
let src = db_path.as_ref();
let dir = src
.parent()
.ok_or_else(|| anyhow::anyhow!("invalid DB path"))?
.join("backups");
fs::create_dir_all(&dir)?;
let stamp = Local::now().format("%Y-%m-%d_%H-%M-%S");
let dst = dir.join(format!("backup_{stamp}.db"));
// open connections: src read-only, dst writable
let src_conn = Connection::open_with_flags(src, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
let mut dst_conn = Connection::open(&dst)?;
// run online backup
let mut bk = Backup::new(&src_conn, &mut dst_conn)?;
loop {
    match bk.step(100)? {
        StepResult::Done => break,
        StepResult::More => {}
        // pause and retry instead of silently returning a partial snapshot
        StepResult::Busy | StepResult::Locked => {
            std::thread::sleep(std::time::Duration::from_millis(100));
        }
    }
}
// Backup finalised when `bk` is dropped.
Ok(dst)
}
/// Replace the live DB file with a snapshot (caller must have closed handles).
pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
fs::copy(&backup_path, &live_db_path)?;
Ok(())
}
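// A minimal sketch (hypothetical test, not part of this commit) of the new
// helpers, run against an in-memory DB seeded with the embedded migrations.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tag_path_builds_parent_chain() -> Result<()> {
        let conn = Connection::open_in_memory()?;
        conn.execute_batch(MIGRATIONS_SQL)?;
        let leaf = ensure_tag_path(&conn, "projects/alpha")?;
        // the leaf ("alpha") must hang off its parent ("projects")
        let parent: Option<i64> = conn.query_row(
            "SELECT parent_id FROM tags WHERE id = ?1",
            [leaf],
            |row| row.get(0),
        )?;
        assert!(parent.is_some());
        Ok(())
    }
}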

View File

@@ -1,3 +1,4 @@
// src/main.rs
mod cli;
mod config;
mod db;
@@ -6,7 +7,7 @@ mod scan;
use anyhow::Result;
use clap::Parser;
use cli::{Cli, Commands};
use cli::{AttrCmd, Cli, Commands};
use glob::glob;
use rusqlite::params;
use tracing::{error, info};
@@ -16,6 +17,13 @@ fn main() -> Result<()> {
let args = Cli::parse();
let cfg = config::Config::load()?;
// snapshot unless doing an explicit backup / restore
if !matches!(args.command, Commands::Backup | Commands::Restore { .. }) {
let _ = db::backup(&cfg.db_path);
}
// open database (runs migrations / dynamic column adds)
let mut conn = db::open(&cfg.db_path)?;
match args.command {
@@ -27,22 +35,41 @@ fn main() -> Result<()> {
if paths.is_empty() {
anyhow::bail!("At least one directory must be supplied to `scan`");
}
for path in paths {
scan::scan_directory(&mut conn, &path)?;
for p in paths {
scan::scan_directory(&mut conn, &p)?;
}
}
Commands::Tag { pattern, tag } => {
apply_tag(&conn, &pattern, &tag)?;
Commands::Tag { pattern, tag_path } => apply_tag(&conn, &pattern, &tag_path)?,
Commands::Attr { action } => match action {
// borrow the Strings so attr_set gets &str
AttrCmd::Set { pattern, key, value } => {
attr_set(&conn, &pattern, &key, &value)?
}
AttrCmd::Ls { path } => attr_ls(&conn, &path)?,
},
Commands::Search { query, exec } => run_search(&conn, &query, exec)?,
Commands::Backup => {
let path = db::backup(&cfg.db_path)?;
println!("Backup created: {}", path.display());
}
Commands::Restore { backup_path } => {
drop(conn); // close handle
db::restore(&backup_path, &cfg.db_path)?;
println!("Restored from {}", backup_path.display());
}
}
Ok(())
}
/// Apply `tag` to every file that matches `pattern`.
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()> {
let tag_id = db::ensure_tag(conn, tag)?;
/* ─── tagging ────────────────────────────────────────────────────────── */
fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag_path: &str) -> Result<()> {
let tag_id = db::ensure_tag_path(conn, tag_path)?;
let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
let mut stmt_insert =
conn.prepare("INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)")?;
@@ -55,7 +82,7 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()
stmt_file.query_row(params![path_str], |row| row.get::<_, i64>(0))
{
stmt_insert.execute(params![file_id, tag_id])?;
info!(file = %path_str, tag = tag, "tagged");
info!(file = %path_str, tag = tag_path, "tagged");
} else {
error!(file = %path_str, "file not in index; run `marlin scan` first");
}
@@ -65,3 +92,84 @@ fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()
}
Ok(())
}
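/* Example (hypothetical invocation):
     marlin tag "$HOME/Pictures/**/*.jpg" photos/vacation
   ensures the chain photos → vacation exists and links every match to the
   leaf tag. */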
/* ─── attributes ─────────────────────────────────────────────────────── */
fn attr_set(conn: &rusqlite::Connection, pattern: &str, key: &str, value: &str) -> Result<()> {
for entry in glob(pattern)? {
match entry {
Ok(path) => {
    let path_str = path.to_string_lossy();
    // skip unindexed files instead of aborting the whole batch
    match db::file_id(conn, &path_str) {
        Ok(file_id) => {
            db::upsert_attr(conn, file_id, key, value)?;
            info!(file = %path_str, key = key, value = value, "attr set");
        }
        Err(e) => error!(file = %path_str, error = %e, "not indexed; run `marlin scan` first"),
    }
}
Err(e) => error!(error = %e, "glob error"),
}
}
Ok(())
}
fn attr_ls(conn: &rusqlite::Connection, path: &std::path::Path) -> Result<()> {
let file_id = db::file_id(conn, &path.to_string_lossy())?;
let mut stmt = conn.prepare("SELECT key, value FROM attributes WHERE file_id = ?1")?;
let rows = stmt.query_map([file_id], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
for row in rows {
let (k, v) = row?;
println!("{k} = {v}");
}
Ok(())
}
/* ─── search helpers ─────────────────────────────────────────────────── */
fn run_search(conn: &rusqlite::Connection, raw: &str, exec: Option<String>) -> Result<()> {
let hits = search(conn, raw)?;
if hits.is_empty() && exec.is_none() {
eprintln!("No matches for `{}`", raw);
return Ok(());
}
if let Some(cmd_tpl) = exec {
for path in hits {
let cmd_final = if cmd_tpl.contains("{}") {
    // shell-quote the path so the shlex split below keeps it intact
    cmd_tpl.replace("{}", &shlex::quote(&path))
} else {
    format!("{cmd_tpl} \"{path}\"")
};
let mut parts = cmd_final.splitn(2, ' ');
let prog = parts.next().unwrap();
let args = parts.next().unwrap_or("");
let status = std::process::Command::new(prog)
.args(shlex::split(args).unwrap_or_default())
.status()?;
if !status.success() {
error!(file = %path, "command failed");
}
}
} else {
for p in hits {
println!("{p}");
}
}
Ok(())
}
fn search(conn: &rusqlite::Connection, raw: &str) -> Result<Vec<String>> {
let q = if raw.split_ascii_whitespace().count() == 1
&& !raw.contains(&['"', '\'', ':', '*', '(', ')', '~', '+', '-'][..])
{
format!("{raw}*")
} else {
raw.to_string()
};
let mut stmt = conn.prepare(
r#"
SELECT f.path FROM files_fts
JOIN files f ON f.rowid = files_fts.rowid
WHERE files_fts MATCH ?1
"#,
)?;
let rows = stmt.query_map([&q], |row| row.get::<_, String>(0))?;
Ok(rows.filter_map(Result::ok).collect())
}
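// Worked example of the query expansion above (hypothetical inputs):
//   `marlin search summ`             -> FTS query `summ*` (bare single terms get a prefix star)
//   `marlin search "draft OR final"` -> passed to FTS5 verbatim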

View File

@@ -1,3 +1,4 @@
// src/scan.rs (unchanged except tiny doc tweak)
use std::fs;
use std::path::Path;
@@ -7,6 +8,7 @@ use tracing::{debug, info};
use walkdir::WalkDir;
/// Recursively walk `root` and upsert file metadata.
/// Triggers keep the FTS table in sync.
pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
let tx = conn.transaction()?;
let mut stmt = tx.prepare(
@@ -38,8 +40,8 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
debug!(file = %path_str, "indexed");
}
drop(stmt); // <- release borrow before commit
tx.commit()?; // can now move tx
drop(stmt);
tx.commit()?;
info!(indexed = count, "scan complete");
Ok(count)
}

View File

@@ -1,144 +0,0 @@
# Marlin Usage Tutorial
Below is a hands-on lab you can run in a throw-away directory.
It shows how Marlin's **tags** give you cross-folder “links” that a plain Bash
workflow can't match without resorting to symlinks or scratch scripts.
Everything uses *only the functionality that exists today* (`init / scan / tag`)
plus one `sqlite3` query for discovery.
---
## 0 . Prep
```bash
# make a playground so we don't touch real files
mkdir -p ~/marlin_demo/{Projects/{Alpha,Beta},Media/Photos,Docs}
cd ~/marlin_demo
```
### Create a handful of files
```bash
echo "Alpha draft" > Projects/Alpha/draft.txt
echo "Alpha final" > Projects/Alpha/final.txt
echo "Beta summary" > Projects/Beta/summary.md
echo "Budget spreadsheet" > Docs/budget.ods
echo "Scan of receipt" > Docs/receipt.pdf
echo "fake JPEG header" > Media/Photos/vacation001.jpg
echo "fake JPEG header" > Media/Photos/vacation002.jpg
```
---
## 1 . Initialise & scan
```bash
marlin init
marlin scan ~/marlin_demo
```
*What happened?*
Marlin walked every file under `~/marlin_demo` and upserted rows into `files`.
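A quick sanity check (assuming the default DB location that the queries below also use):

```bash
sqlite3 ~/.local/share/marlin/index.db "SELECT COUNT(*) FROM files;"
# expect 7 (one row per file created above)
```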
---
## 2 . Tagging: adding cross-folder metadata
### Tag Alpha project files
```bash
# use $HOME, not ~: the pattern is quoted, so the shell won't expand the tilde
marlin tag "$HOME/marlin_demo/Projects/Alpha/**/*.txt" project-alpha
```
### Tag everything Markdown or ODS as **docs**
```bash
marlin tag "$HOME/marlin_demo/**/*.md"  docs
marlin tag "$HOME/marlin_demo/**/*.ods" docs
```
### Tag photos
```bash
marlin tag "$HOME/marlin_demo/Media/Photos/**/*.jpg" photos
```
You can layer tags: `vacation001.jpg` already has `photos`, and picks up
`trip-2024` too if you run the extra command below.
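```bash
marlin tag "$HOME/marlin_demo/Media/Photos/**/*.jpg" trip-2024
```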
---
## 3 . Discovering files with plain SQL
There's no `marlin search` command *yet*, but the DB is just SQLite:
```bash
sqlite3 ~/.local/share/marlin/index.db <<'SQL'
.headers on
.mode column
-- show all files tagged 'docs'
SELECT path
FROM files f
JOIN file_tags ft ON ft.file_id = f.id
JOIN tags t ON t.id = ft.tag_id
WHERE t.name = 'docs';
SQL
```
Expected output:
```
path
--------------------------------------------------------------
/home/user/marlin_demo/Projects/Beta/summary.md
/home/user/marlin_demo/Docs/budget.ods
```
Do the same for `project-alpha`:
```bash
sqlite3 ~/.local/share/marlin/index.db "
SELECT f.path FROM files f
JOIN file_tags ft ON ft.file_id = f.id
JOIN tags t ON t.id = ft.tag_id
WHERE t.name = 'project-alpha';
"
```
---
## 4 . Why this beats a pure Bash approach
| Task | With Bash alone | With Marlin tags |
| -------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
| Gather every Alpha file (any extension) scattered across sub-folders | `find ~/Projects -path '*Alpha*'` works, but breaks if the naming scheme changes | One-time glob + `marlin tag ... project-alpha`, then just query by tag. |
| Re-classify files later | Mass-rename, or a new `find`/`grep` pipeline | Re-run `marlin tag` with a new glob, or tag ad-hoc files by hand; the DB keeps history (future). |
| Combine orthogonal facets, e.g. “docs AND project-alpha” | Complex `find` piped to `grep -F -f list.txt`, or a symlink forest | Future `marlin search docs AND project-alpha` (for now, the SQL query above). |
| Persist metadata when files move | Must update symlinks / scripts | The scanner sees the move (once the watcher lands); tags stay attached by inode/hash. |
Think of tags as **Git branches for files**—cheap, many-to-many, roam across
directories, and live in one place.
---
## 5 . Cleaning up
```bash
rm -rf ~/marlin_demo
sqlite3 ~/.local/share/marlin/index.db "DELETE FROM files; DELETE FROM tags; DELETE FROM file_tags;"
```
*(or simply delete the DB file to start fresh).*
---
### Recap
1. **Scan** every folder once.
2. **Tag** by glob to create semantic “links.”
3. **Query** the DB (today) or use future built-in search (soon).
Even with just these three commands, you get an index that answers questions
plain Bash would need an ever-growing tangle of `find`, `grep`, and symlinks to solve.