thePR0M3TH3AN
2025-05-18 16:02:48 -04:00
parent 6157ac5233
commit f6fca2c0dd
44 changed files with 492 additions and 508 deletions

21
libmarlin/Cargo.toml Normal file
View File

@@ -0,0 +1,21 @@
[package]
name = "libmarlin"
version = "0.1.0"
edition = "2021"
publish = false
[dependencies]
anyhow = "1"
chrono = "0.4"
directories = "5"
glob = "0.3"
rusqlite = { version = "0.31", features = ["bundled", "backup"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
walkdir = "2.5"
shlex = "1.3"
shellexpand = "3.1"
serde_json = { version = "1", optional = true }
[features]
json = ["serde_json"]

51
libmarlin/src/config.rs Normal file
View File

@@ -0,0 +1,51 @@
use anyhow::Result;
use directories::ProjectDirs;
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
path::{Path, PathBuf},
};
/// Runtime configuration (currently just the DB path).
#[derive(Debug, Clone)]
pub struct Config {
pub db_path: PathBuf,
}
impl Config {
/// Resolve configuration from environment or derive one per-workspace.
///
/// Priority:
/// 1. `MARLIN_DB_PATH` env-var (explicit override)
/// 2. *Workspace-local* file under XDG data dir
/// (`~/.local/share/marlin/index_<hash>.db`)
/// 3. Fallback to `./index.db` when we cannot locate an XDG dir
pub fn load() -> Result<Self> {
// 1) explicit override
if let Some(val) = std::env::var_os("MARLIN_DB_PATH") {
let p = PathBuf::from(val);
std::fs::create_dir_all(p.parent().expect("has parent"))?;
return Ok(Self { db_path: p });
}
// 2) derive per-workspace DB name from CWD hash
let cwd = std::env::current_dir()?;
let mut h = DefaultHasher::new();
cwd.hash(&mut h);
let digest = h.finish(); // 64-bit
let file_name = format!("index_{digest:016x}.db");
if let Some(dirs) = ProjectDirs::from("io", "Marlin", "marlin") {
let dir = dirs.data_dir();
std::fs::create_dir_all(dir)?;
return Ok(Self {
db_path: dir.join(file_name),
});
}
// 3) very last resort: a workspace-relative DB
Ok(Self {
db_path: Path::new(&file_name).to_path_buf(),
})
}
}
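A usage sketch for the three-step fallback (paths here are illustrative):

use libmarlin::config::Config;

fn main() -> anyhow::Result<()> {
    // 1) the explicit override wins
    std::env::set_var("MARLIN_DB_PATH", "/tmp/marlin/index.db");
    let cfg = Config::load()?;
    assert_eq!(cfg.db_path, std::path::PathBuf::from("/tmp/marlin/index.db"));

    // 2) otherwise the name is derived from a hash of the current directory,
    //    e.g. ~/.local/share/marlin/index_d1f0c2aa93e51b07.db
    std::env::remove_var("MARLIN_DB_PATH");
    println!("workspace DB: {}", Config::load()?.db_path.display());
    Ok(())
}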

191
libmarlin/src/db/migrations/0001_initial_schema.sql Normal file
View File

@@ -0,0 +1,191 @@
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL; -- Use WAL for better concurrency
-- Version 1: Initial Schema (with FTS5-backed search over paths, tags & attrs)
-- Core tables
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
size INTEGER,
mtime INTEGER,
hash TEXT -- file content hash (e.g. SHA256)
);
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL, -- tag segment
parent_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
canonical_id INTEGER REFERENCES tags(id) ON DELETE SET NULL,
UNIQUE(name, parent_id)
);
CREATE TABLE IF NOT EXISTS file_tags (
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY(file_id, tag_id)
);
CREATE TABLE IF NOT EXISTS attributes (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
key TEXT NOT NULL,
value TEXT,
UNIQUE(file_id, key)
);
-- Full-text search
-- Drop any old FTS table, then recreate it as a contentless standalone table
DROP TABLE IF EXISTS files_fts;
CREATE VIRTUAL TABLE files_fts
USING fts5(
path, -- indexed (no UNINDEXED) so paths are searchable
tags_text, -- concat of all tag names for this file
attrs_text, -- concat of all key=value attrs
content='', -- Explicitly mark as contentless
tokenize="unicode61 remove_diacritics 2"
);
-- FTS-sync triggers
-- When a file is added
DROP TRIGGER IF EXISTS files_fts_ai_file;
CREATE TRIGGER files_fts_ai_file
AFTER INSERT ON files
BEGIN
INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
VALUES (
NEW.id, -- Sets files_fts.rowid to files.id
NEW.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = NEW.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = NEW.id)
);
END;
-- When a file's path changes
DROP TRIGGER IF EXISTS files_fts_au_file;
CREATE TRIGGER files_fts_au_file
AFTER UPDATE OF path ON files
BEGIN
UPDATE files_fts
SET path = NEW.path
WHERE rowid = NEW.id; -- rowid refers to files_fts.rowid which matches files.id
END;
-- When a file is removed
DROP TRIGGER IF EXISTS files_fts_ad_file;
CREATE TRIGGER files_fts_ad_file
AFTER DELETE ON files
BEGIN
DELETE FROM files_fts WHERE rowid = OLD.id; -- OLD.id from files table
END;
-- When tags are added, replace the entire FTS row
DROP TRIGGER IF EXISTS file_tags_fts_ai;
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- When tags are removed, replace the entire FTS row
DROP TRIGGER IF EXISTS file_tags_fts_ad;
CREATE TRIGGER file_tags_fts_ad
AFTER DELETE ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- When attributes are added, replace the entire FTS row
DROP TRIGGER IF EXISTS attributes_fts_ai;
CREATE TRIGGER attributes_fts_ai
AFTER INSERT ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- When attribute values change, replace the entire FTS row
DROP TRIGGER IF EXISTS attributes_fts_au;
CREATE TRIGGER attributes_fts_au
AFTER UPDATE OF value ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- When attributes are removed, replace the entire FTS row
DROP TRIGGER IF EXISTS attributes_fts_ad;
CREATE TRIGGER attributes_fts_ad
AFTER DELETE ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Versioning & helpful indexes
CREATE TABLE IF NOT EXISTS schema_version (
version INTEGER PRIMARY KEY,
applied_on TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_files_hash ON files(hash);
CREATE INDEX IF NOT EXISTS idx_tags_name_parent ON tags(name, parent_id);
CREATE INDEX IF NOT EXISTS idx_file_tags_tag_id ON file_tags(tag_id);
CREATE INDEX IF NOT EXISTS idx_attr_file_key ON attributes(file_id, key);
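The triggers keep `files_fts.rowid` equal to `files.id`, so queries join back to `files` for the canonical path. A hedged rusqlite sketch against this schema:

use rusqlite::Connection;

fn search_fts(conn: &Connection, query: &str) -> rusqlite::Result<Vec<String>> {
    let mut stmt = conn.prepare(
        "SELECT f.path
           FROM files_fts
           JOIN files f ON f.id = files_fts.rowid
          WHERE files_fts MATCH ?1
          ORDER BY rank",
    )?;
    stmt.query_map([query], |r| r.get(0))?.collect()
}

// Column filters work as usual, e.g.:
//   search_fts(&conn, "tags_text:todo")
//   search_fts(&conn, r#"attrs_text:"status draft""#)  // key=value tokenizes to two terms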

92
libmarlin/src/db/migrations/0002_update_fts_and_triggers.sql Normal file
View File

@@ -0,0 +1,92 @@
-- src/db/migrations/0002_update_fts_and_triggers.sql
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL; -- Use WAL for better concurrency
-- Drop old FTS5 triggers so we can fully replace the row on tag/attr changes
DROP TRIGGER IF EXISTS file_tags_fts_ai;
DROP TRIGGER IF EXISTS file_tags_fts_ad;
DROP TRIGGER IF EXISTS attributes_fts_ai;
DROP TRIGGER IF EXISTS attributes_fts_au;
DROP TRIGGER IF EXISTS attributes_fts_ad;
-- Recreate triggers with INSERT OR REPLACE to ensure full reindex:
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER file_tags_fts_ad
AFTER DELETE ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
CREATE TRIGGER attributes_fts_ai
AFTER INSERT ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER attributes_fts_au
AFTER UPDATE OF value ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER attributes_fts_ad
AFTER DELETE ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(t.name, ' '), '')
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;

28
libmarlin/src/db/migrations/0003_create_links_collections_views.sql Normal file
View File

@@ -0,0 +1,28 @@
PRAGMA foreign_keys = ON;
-- File-to-file links
CREATE TABLE IF NOT EXISTS links (
id INTEGER PRIMARY KEY,
src_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
dst_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
type TEXT,
UNIQUE(src_file_id, dst_file_id, type)
);
-- Named collections
CREATE TABLE IF NOT EXISTS collections (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS collection_files (
collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
PRIMARY KEY(collection_id, file_id)
);
-- Saved views
CREATE TABLE IF NOT EXISTS views (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
query TEXT NOT NULL
);

289
libmarlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql Normal file
View File

@@ -0,0 +1,289 @@
-- src/db/migrations/0004_fix_hierarchical_tags_fts.sql
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
-- Force drop all FTS triggers to ensure they're recreated even if migration is already recorded
DROP TRIGGER IF EXISTS files_fts_ai_file;
DROP TRIGGER IF EXISTS files_fts_au_file;
DROP TRIGGER IF EXISTS files_fts_ad_file;
DROP TRIGGER IF EXISTS file_tags_fts_ai;
DROP TRIGGER IF EXISTS file_tags_fts_ad;
DROP TRIGGER IF EXISTS attributes_fts_ai;
DROP TRIGGER IF EXISTS attributes_fts_au;
DROP TRIGGER IF EXISTS attributes_fts_ad;
-- Create a new trigger for file insertion that uses recursive CTE for full tag paths
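-- tag_tree enumerates every tag together with its full slash-separated path
-- from the root (a tag `alpha` whose parent is `project` yields `project/alpha`);
-- the trailing UNION keeps bare root-level tags. Since unicode61 treats `/` as
-- a separator, both whole paths and individual segments become search terms.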
CREATE TRIGGER files_fts_ai_file
AFTER INSERT ON files
BEGIN
INSERT INTO files_fts(rowid, path, tags_text, attrs_text)
VALUES (
NEW.id,
NEW.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = NEW.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = NEW.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = NEW.id)
);
END;
-- Recreate the file path update trigger
CREATE TRIGGER files_fts_au_file
AFTER UPDATE OF path ON files
BEGIN
UPDATE files_fts
SET path = NEW.path
WHERE rowid = NEW.id;
END;
-- Recreate the file deletion trigger
CREATE TRIGGER files_fts_ad_file
AFTER DELETE ON files
BEGIN
DELETE FROM files_fts WHERE rowid = OLD.id;
END;
-- Create new trigger for tag insertion that uses recursive CTE for full tag paths
CREATE TRIGGER file_tags_fts_ai
AFTER INSERT ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
-- Create new trigger for tag deletion that uses recursive CTE for full tag paths
CREATE TRIGGER file_tags_fts_ad
AFTER DELETE ON file_tags
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Create new triggers for attribute operations that use recursive CTE for full tag paths
CREATE TRIGGER attributes_fts_ai
AFTER INSERT ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER attributes_fts_au
AFTER UPDATE OF value ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = NEW.file_id;
END;
CREATE TRIGGER attributes_fts_ad
AFTER DELETE ON attributes
BEGIN
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f
WHERE f.id = OLD.file_id;
END;
-- Update all existing FTS entries with the new tag-path format
INSERT OR REPLACE INTO files_fts(rowid, path, tags_text, attrs_text)
SELECT f.id, f.path,
(SELECT IFNULL(GROUP_CONCAT(tag_path, ' '), '')
FROM (
WITH RECURSIVE tag_tree(id, name, parent_id, path) AS (
SELECT t.id, t.name, t.parent_id, t.name
FROM tags t
WHERE t.parent_id IS NULL
UNION ALL
SELECT t.id, t.name, t.parent_id, tt.path || '/' || t.name
FROM tags t
JOIN tag_tree tt ON t.parent_id = tt.id
)
SELECT DISTINCT tag_tree.path AS tag_path
FROM file_tags ft
JOIN tag_tree ON ft.tag_id = tag_tree.id
WHERE ft.file_id = f.id
UNION
SELECT t.name AS tag_path
FROM file_tags ft
JOIN tags t ON ft.tag_id = t.id
WHERE ft.file_id = f.id AND t.parent_id IS NULL
)),
(SELECT IFNULL(GROUP_CONCAT(a.key || '=' || a.value, ' '), '')
FROM attributes a
WHERE a.file_id = f.id)
FROM files f;
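The same recursive CTE can be run standalone to inspect the full tag paths the index now carries; a sketch (output shape illustrative):

use rusqlite::Connection;

fn tag_paths(conn: &Connection) -> rusqlite::Result<Vec<String>> {
    let mut stmt = conn.prepare(
        "WITH RECURSIVE tag_tree(id, path) AS (
            SELECT id, name FROM tags WHERE parent_id IS NULL
            UNION ALL
            SELECT t.id, tt.path || '/' || t.name
              FROM tags t JOIN tag_tree tt ON t.parent_id = tt.id
         )
         SELECT path FROM tag_tree ORDER BY path",
    )?;
    stmt.query_map([], |r| r.get(0))?.collect()
}

// e.g. ["project", "project/alpha", "project/alpha/done"] for a three-level chain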

349
libmarlin/src/db/mod.rs Normal file
View File

@@ -0,0 +1,349 @@
//! Central DB helper: connection bootstrap, migrations **and** most
//! data-access helpers (tags, links, collections, saved views, …).
use std::{
fs,
path::{Path, PathBuf},
};
use std::result::Result as StdResult;
use anyhow::{Context, Result};
use chrono::Local;
use rusqlite::{
backup::{Backup, StepResult},
params,
Connection,
OpenFlags,
OptionalExtension,
TransactionBehavior,
};
use tracing::{debug, info, warn};
/* ─── embedded migrations ─────────────────────────────────────────── */
const MIGRATIONS: &[(&str, &str)] = &[
("0001_initial_schema.sql", include_str!("migrations/0001_initial_schema.sql")),
("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
];
/* ─── connection bootstrap ────────────────────────────────────────── */
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
let db_path_ref = db_path.as_ref();
let mut conn = Connection::open(db_path_ref)
.with_context(|| format!("failed to open DB at {}", db_path_ref.display()))?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.pragma_update(None, "foreign_keys", "ON")?;
// Wait up to 30 s for a competing writer before giving up
conn.busy_timeout(std::time::Duration::from_secs(30))?; // ← tweaked
apply_migrations(&mut conn)?;
Ok(conn)
}
/* ─── migration runner ────────────────────────────────────────────── */
fn apply_migrations(conn: &mut Connection) -> Result<()> {
// Ensure schema_version bookkeeping table exists
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS schema_version (
version INTEGER PRIMARY KEY,
applied_on TEXT NOT NULL
);",
)?;
// Legacy patch: ignore errors if the column already exists
let _ = conn.execute("ALTER TABLE schema_version ADD COLUMN applied_on TEXT", []);
// Grab the write-lock up-front so migrations can run uninterrupted
let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
for (fname, sql) in MIGRATIONS {
let version: i64 = fname
.split('_')
.next()
.and_then(|s| s.parse().ok())
.expect("migration filenames start with number");
let already: Option<i64> = tx
.query_row(
"SELECT version FROM schema_version WHERE version = ?1",
[version],
|r| r.get(0),
)
.optional()?;
if already.is_some() {
debug!("migration {} already applied", fname);
continue;
}
info!("applying migration {}", fname);
tx.execute_batch(sql)
.with_context(|| format!("could not apply migration {fname}"))?;
tx.execute(
"INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
params![version, Local::now().to_rfc3339()],
)?;
}
tx.commit()?;
// sanity check: warn if any embedded migration got skipped
let mut missing = Vec::new();
for (fname, _) in MIGRATIONS {
let v: i64 = fname.split('_').next().unwrap().parse().unwrap();
let ok: bool = conn
.query_row(
"SELECT 1 FROM schema_version WHERE version = ?1",
[v],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !ok {
missing.push(v);
}
}
if !missing.is_empty() {
warn!("migrations not applied: {:?}", missing);
}
Ok(())
}
/* ─── tag helpers ─────────────────────────────────────────────────── */
pub fn ensure_tag_path(conn: &Connection, path: &str) -> Result<i64> {
let mut parent: Option<i64> = None;
for segment in path.split('/').filter(|s| !s.is_empty()) {
conn.execute(
"INSERT OR IGNORE INTO tags(name, parent_id) VALUES (?1, ?2)",
params![segment, parent],
)?;
let id: i64 = conn.query_row(
"SELECT id FROM tags WHERE name = ?1 AND (parent_id IS ?2 OR parent_id = ?2)",
params![segment, parent],
|r| r.get(0),
)?;
parent = Some(id);
}
parent.ok_or_else(|| anyhow::anyhow!("empty tag path"))
}
pub fn file_id(conn: &Connection, path: &str) -> Result<i64> {
conn.query_row("SELECT id FROM files WHERE path = ?1", [path], |r| r.get(0))
.map_err(|_| anyhow::anyhow!("file not indexed: {}", path))
}
/* ─── attributes ──────────────────────────────────────────────────── */
pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> Result<()> {
conn.execute(
r#"
INSERT INTO attributes(file_id, key, value)
VALUES (?1, ?2, ?3)
ON CONFLICT(file_id, key) DO UPDATE SET value = excluded.value
"#,
params![file_id, key, value],
)?;
Ok(())
}
/* ─── links ───────────────────────────────────────────────────────── */
pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
conn.execute(
"INSERT INTO links(src_file_id, dst_file_id, type)
VALUES (?1, ?2, ?3)
ON CONFLICT(src_file_id, dst_file_id, type) DO NOTHING",
params![src_file_id, dst_file_id, link_type],
)?;
Ok(())
}
pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
conn.execute(
"DELETE FROM links
WHERE src_file_id = ?1
AND dst_file_id = ?2
AND (type IS ?3 OR type = ?3)",
params![src_file_id, dst_file_id, link_type],
)?;
Ok(())
}
pub fn list_links(
conn: &Connection,
pattern: &str,
direction: Option<&str>,
link_type: Option<&str>,
) -> Result<Vec<(String, String, Option<String>)>> {
let like_pattern = pattern.replace('*', "%");
// Files matching pattern
let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
let rows = stmt
.query_map(params![like_pattern], |r| Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)))?
.collect::<Result<Vec<_>, _>>()?;
let mut out = Vec::new();
for (fid, fpath) in rows {
let (src_col, dst_col) = match direction {
Some("in") => ("dst_file_id", "src_file_id"),
_ => ("src_file_id", "dst_file_id"),
};
let sql = format!(
"SELECT f2.path, l.type
FROM links l
JOIN files f2 ON f2.id = l.{dst_col}
WHERE l.{src_col} = ?1
AND (?2 IS NULL OR l.type = ?2)",
);
let mut stmt2 = conn.prepare(&sql)?;
let links = stmt2
.query_map(params![fid, link_type], |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)))?
.collect::<Result<Vec<_>, _>>()?;
for (other, typ) in links {
out.push((fpath.clone(), other, typ));
}
}
Ok(out)
}
pub fn find_backlinks(
conn: &Connection,
pattern: &str,
) -> Result<Vec<(String, Option<String>)>> {
let like = pattern.replace('*', "%");
let mut stmt = conn.prepare(
"SELECT f1.path, l.type
FROM links l
JOIN files f1 ON f1.id = l.src_file_id
JOIN files f2 ON f2.id = l.dst_file_id
WHERE f2.path LIKE ?1",
)?;
let rows = stmt.query_map([like], |r| {
Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
})?;
let out = rows.collect::<StdResult<Vec<_>, _>>()?; // rusqlite → anyhow via `?`
Ok(out)
}
/* ─── NEW: collections helpers ────────────────────────────────────── */
pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> {
conn.execute(
"INSERT OR IGNORE INTO collections(name) VALUES (?1)",
params![name],
)?;
conn.query_row(
"SELECT id FROM collections WHERE name = ?1",
params![name],
|r| r.get(0),
)
.context("collection lookup failed")
}
pub fn add_file_to_collection(conn: &Connection, coll_id: i64, file_id: i64) -> Result<()> {
conn.execute(
"INSERT OR IGNORE INTO collection_files(collection_id, file_id)
VALUES (?1, ?2)",
params![coll_id, file_id],
)?;
Ok(())
}
pub fn list_collection(conn: &Connection, name: &str) -> Result<Vec<String>> {
let mut stmt = conn.prepare(
r#"SELECT f.path
FROM collections c
JOIN collection_files cf ON cf.collection_id = c.id
JOIN files f ON f.id = cf.file_id
WHERE c.name = ?1
ORDER BY f.path"#,
)?;
let rows = stmt.query_map([name], |r| r.get::<_, String>(0))?;
let list = rows.collect::<StdResult<Vec<_>, _>>()?;
Ok(list)
}
/* ─── NEW: saved views (smart folders) ────────────────────────────── */
pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
conn.execute(
"INSERT INTO views(name, query)
VALUES (?1, ?2)
ON CONFLICT(name) DO UPDATE SET query = excluded.query",
params![name, query],
)?;
Ok(())
}
pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> {
let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?;
let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
let list = rows.collect::<StdResult<Vec<_>, _>>()?;
Ok(list)
}
pub fn view_query(conn: &Connection, name: &str) -> Result<String> {
conn.query_row(
"SELECT query FROM views WHERE name = ?1",
[name],
|r| r.get::<_, String>(0),
)
.context(format!("no view called '{name}'"))
}
/* ─── backup / restore helpers ────────────────────────────────────── */
pub fn backup<P: AsRef<Path>>(db_path: P) -> Result<PathBuf> {
let src = db_path.as_ref();
let dir = src
.parent()
.ok_or_else(|| anyhow::anyhow!("invalid DB path: {}", src.display()))?
.join("backups");
fs::create_dir_all(&dir)?;
let stamp = Local::now().format("%Y-%m-%d_%H-%M-%S");
let dst = dir.join(format!("backup_{stamp}.db"));
let src_conn = Connection::open_with_flags(src, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
let mut dst_conn = Connection::open(&dst)?;
let bk = Backup::new(&src_conn, &mut dst_conn)?;
while let StepResult::More = bk.step(100)? {}
Ok(dst)
}
pub fn restore<P: AsRef<Path>>(backup_path: P, live_db_path: P) -> Result<()> {
fs::copy(&backup_path, &live_db_path)?;
Ok(())
}
/* ─── tests ───────────────────────────────────────────────────────── */
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn migrations_apply_in_memory() {
open(":memory:").expect("all migrations apply");
}
}
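A hedged write-path sketch tying these helpers together (paths, tag, collection and view names invented; the file must already have been indexed by `scan`):

use libmarlin::db;
use rusqlite::params;

fn annotate() -> anyhow::Result<()> {
    let conn = db::open("index.db")?;                        // migrations run on open
    let fid  = db::file_id(&conn, "/notes/todo.md")?;        // errors if not indexed
    let tid  = db::ensure_tag_path(&conn, "project/alpha")?; // creates both segments
    conn.execute(
        "INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
        params![fid, tid],
    )?;
    db::upsert_attr(&conn, fid, "status", "draft")?;
    let coll = db::ensure_collection(&conn, "inbox")?;
    db::add_file_to_collection(&conn, coll, fid)?;
    db::save_view(&conn, "alpha-files", "tags_text:alpha")?;
    let _snapshot = db::backup("index.db")?;                 // backups/backup_<stamp>.db
    Ok(())
}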

122
libmarlin/src/lib.rs Normal file
View File

@@ -0,0 +1,122 @@
//! libmarlin public API surface for the Marlin core.
//!
//! Downstream crates (`cli-bin`, `tui-bin`, tests, plugins) should depend
//! *only* on the helpers re-exported here, never on internal modules
//! directly. That gives us room to refactor internals without breaking
//! callers.
#![deny(warnings)]
pub mod config; // moved as-is
pub mod db; // moved as-is
pub mod logging; // expose the logging init helper
pub mod scan; // moved as-is
pub mod utils; // hosts determine_scan_root() & misc helpers
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
use walkdir::WalkDir;
/// Primary façade: open a workspace, then call helper methods.
///
/// Most methods simply wrap what the CLI used to do directly; more will be
/// filled in sprint-by-sprint.
pub struct Marlin {
#[allow(dead_code)]
cfg: config::Config,
conn: Connection,
}
impl Marlin {
/// Load configuration from env / workspace and open (or create) the DB.
pub fn open_default() -> Result<Self> {
let cfg = config::Config::load()?;
let conn = db::open(&cfg.db_path)?;
Ok(Self { cfg, conn })
}
/// Open an explicit DB path; handy for tests or headless tools.
pub fn open_at<P: AsRef<Path>>(path: P) -> Result<Self> {
let cfg = config::Config { db_path: path.as_ref().to_path_buf() };
let conn = db::open(&cfg.db_path)?;
Ok(Self { cfg, conn })
}
/// Recursively index one or more directories.
pub fn scan<P: AsRef<Path>>(&mut self, paths: &[P]) -> Result<usize> {
let mut total = 0usize;
for p in paths {
total += scan::scan_directory(&mut self.conn, p.as_ref())?;
}
Ok(total)
}
/// Attach a hierarchical tag (`foo/bar`) to every file that matches the
/// glob pattern. Returns the number of files that actually got updated.
pub fn tag(&mut self, pattern: &str, tag_path: &str) -> Result<usize> {
use glob::Pattern;
// 1) ensure tag hierarchy exists
let leaf_tag_id = db::ensure_tag_path(&self.conn, tag_path)?;
// 2) collect leaf + ancestors
let mut tag_ids = Vec::new();
let mut current = Some(leaf_tag_id);
while let Some(id) = current {
tag_ids.push(id);
current = self.conn.query_row(
"SELECT parent_id FROM tags WHERE id=?1",
[id],
|r| r.get::<_, Option<i64>>(0),
)?;
}
// 3) walk the file tree and upsert `file_tags`
let expanded = shellexpand::tilde(pattern).into_owned();
let pat = Pattern::new(&expanded)
.with_context(|| format!("Invalid glob pattern `{expanded}`"))?;
let root = utils::determine_scan_root(&expanded);
let mut stmt_file = self.conn.prepare("SELECT id FROM files WHERE path=?1")?;
let mut stmt_insert = self.conn.prepare(
"INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
)?;
let mut changed = 0usize;
for entry in WalkDir::new(&root)
.into_iter()
.filter_map(Result::ok)
.filter(|e| e.file_type().is_file())
{
let p = entry.path().to_string_lossy();
if !pat.matches(&p) { continue; }
match stmt_file.query_row([p.as_ref()], |r| r.get::<_, i64>(0)) {
Ok(fid) => {
let mut newly = false;
for &tid in &tag_ids {
if stmt_insert.execute([fid, tid])? > 0 { newly = true; }
}
if newly { changed += 1; }
}
Err(_) => { /* ignore non-indexed files */ }
}
}
Ok(changed)
}
/// FTS5 search → list of matching paths.
pub fn search(&self, query: &str) -> Result<Vec<String>> {
let mut stmt = self.conn.prepare(
"SELECT path FROM files_fts WHERE files_fts MATCH ?1 ORDER BY rank",
)?;
let rows = stmt.query_map([query], |r| r.get::<_, String>(0))?
.collect::<Result<Vec<_>, _>>()?;
Ok(rows)
}
/// Borrow the underlying SQLite connection (read-only).
pub fn conn(&self) -> &Connection { &self.conn }
}
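The same flow through the façade, end to end (directory and glob pattern invented for illustration):

use libmarlin::Marlin;

fn main() -> anyhow::Result<()> {
    let mut marlin = Marlin::open_default()?;       // env override or per-workspace DB
    let indexed = marlin.scan(&["./docs"])?;
    println!("indexed {indexed} files");
    let tagged = marlin.tag("./docs/**/*.md", "project/alpha")?;
    println!("tagged {tagged} files");
    for path in marlin.search("alpha")? {
        println!("{path}");
    }
    Ok(())
}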

17
libmarlin/src/logging.rs Normal file
View File

@@ -0,0 +1,17 @@
use tracing_subscriber::{fmt, EnvFilter};
/// Initialise global tracing subscriber.
///
/// Reads `RUST_LOG` for filtering, falls back to `info`.
pub fn init() {
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
// All tracing output (INFO, WARN, ERROR …) now goes to *stderr* so the
// integration tests can assert on warnings / errors reliably.
fmt()
.with_target(false) // hide module targets
.with_level(true) // include log level
.with_env_filter(filter) // respect RUST_LOG
.with_writer(std::io::stderr) // <-- NEW: send to stderr
.init();
}
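Callers initialise it once at startup; a sketch (the binary name is illustrative):

fn main() {
    libmarlin::logging::init();        // honours RUST_LOG, defaults to `info`
    tracing::info!("starting up");     // emitted on stderr
}

// e.g. RUST_LOG=debug ./marlin scan .   (stdout stays clean for tests)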

47
libmarlin/src/scan.rs Normal file
View File

@@ -0,0 +1,47 @@
// src/scan.rs (unchanged except a tiny doc tweak)
use std::fs;
use std::path::Path;
use anyhow::Result;
use rusqlite::{params, Connection};
use tracing::{debug, info};
use walkdir::WalkDir;
/// Recursively walk `root` and upsert file metadata.
/// Triggers keep the FTS table in sync.
pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
let tx = conn.transaction()?;
let mut stmt = tx.prepare(
r#"
INSERT INTO files(path, size, mtime)
VALUES (?1, ?2, ?3)
ON CONFLICT(path) DO UPDATE
SET size = excluded.size,
mtime = excluded.mtime
"#,
)?;
let mut count = 0usize;
for entry in WalkDir::new(root)
.into_iter()
.filter_map(Result::ok)
.filter(|e| e.file_type().is_file())
{
let meta = fs::metadata(entry.path())?;
let size = meta.len() as i64;
let mtime = meta
.modified()?
.duration_since(std::time::UNIX_EPOCH)?
.as_secs() as i64;
let path_str = entry.path().to_string_lossy();
stmt.execute(params![path_str, size, mtime])?;
count += 1;
debug!(file = %path_str, "indexed");
}
drop(stmt);
tx.commit()?;
info!(indexed = count, "scan complete");
Ok(count)
}
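A usage sketch (the target directory is invented):

use libmarlin::{db, scan};
use std::path::Path;

fn index_tree() -> anyhow::Result<usize> {
    let mut conn = db::open("index.db")?;   // WAL + migrations set up on open
    // One transaction per call; for newly inserted paths the
    // files_fts_ai_file trigger fills the FTS row.
    let count = scan::scan_directory(&mut conn, Path::new("./docs"))?;
    Ok(count)
}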

25
libmarlin/src/utils.rs Normal file
View File

@@ -0,0 +1,25 @@
//! Misc shared helpers.
use std::path::PathBuf;
/// Determine a filesystem root to limit recursive walking on glob scans.
pub fn determine_scan_root(pattern: &str) -> PathBuf {
let first_wild = pattern
.find(|c| matches!(c, '*' | '?' | '['))
.unwrap_or(pattern.len());
let mut root = PathBuf::from(&pattern[..first_wild]);
while root
.as_os_str()
.to_string_lossy()
.contains(|c| matches!(c, '*' | '?' | '['))
{
root = root.parent().map(|p| p.to_path_buf()).unwrap_or_default();
}
if root.as_os_str().is_empty() {
PathBuf::from(".")
} else {
root
}
}
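A few worked cases, as a test-style sketch (these follow directly from the code above):

#[cfg(test)]
mod tests {
    use super::determine_scan_root;
    use std::path::PathBuf;

    #[test]
    fn picks_prefix_before_first_wildcard() {
        assert_eq!(determine_scan_root("src/**/*.rs"), PathBuf::from("src"));
        assert_eq!(determine_scan_root("*.md"), PathBuf::from("."));
        // no wildcard: the whole pattern is the root
        assert_eq!(
            determine_scan_root("/var/log/nginx/access.log"),
            PathBuf::from("/var/log/nginx/access.log")
        );
    }
}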