Update dependencies and add new features for improved functionality

- Updated Cargo.lock and Cargo.toml to include new dependencies
- Added new files for backup and watcher functionality in libmarlin
- Introduced integration tests and documentation updates
- Set workspace resolver to version 2 for better dependency resolution
This commit is contained in:
thePR0M3TH3AN
2025-05-19 18:14:42 -04:00
parent 6125acb4d1
commit 2f97bd8c3f
23 changed files with 2567 additions and 50 deletions

View File

@@ -7,11 +7,14 @@
#![deny(warnings)]
pub mod config; // as-is
pub mod db; // as-is
pub mod logging; // expose the logging init helper
pub mod scan; // as-is
pub mod utils; // hosts determine_scan_root() & misc helpers
pub mod backup;
pub mod config;
pub mod db;
pub mod error;
pub mod logging;
pub mod scan;
pub mod utils;
pub mod watcher;
#[cfg(test)]
mod utils_tests;
@@ -25,15 +28,17 @@ mod logging_tests;
mod db_tests;
#[cfg(test)]
mod facade_tests;
#[cfg(test)]
mod watcher_tests;
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::{fs, path::Path};
use std::{fs, path::Path, sync::{Arc, Mutex}};
/// Main handle for interacting with a Marlin database.
pub struct Marlin {
#[allow(dead_code)]
cfg: config::Config,
cfg: config::Config,
conn: Connection,
}
@@ -41,7 +46,7 @@ impl Marlin {
/// Open using the default config (env override or XDG/CWD fallback),
/// ensuring parent directories exist and applying migrations.
pub fn open_default() -> Result<Self> {
// 1) Load configuration (checks MARLIN_DB_PATH, XDG_DATA_HOME, or falls back to ./index_<hash>.db)
// 1) Load configuration
let cfg = config::Config::load()?;
// 2) Ensure the DB's parent directory exists
if let Some(parent) = cfg.db_path.parent() {
@@ -86,7 +91,7 @@ impl Marlin {
// 1) ensure tag hierarchy
let leaf = db::ensure_tag_path(&self.conn, tag_path)?;
// 2) collect it plus all ancestors
// 2) collect leaf + ancestors
let mut tag_ids = Vec::new();
let mut cur = Some(leaf);
while let Some(id) = cur {
@@ -98,41 +103,37 @@ impl Marlin {
)?;
}
// 3) pick matching files _from the DB_ (not from the FS!)
// 3) match files by glob against stored paths
let expanded = shellexpand::tilde(pattern).into_owned();
let pat = Pattern::new(&expanded)
.with_context(|| format!("Invalid glob pattern `{}`", expanded))?;
// pull down all (id, path)
let mut stmt_all = self.conn.prepare("SELECT id, path FROM files")?;
let rows = stmt_all.query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?;
let mut stmt_insert = self.conn.prepare(
let mut stmt_ins = self.conn.prepare(
"INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
)?;
let mut changed = 0;
for row in rows {
let (fid, path_str): (i64, String) = row?;
let matches = if expanded.contains(std::path::MAIN_SEPARATOR) {
// pattern includes a slash — match full path
let is_match = if expanded.contains(std::path::MAIN_SEPARATOR) {
pat.matches(&path_str)
} else {
// no slash — match just the file name
std::path::Path::new(&path_str)
Path::new(&path_str)
.file_name()
.and_then(|n| n.to_str())
.map(|n| pat.matches(n))
.unwrap_or(false)
};
if !matches {
if !is_match {
continue;
}
// upsert this tag + its ancestors
let mut newly = false;
for &tid in &tag_ids {
if stmt_insert.execute([fid, tid])? > 0 {
if stmt_ins.execute([fid, tid])? > 0 {
newly = true;
}
}
@@ -140,50 +141,36 @@ impl Marlin {
changed += 1;
}
}
Ok(changed)
}
/// Fulltext search over path, tags, and attrs (with fallback).
/// Full-text search over path, tags, and attrs, with substring fallback.
pub fn search(&self, query: &str) -> Result<Vec<String>> {
let mut stmt = self.conn.prepare(
r#"
SELECT f.path
FROM files_fts
JOIN files f ON f.rowid = files_fts.rowid
WHERE files_fts MATCH ?1
ORDER BY rank
"#,
"SELECT f.path FROM files_fts JOIN files f ON f.rowid = files_fts.rowid WHERE files_fts MATCH ?1 ORDER BY rank",
)?;
let mut hits = stmt
.query_map([query], |r| r.get(0))?
.collect::<Result<Vec<_>, _>>()?;
let mut hits = stmt.query_map([query], |r| r.get(0))?
.collect::<std::result::Result<Vec<_>, rusqlite::Error>>()?;
// graceful fallback: substring scan when no FTS hits and no `:` in query
if hits.is_empty() && !query.contains(':') {
hits = self.fallback_search(query)?;
}
Ok(hits)
}
/// private helper: scan `files` table + small files for a substring
fn fallback_search(&self, term: &str) -> Result<Vec<String>> {
let needle = term.to_lowercase();
let mut stmt = self.conn.prepare("SELECT path FROM files")?;
let rows = stmt.query_map([], |r| r.get(0))?;
let mut out = Vec::new();
for path_res in rows {
let p: String = path_res?; // Explicit type annotation added
// match in the path itself?
for res in rows {
let p: String = res?;
if p.to_lowercase().contains(&needle) {
out.push(p.clone());
continue;
}
// otherwise read small files
if let Ok(meta) = fs::metadata(&p) {
if meta.len() <= 65_536 {
if meta.len() <= 65_536 {
if let Ok(body) = fs::read_to_string(&p) {
if body.to_lowercase().contains(&needle) {
out.push(p.clone());
@@ -195,8 +182,27 @@ impl Marlin {
Ok(out)
}
/// Borrow the underlying SQLite connection (read-only).
/// Borrow the raw SQLite connection.
pub fn conn(&self) -> &Connection {
&self.conn
}
/// Spawn a file-watcher that indexes changes in real time.
pub fn watch<P: AsRef<Path>>(
&mut self,
path: P,
config: Option<watcher::WatcherConfig>,
) -> Result<watcher::FileWatcher> {
let cfg = config.unwrap_or_default();
let p = path.as_ref().to_path_buf();
let new_conn = db::open(&self.cfg.db_path)
.context("opening database for watcher")?;
let watcher_db = Arc::new(Mutex::new(db::Database::new(new_conn)));
let mut owned_w = watcher::FileWatcher::new(vec![p], cfg)?;
owned_w.with_database(watcher_db); // Modifies owned_w in place
owned_w.start()?; // Start the watcher after it has been fully configured
Ok(owned_w) // Return the owned FileWatcher
}
}