diff --git a/Cargo.toml b/Cargo.toml index e69de29..5ecabd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "marlin" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0" +clap = { version = "4.5.2", features = ["derive"] } +directories = "5.0" +glob = "0.3" +rusqlite = { version = "0.31.0", features = ["bundled"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] } +walkdir = "2.5" diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c9fa43 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# 1. Build +cargo build --release + +# 2. Initialise DB (idempotent) +./target/release/marlin init + +# 3. Scan a directory +./target/release/marlin scan ~/Pictures + +# 4. Tag all JPEGs in Pictures +./target/release/marlin tag "~/Pictures/**/*.jpg" vacation diff --git a/features.md b/features.md new file mode 100644 index 0000000..a1165ff --- /dev/null +++ b/features.md @@ -0,0 +1,186 @@ +# Marlin – Metadata‑Driven File Explorer + +*Version 2 – 12 May 2025* + +--- + +## 1  Key Features & Functionality + +| Feature Area | Capabilities | +| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Tagging System** | • Unlimited, hierarchical or flat tags.
• Alias/synonym support with precedence rules (admin‑defined canonical name).
• **Bulk tag editing** via multi‑select context menu.
• Folder‑to‑Tag import with optional *watch & sync* mode so new sub‑folders inherit tags automatically. | +| **Custom Metadata Attributes** | • User‑defined fields (text, number, date, enum, boolean).
• Per‑template **Custom Metadata Schemas** (e.g. *Photo* → *Date, Location*). | +| **File Relationships** | • Typed, directional or bidirectional links (*related to*, *duplicate of*, *cites*…).
• Plugin API can register new relationship sets. | +| **Version Control for Metadata** | • Every change logged; unlimited roll‑back.
• Side‑by‑side diff viewer and *blame* panel showing *who/when/what*.
• Offline edits stored locally and merged (Git‑style optimistic merge with conflict prompts). | +| **Advanced Search & Smart Folders** | • Structured query syntax: `tag:ProjectX AND author:Alice`.
• Natural‑language search (*"files Alice edited last month"*) with toggle to exact mode.
• Visual Query Builder showing live query string.
• Saved queries appear as virtual “smart folders” that update in real‑time. | +| **User Interface** | • Sidebar: tags, attributes, relationships.
• Drag‑and‑drop tagging; inline metadata editor.
• Search bar with auto‑complete (Bloom filter backed).
• **Dual View Mode** – metadata vs traditional folder; remembers preference per location.
• **Interactive 60‑second tour** on first launch plus contextual tooltip help. | +| **Collaboration** | • Real‑time metadata sync across devices via cloud or self‑hosted relay.
• Conflict handling as per Version Control.
• Role‑based permissions (read / write / admin) on tags & attributes. | +| **Performance & Scale** | • Sharded/distributed index optional for >1 M files.
• Query cache with LRU eviction.
• Target metrics (100 k files): cold start ≤ 3 s, complex query ≤ 150 ms (stretch 50 ms). | +| **Backup & Restore** | • Scheduled encrypted backups; export to JSON / XML.
• One‑click restore from any point‑in‑time snapshot. | +| **Extensibility** | • Plug‑in system (TypeScript/JS) – see §2.4.
• Python scripting hook for automation and batch tasks.
• REST/IPC API for external tools. | + +--- + +## 2  Technical Implementation + +### 2.1  Core Stack + +| Component | Primary Choice | Notes | +| -------------- | -------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------- | +| File Manager | **Dolphin (KDE)** KIO‑based plug‑ins | GTK users can install a Nautilus extension (feature‑parity subset). | +| Metadata Store | **SQLite + FTS5** (single‑user) → optional **LiteFS/Postgres** for replication & multi‑user scale. | Per‑row AES‑GCM encryption for sensitive fields; keys stored in OS keyring. | +| Indexer Daemon | Rust service using `notify` (inotify on Linux, FSEvents on macOS). | 100 ms debounce batches, async SQLite writes. | +| Cache | In‑memory LRU + Bloom filter for auto‑complete. | | + +### 2.2  Database Schema (simplified) + +```text +files(id PK, path, inode, size, mtime, ctime, hash) +tags(id PK, name, parent_id, canonical_id) +file_tags(file_id FK, tag_id FK) +attributes(id PK, file_id FK, key, value, value_type) +relationships(id PK, src_file_id FK, dst_file_id FK, rel_type, direction) +change_log(change_id PK, object_table, object_id, op, actor, ts, payload_json) +``` + +### 2.3  Sync & Conflict Resolution + +1. Each client appends to **change\_log** (CRDT‑compatible delta). +2. Delta sync via WebSocket; server merges and re‑broadcasts. +3. Conflicts → *Conflict Queue* UI (choose theirs / mine / merge). + +### 2.4  Plugin API (TypeScript) + +```ts +export interface MarlinPlugin { + onInit(ctx: CoreContext): void; + extendSchema?(db: Database): void; // e.g. add new relationship table + addCommands?(ui: UIContext): void; // register menus, actions +} +``` + +Plugins run in a sandboxed process with whitelisted IPC calls. + +--- + +## 3  UX & Accessibility + +* **Keyboard‑only workflow** audit (Tab / Shift‑Tab / Space toggles). +* High‑contrast theme; adheres to WCAG 2.1 AA. +* `Ctrl+Alt+V` toggles Dual View. +* Generated query string shown live under Visual Builder – educates power users. + +--- + +## 4  Performance Budget + +| Metric | MVP | Stretch | +| ------------------------ | --------- | ---------- | +| Cold start (100 k files) | ≤ 3 s | 1 s | +| Complex AND/OR query | ≤ 150 ms | 50 ms | +| Sustained inserts | 5 k ops/s | 20 k ops/s | + +Benchmarks run nightly; regressions block merge. + +--- + +## 5  Security & Privacy + +* **Role‑based ACL** on tags/attributes. +* Per‑change audit trail; logs rotated to cold storage (≥ 90 days online). +* Plugins confined by seccomp/AppArmor; no direct disk/network unless declared. + +--- + +## 6  Packaging & Distribution + +* **Flatpak** (GNOME/KDE) and **AppImage** for portable builds. +* Background service runs as a systemd user unit: `--user marlin-indexerd.service`. +* CLI (`marlin-cli`) packaged for headless servers & CI. + +--- + +## 7  Roadmap + +| Milestone | Scope | Timeline | +| --------- | ----------------------------------------------------------------------------- | -------- | +| **M1** | Tagging, attributes, virtual folders, SQLite, Dolphin plug‑in | 6 weeks | +| **M2** | Sync service, version control, CLI | +6 weeks | +| **M3** | NLP search, Visual Builder, distributed index prototype | +6 weeks | +| **M4** | Plugin marketplace, enterprise auth (LDAP/OIDC), mobile companion (view‑only) | +8 weeks | + +--- + +## 8  Branding + +* **Name**: **Marlin** – fast, precise. +* Icon: stylised sailfish fin forming a folder corner. +* Tagline: *“Cut through clutter.”* +* Domain: `marlin‑explorer.io` (availability checked 2025‑05‑12). + +--- + +## 9  Quick‑Win Checklist (Sprint 0) + +* [ ] Implement bulk metadata editor UI +* [ ] Write conflict‑resolution spec & unit tests +* [ ] Build diff viewer prototype +* [ ] Keyboard‑only navigation audit +* [ ] Establish performance CI with sample 100 k file corpus + +--- + +--- + +## 10 Development Plan (Outline) + +### 10.1 Process & Methodology + +* **Framework** – 2‑week Scrum sprints with Jira backlog, GitHub Projects mirror for public issues. +* **Branching** – Trunk‑based: feature branches → PR → required CI & code‑review approvals (2).*Main* auto‑deploys nightly Flatpak. +* **Definition of Done** – Code + unit tests + docs + passing CI + demo video (for UI work). +* **CI/CD** – GitHub Actions matrix (Ubuntu 22.04, KDE Neon, Fedora 39) → Flatpak / AppImage artefacts, `cargo clippy`, coverage gate ≥ 85 %. + +### 10.2 Team & Roles (FTE‑equivalent) + +| Role | Core Skills | Allocation | +| ----------------------------- | -------------------------------- | ---------- | +| Lead Engineer | Rust, Qt/Kirigami, KIO | 1.0 | +| Backend Engineer | Rust, LiteFS/Postgres, WebSocket | 1.0 | +| Full‑stack / Plug‑in Engineer | TypeScript, Node, IPC | 0.8 | +| UX / QA | Figma, accessibility, Playwright | 0.5 | +| DevOps (fractional) | CI, Flatpak, security hardening | 0.2 | + +### 10.3 Roadmap → Sprint‑level Tasks + +| Sprint | Goal | Key Tasks | Exit Criteria | +| ---------------------- | -------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | +| **S0 (2 wks)** | Project bootstrap | • Repo + CI skeleton
• SQLite schema + migrations
• `marlin-cli init` & basic scan
• Hyperfine perf baseline | CLI scans dir; tests pass; artefact builds | +| **S1–3 (M1, 6 wks)** | Tagging + virtual folders MVP | • Indexer daemon in Rust
• CRUD tags/attributes via CLI & DB
• Dolphin plug‑in: sidebar + tag view
• KIO `tags://` virtual folder
• Bulk‑edit dialog | 100 k‑file corpus cold‑start ≤ 3 s; user can tag files & navigate `tags://Urgent` | +| **S4–6 (M2, 6 wks)** | Sync & version control | • Change‑log table + diff viewer
• LiteFS replication PoC
• WebSocket delta sync
• Conflict queue UI + last‑write‑wins fallback | Two devices sync metadata in <1 s round‑trip; rollback works | +| **S7–9 (M3, 6 wks)** | NLP search & Visual Builder | • Integrate Tantivy FTS + ONNX intent model
• Toggle exact vs natural search
• QML Visual Builder with live query string | NL query "docs Alice edited last week" returns expected set in ≤ 300 ms | +| **S10–13 (M4, 8 wks)** | Plug‑in marketplace & mobile companion | • IPC sandbox + manifest spec
• Sample plug‑ins (image EXIF auto‑tagger)
• Flutter read‑only client
• LDAP/OIDC enterprise auth | First external plug‑in published; mobile app lists smart folders | + +### 10.4 Tooling & Infrastructure + +* **Issue tracking** – Jira → labels `component/indexer`, `component/ui`. +* **Docs** – mkdocs‑material hosted on GitHub Pages; automatic diagram generation via `cargo doc` + Mermaid. +* **Nightly Perf Benchmarks** – Run in CI against 10 k, 100 k, 1 M synthetic corpora; fail build if P95 query > target. +* **Security** – Dependabot, Trivy scans, optional SLSA level 2 provenance for releases. + +### 10.5 Risks & Mitigations + +| Risk | Impact | Mitigation | +| ------------------------------ | ---------------- | --------------------------------------------------------------------------- | +| CRDT complexity | Delays M2 | Ship LWW first; schedule CRDT refactor post‑launch | +| File system event overflow | Index corruption | Debounce & auto‑fallback to full rescan; alert user | +| Cross‑distro packaging pain | Adoption drops | Stick to Flatpak; AppImage only for power users; collect telemetry (opt‑in) | +| Scaling >1 M files on slow HDD | Perf complaints | Offer "index on SSD" wizard; tune FTS page cache | + +### 10.6 Budget & Timeline Snapshot + +* **Total dev time** ≈ 30 weeks. +* **Buffer** +10 % (3 weeks) for holidays & unknowns → **33 weeks** (\~8 months). +* **Rough budget** (3 FTE avg × 33 wks × \$150 k/yr) ≈ **\$285 k** payroll + \$15 k ops / tooling. + +--- \ No newline at end of file diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..c6c4683 --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,29 @@ +use std::path::PathBuf; + +use clap::{Parser, Subcommand}; + +/// Marlin – metadata-driven file explorer (CLI utilities) +#[derive(Parser, Debug)] +#[command(author, version, about)] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand, Debug)] +pub enum Commands { + /// Initialise the database (idempotent) + Init, + /// Scan a directory and populate the file index + Scan { + /// Directory to walk + path: PathBuf, + }, + /// Tag files matching a glob pattern + Tag { + /// Glob pattern (quote to avoid shell expansion) + pattern: String, + /// Tag name + tag: String, + }, +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..0cd4bc4 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,31 @@ +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use directories::ProjectDirs; + +/// Runtime configuration (currently just the DB path). +#[derive(Debug, Clone)] +pub struct Config { + pub db_path: PathBuf, +} + +impl Config { + /// Resolve configuration from environment or XDG directories. + pub fn load() -> Result { + let db_path = std::env::var_os("MARLIN_DB_PATH") + .map(PathBuf::from) + .or_else(|| { + ProjectDirs::from("io", "Marlin", "marlin") + .map(|dirs| dirs.data_dir().join("index.db")) + }) + .unwrap_or_else(|| Path::new("index.db").to_path_buf()); + + std::fs::create_dir_all( + db_path + .parent() + .expect("db_path should always have a parent directory"), + )?; + + Ok(Self { db_path }) + } +} diff --git a/src/db/migrations.sql b/src/db/migrations.sql new file mode 100644 index 0000000..863be5f --- /dev/null +++ b/src/db/migrations.sql @@ -0,0 +1,22 @@ +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY, + path TEXT NOT NULL UNIQUE, + size INTEGER, + mtime INTEGER +); + +CREATE TABLE IF NOT EXISTS tags ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE +); + +CREATE TABLE IF NOT EXISTS file_tags ( + file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE, + tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE, + PRIMARY KEY (file_id, tag_id) +); + +CREATE INDEX IF NOT EXISTS idx_files_path ON files(path); +CREATE INDEX IF NOT EXISTS idx_file_tags_tag_id ON file_tags(tag_id); diff --git a/src/db/mod.rs b/src/db/mod.rs new file mode 100644 index 0000000..15efa4b --- /dev/null +++ b/src/db/mod.rs @@ -0,0 +1,28 @@ +use std::path::Path; + +use anyhow::Result; +use rusqlite::{params, Connection}; + +const MIGRATIONS_SQL: &str = include_str!("migrations.sql"); + +/// Open (or create) the SQLite database and run embedded migrations. +pub fn open>(db_path: P) -> Result { + let mut conn = Connection::open(db_path)?; + conn.pragma_update(None, "journal_mode", "WAL")?; + conn.execute_batch(MIGRATIONS_SQL)?; + Ok(conn) +} + +/// Ensure a tag exists, returning its id. +pub fn ensure_tag(conn: &Connection, tag: &str) -> Result { + conn.execute( + "INSERT OR IGNORE INTO tags(name) VALUES (?1)", + params![tag], + )?; + let id: i64 = conn.query_row( + "SELECT id FROM tags WHERE name = ?1", + params![tag], + |row| row.get(0), + )?; + Ok(id) +} diff --git a/src/logging.rs b/src/logging.rs new file mode 100644 index 0000000..a0141ed --- /dev/null +++ b/src/logging.rs @@ -0,0 +1,13 @@ +use tracing_subscriber::{fmt, EnvFilter}; + +/// Initialise global tracing subscriber. +/// +/// Reads `RUST_LOG` for filtering, falls back to `info`. +pub fn init() { + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + fmt() + .with_target(false) + .with_level(true) + .with_env_filter(filter) + .init(); +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..d67043f --- /dev/null +++ b/src/main.rs @@ -0,0 +1,60 @@ +mod cli; +mod config; +mod db; +mod logging; +mod scan; + +use anyhow::Result; +use cli::{Cli, Commands}; +use glob::glob; +use rusqlite::params; +use tracing::{error, info}; + +fn main() -> Result<()> { + logging::init(); + + let args = Cli::parse(); + let cfg = config::Config::load()?; + let conn = db::open(&cfg.db_path)?; + + match args.command { + Commands::Init => { + info!("database initialised at {}", cfg.db_path.display()); + } + Commands::Scan { path } => { + scan::scan_directory(&conn, &path)?; + } + Commands::Tag { pattern, tag } => { + apply_tag(&conn, &pattern, &tag)?; + } + } + + Ok(()) +} + +/// Apply `tag` to every file that matches `pattern`. +fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()> { + let tag_id = db::ensure_tag(conn, tag)?; + let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?; + let mut stmt_insert = conn.prepare( + "INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)", + )?; + + for entry in glob(pattern)? { + match entry { + Ok(path) => { + let path_str = path.to_string_lossy(); + if let Ok(file_id) = + stmt_file.query_row(params![path_str], |row| row.get::<_, i64>(0)) + { + stmt_insert.execute(params![file_id, tag_id])?; + info!(file = %path_str, tag = tag, "tagged"); + } else { + error!(file = %path_str, "file not in index – run `marlin scan` first"); + } + } + Err(e) => error!(error = %e, "glob error"), + } + } + Ok(()) +} diff --git a/src/scan.rs b/src/scan.rs new file mode 100644 index 0000000..8e0f4cb --- /dev/null +++ b/src/scan.rs @@ -0,0 +1,41 @@ +use std::fs; +use std::path::Path; + +use anyhow::Result; +use rusqlite::{params, Connection}; +use tracing::{debug, info}; +use walkdir::WalkDir; + +/// Recursively walk `root` and upsert file metadata. +pub fn scan_directory(conn: &Connection, root: &Path) -> Result { + let tx = conn.transaction()?; + let mut stmt = tx.prepare( + r#" + INSERT INTO files(path, size, mtime) + VALUES (?1, ?2, ?3) + ON CONFLICT(path) DO UPDATE + SET size = excluded.size, + mtime = excluded.mtime + "#, + )?; + + let mut count = 0usize; + for entry in WalkDir::new(root).into_iter().filter_map(Result::ok).filter(|e| e.file_type().is_file()) + { + let meta = fs::metadata(entry.path())?; + let size = meta.len() as i64; + let mtime = meta + .modified()? + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as i64; + + let path_str = entry.path().to_string_lossy(); + stmt.execute(params![path_str, size, mtime])?; + count += 1; + debug!(file = %path_str, "indexed"); + } + + tx.commit()?; + info!(indexed = count, "scan complete"); + Ok(count) +}