diff --git a/Cargo.toml b/Cargo.toml
index e69de29..5ecabd2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "marlin"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = "1.0"
+clap = { version = "4.5.2", features = ["derive"] }
+directories = "5.0"
+glob = "0.3"
+rusqlite = { version = "0.31.0", features = ["bundled"] }
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
+walkdir = "2.5"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7c9fa43
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# 1. Build
+cargo build --release
+
+# 2. Initialise DB (idempotent)
+./target/release/marlin init
+
+# 3. Scan a directory
+./target/release/marlin scan ~/Pictures
+
+# 4. Tag all JPEGs in Pictures (use $HOME – a quoted "~" is not expanded by the shell)
+./target/release/marlin tag "$HOME/Pictures/**/*.jpg" vacation
diff --git a/features.md b/features.md
new file mode 100644
index 0000000..a1165ff
--- /dev/null
+++ b/features.md
@@ -0,0 +1,186 @@
+# Marlin – Metadata‑Driven File Explorer
+
+*Version 2 – 12 May 2025*
+
+---
+
+## 1 Key Features & Functionality
+
+| Feature Area | Capabilities |
+| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **Tagging System** | • Unlimited, hierarchical or flat tags.<br>• Alias/synonym support with precedence rules (admin‑defined canonical name).<br>• **Bulk tag editing** via multi‑select context menu.<br>• Folder‑to‑Tag import with optional *watch & sync* mode so new sub‑folders inherit tags automatically. |
+| **Custom Metadata Attributes** | • User‑defined fields (text, number, date, enum, boolean).<br>• Per‑template **Custom Metadata Schemas** (e.g. *Photo* → *Date, Location*). |
+| **File Relationships** | • Typed, directional or bidirectional links (*related to*, *duplicate of*, *cites*…).<br>• Plugin API can register new relationship sets. |
+| **Version Control for Metadata** | • Every change logged; unlimited roll‑back.<br>• Side‑by‑side diff viewer and *blame* panel showing *who/when/what*.<br>• Offline edits stored locally and merged (Git‑style optimistic merge with conflict prompts). |
+| **Advanced Search & Smart Folders** | • Structured query syntax: `tag:ProjectX AND author:Alice`.<br>• Natural‑language search (*"files Alice edited last month"*) with toggle to exact mode.<br>• Visual Query Builder showing live query string.<br>• Saved queries appear as virtual “smart folders” that update in real‑time. |
+| **User Interface** | • Sidebar: tags, attributes, relationships.<br>• Drag‑and‑drop tagging; inline metadata editor.<br>• Search bar with auto‑complete (Bloom filter backed).<br>• **Dual View Mode** – metadata vs traditional folder; remembers preference per location.<br>• **Interactive 60‑second tour** on first launch plus contextual tooltip help. |
+| **Collaboration** | • Real‑time metadata sync across devices via cloud or self‑hosted relay.<br>• Conflict handling as per Version Control.<br>• Role‑based permissions (read / write / admin) on tags & attributes. |
+| **Performance & Scale** | • Sharded/distributed index optional for >1 M files.<br>• Query cache with LRU eviction.<br>• Target metrics (100 k files): cold start ≤ 3 s, complex query ≤ 150 ms (stretch 50 ms). |
+| **Backup & Restore** | • Scheduled encrypted backups; export to JSON / XML.<br>• One‑click restore from any point‑in‑time snapshot. |
+| **Extensibility** | • Plug‑in system (TypeScript/JS) – see §2.4.<br>• Python scripting hook for automation and batch tasks.<br>• REST/IPC API for external tools. |
+
+---
+
+## 2 Technical Implementation
+
+### 2.1 Core Stack
+
+| Component | Primary Choice | Notes |
+| -------------- | -------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------- |
+| File Manager | **Dolphin (KDE)** KIO‑based plug‑ins | GTK users can install a Nautilus extension (feature‑parity subset). |
+| Metadata Store | **SQLite + FTS5** (single‑user) → optional **LiteFS/Postgres** for replication & multi‑user scale. | Per‑row AES‑GCM encryption for sensitive fields; keys stored in OS keyring. |
+| Indexer Daemon | Rust service using `notify` (inotify on Linux, FSEvents on macOS). | 100 ms debounce batches, async SQLite writes. |
+| Cache | In‑memory LRU + Bloom filter for auto‑complete. | |
+
+### 2.2 Database Schema (simplified)
+
+```text
+files(id PK, path, inode, size, mtime, ctime, hash)
+tags(id PK, name, parent_id, canonical_id)
+file_tags(file_id FK, tag_id FK)
+attributes(id PK, file_id FK, key, value, value_type)
+relationships(id PK, src_file_id FK, dst_file_id FK, rel_type, direction)
+change_log(change_id PK, object_table, object_id, op, actor, ts, payload_json)
+```
+
+### 2.3 Sync & Conflict Resolution
+
+1. Each client appends to **change\_log** (CRDT‑compatible delta).
+2. Delta sync via WebSocket; server merges and re‑broadcasts.
+3. Conflicts → *Conflict Queue* UI (choose theirs / mine / merge).
+
+### 2.4 Plugin API (TypeScript)
+
+```ts
+export interface MarlinPlugin {
+ onInit(ctx: CoreContext): void;
+ extendSchema?(db: Database): void; // e.g. add new relationship table
+ addCommands?(ui: UIContext): void; // register menus, actions
+}
+```
+
+Plugins run in a sandboxed process with whitelisted IPC calls.
+
+---
+
+## 3 UX & Accessibility
+
+* **Keyboard‑only workflow** audit (Tab / Shift‑Tab / Space toggles).
+* High‑contrast theme; adheres to WCAG 2.1 AA.
+* `Ctrl+Alt+V` toggles Dual View.
+* Generated query string shown live under Visual Builder – educates power users.
+
+---
+
+## 4 Performance Budget
+
+| Metric | MVP | Stretch |
+| ------------------------ | --------- | ---------- |
+| Cold start (100 k files) | ≤ 3 s | 1 s |
+| Complex AND/OR query | ≤ 150 ms | 50 ms |
+| Sustained inserts | 5 k ops/s | 20 k ops/s |
+
+Benchmarks run nightly; regressions block merge.
+
+---
+
+## 5 Security & Privacy
+
+* **Role‑based ACL** on tags/attributes.
+* Per‑change audit trail; logs rotated to cold storage (≥ 90 days online).
+* Plugins confined by seccomp/AppArmor; no direct disk/network unless declared.
+
+---
+
+## 6 Packaging & Distribution
+
+* **Flatpak** (GNOME/KDE) and **AppImage** for portable builds.
+* Background service runs as a systemd user unit (`marlin-indexerd.service`), managed with `systemctl --user`.
+* CLI (`marlin-cli`) packaged for headless servers & CI.
+
+---
+
+## 7 Roadmap
+
+| Milestone | Scope | Timeline |
+| --------- | ----------------------------------------------------------------------------- | -------- |
+| **M1** | Tagging, attributes, virtual folders, SQLite, Dolphin plug‑in | 6 weeks |
+| **M2** | Sync service, version control, CLI | +6 weeks |
+| **M3** | NLP search, Visual Builder, distributed index prototype | +6 weeks |
+| **M4** | Plugin marketplace, enterprise auth (LDAP/OIDC), mobile companion (view‑only) | +8 weeks |
+
+---
+
+## 8 Branding
+
+* **Name**: **Marlin** – fast, precise.
+* Icon: stylised sailfish fin forming a folder corner.
+* Tagline: *“Cut through clutter.”*
+* Domain: `marlin‑explorer.io` (availability checked 2025‑05‑12).
+
+---
+
+## 9 Quick‑Win Checklist (Sprint 0)
+
+* [ ] Implement bulk metadata editor UI
+* [ ] Write conflict‑resolution spec & unit tests
+* [ ] Build diff viewer prototype
+* [ ] Keyboard‑only navigation audit
+* [ ] Establish performance CI with sample 100 k file corpus
+
+---
+
+## 10 Development Plan (Outline)
+
+### 10.1 Process & Methodology
+
+* **Framework** – 2‑week Scrum sprints with Jira backlog, GitHub Projects mirror for public issues.
+* **Branching** – Trunk‑based: feature branches → PR → required CI & code‑review approvals (2). *Main* auto‑deploys nightly Flatpak.
+* **Definition of Done** – Code + unit tests + docs + passing CI + demo video (for UI work).
+* **CI/CD** – GitHub Actions matrix (Ubuntu 22.04, KDE Neon, Fedora 39) → Flatpak / AppImage artefacts, `cargo clippy`, coverage gate ≥ 85 %.
+
+### 10.2 Team & Roles (FTE‑equivalent)
+
+| Role | Core Skills | Allocation |
+| ----------------------------- | -------------------------------- | ---------- |
+| Lead Engineer | Rust, Qt/Kirigami, KIO | 1.0 |
+| Backend Engineer | Rust, LiteFS/Postgres, WebSocket | 1.0 |
+| Full‑stack / Plug‑in Engineer | TypeScript, Node, IPC | 0.8 |
+| UX / QA | Figma, accessibility, Playwright | 0.5 |
+| DevOps (fractional) | CI, Flatpak, security hardening | 0.2 |
+
+### 10.3 Roadmap → Sprint‑level Tasks
+
+| Sprint | Goal | Key Tasks | Exit Criteria |
+| ---------------------- | -------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- |
+| **S0 (2 wks)** | Project bootstrap | • Repo + CI skeleton<br>• SQLite schema + migrations<br>• `marlin-cli init` & basic scan<br>• Hyperfine perf baseline | CLI scans dir; tests pass; artefact builds |
+| **S1–3 (M1, 6 wks)** | Tagging + virtual folders MVP | • Indexer daemon in Rust<br>• CRUD tags/attributes via CLI & DB<br>• Dolphin plug‑in: sidebar + tag view<br>• KIO `tags://` virtual folder<br>• Bulk‑edit dialog | 100 k‑file corpus cold‑start ≤ 3 s; user can tag files & navigate `tags://Urgent` |
+| **S4–6 (M2, 6 wks)** | Sync & version control | • Change‑log table + diff viewer<br>• LiteFS replication PoC<br>• WebSocket delta sync<br>• Conflict queue UI + last‑write‑wins fallback | Two devices sync metadata in <1 s round‑trip; rollback works |
+| **S7–9 (M3, 6 wks)** | NLP search & Visual Builder | • Integrate Tantivy FTS + ONNX intent model<br>• Toggle exact vs natural search<br>• QML Visual Builder with live query string | NL query "docs Alice edited last week" returns expected set in ≤ 300 ms |
+| **S10–13 (M4, 8 wks)** | Plug‑in marketplace & mobile companion | • IPC sandbox + manifest spec<br>• Sample plug‑ins (image EXIF auto‑tagger)<br>• Flutter read‑only client<br>• LDAP/OIDC enterprise auth | First external plug‑in published; mobile app lists smart folders |
+
+### 10.4 Tooling & Infrastructure
+
+* **Issue tracking** – Jira → labels `component/indexer`, `component/ui`.
+* **Docs** – mkdocs‑material hosted on GitHub Pages; automatic diagram generation via `cargo doc` + Mermaid.
+* **Nightly Perf Benchmarks** – Run in CI against 10 k, 100 k, 1 M synthetic corpora; fail build if P95 query > target.
+* **Security** – Dependabot, Trivy scans, optional SLSA level 2 provenance for releases.
+
+### 10.5 Risks & Mitigations
+
+| Risk | Impact | Mitigation |
+| ------------------------------ | ---------------- | --------------------------------------------------------------------------- |
+| CRDT complexity | Delays M2 | Ship LWW first; schedule CRDT refactor post‑launch |
+| File system event overflow | Index corruption | Debounce & auto‑fallback to full rescan; alert user |
+| Cross‑distro packaging pain | Adoption drops | Stick to Flatpak; AppImage only for power users; collect telemetry (opt‑in) |
+| Scaling >1 M files on slow HDD | Perf complaints | Offer "index on SSD" wizard; tune FTS page cache |
+
+### 10.6 Budget & Timeline Snapshot
+
+* **Total dev time** ≈ 30 weeks.
+* **Buffer** +10 % (3 weeks) for holidays & unknowns → **33 weeks** (\~8 months).
+* **Rough budget** (3 FTE avg × 33 wks × \$150 k/yr) ≈ **\$285 k** payroll + \$15 k ops / tooling.
+
+---
\ No newline at end of file
diff --git a/src/cli.rs b/src/cli.rs
new file mode 100644
index 0000000..c6c4683
--- /dev/null
+++ b/src/cli.rs
@@ -0,0 +1,29 @@
+use std::path::PathBuf;
+
+use clap::{Parser, Subcommand};
+
+// Top-level command-line arguments, parsed through clap's derive API.
+// NOTE(review): clap's derive uses `///` doc comments on these items as help text,
+// so maintainer notes are kept in plain `//` comments to leave `--help` unchanged.
+/// Marlin – metadata-driven file explorer (CLI utilities)
+#[derive(Parser, Debug)]
+#[command(author, version, about)]
+pub struct Cli {
+ // The sub-command chosen by the user; see `Commands` for the variants.
+ #[command(subcommand)]
+ pub command: Commands,
+}
+
+// Sub-commands accepted by the CLI. Each variant's fields become that
+// sub-command's arguments; the `///` comments are kept verbatim because clap's
+// derive uses them as help text.
+#[derive(Subcommand, Debug)]
+pub enum Commands {
+ /// Initialise the database (idempotent)
+ Init,
+ /// Scan a directory and populate the file index
+ Scan {
+ /// Directory to walk
+ path: PathBuf,
+ },
+ /// Tag files matching a glob pattern
+ Tag {
+ // Kept as a `String` rather than a path: it is a pattern, not a single file.
+ /// Glob pattern (quote to avoid shell expansion)
+ pattern: String,
+ /// Tag name
+ tag: String,
+ },
+}
diff --git a/src/config.rs b/src/config.rs
new file mode 100644
index 0000000..0cd4bc4
--- /dev/null
+++ b/src/config.rs
@@ -0,0 +1,31 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::Result;
+use directories::ProjectDirs;
+
+/// Settings resolved at start-up.
+///
+/// At present the only setting is the location of the index database.
+#[derive(Clone, Debug)]
+pub struct Config {
+    /// Filesystem path of the index database file.
+    pub db_path: PathBuf,
+}
+
+impl Config {
+    /// Resolve configuration from the environment or the platform data directory.
+    ///
+    /// The database path is chosen in this order:
+    /// 1. the `MARLIN_DB_PATH` environment variable, when set and non-empty;
+    /// 2. `index.db` inside the data directory reported by `ProjectDirs`;
+    /// 3. `index.db` relative to the current working directory.
+    ///
+    /// The parent directory of the chosen path is created when it is missing.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the parent directory cannot be created.
+    pub fn load() -> Result<Self> {
+        let db_path = std::env::var_os("MARLIN_DB_PATH")
+            // An empty value carries no usable path, so treat it like an unset variable.
+            .filter(|raw| !raw.is_empty())
+            .map(PathBuf::from)
+            .or_else(|| {
+                ProjectDirs::from("io", "Marlin", "marlin")
+                    .map(|dirs| dirs.data_dir().join("index.db"))
+            })
+            .unwrap_or_else(|| Path::new("index.db").to_path_buf());
+
+        // `parent()` is `None` for a path such as `/`, which a user can supply through
+        // `MARLIN_DB_PATH`, and is empty for a bare file name. Neither case has a
+        // directory to create, so skip the call instead of panicking.
+        if let Some(parent) = db_path.parent().filter(|p| !p.as_os_str().is_empty()) {
+            std::fs::create_dir_all(parent)?;
+        }
+
+        Ok(Self { db_path })
+    }
+}
diff --git a/src/db/migrations.sql b/src/db/migrations.sql
new file mode 100644
index 0000000..863be5f
--- /dev/null
+++ b/src/db/migrations.sql
@@ -0,0 +1,22 @@
+-- Foreign-key enforcement is a per-connection setting in SQLite, so it is
+-- switched on here every time this script runs.
+PRAGMA foreign_keys = ON;
+
+-- One row per indexed file, keyed by its path.
+CREATE TABLE IF NOT EXISTS files (
+    id INTEGER PRIMARY KEY,
+    path TEXT NOT NULL UNIQUE,
+    size INTEGER,
+    mtime INTEGER
+);
+
+-- One row per distinct tag name.
+CREATE TABLE IF NOT EXISTS tags (
+    id INTEGER PRIMARY KEY,
+    name TEXT NOT NULL UNIQUE
+);
+
+-- Many-to-many link between files and tags; rows disappear automatically when
+-- either side is deleted.
+CREATE TABLE IF NOT EXISTS file_tags (
+    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
+    tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
+    PRIMARY KEY (file_id, tag_id)
+);
+
+-- No explicit index on files(path): the UNIQUE constraint on that column already
+-- makes SQLite maintain an index for it, so a second one would only add write cost.
+-- The composite primary key of file_tags leads with file_id; this index serves
+-- lookups that start from a tag.
+CREATE INDEX IF NOT EXISTS idx_file_tags_tag_id ON file_tags(tag_id);
diff --git a/src/db/mod.rs b/src/db/mod.rs
new file mode 100644
index 0000000..15efa4b
--- /dev/null
+++ b/src/db/mod.rs
@@ -0,0 +1,28 @@
+use std::path::Path;
+
+use anyhow::Result;
+use rusqlite::{params, Connection};
+
+/// Schema SQL embedded into the binary at compile time from `migrations.sql`
+/// (resolved relative to this source file).
+const MIGRATIONS_SQL: &str = include_str!("migrations.sql");
+
+/// Open (or create) the SQLite database at `db_path` and run the embedded migrations.
+///
+/// The connection is switched to WAL journaling, then the SQL in `MIGRATIONS_SQL`
+/// is executed as one batch.
+///
+/// # Errors
+///
+/// Returns an error if the database cannot be opened, the journal mode cannot be
+/// set, or the migration batch fails.
+pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
+    // None of the calls below need `&mut Connection`, so the binding stays immutable.
+    let conn = Connection::open(db_path)?;
+    conn.pragma_update(None, "journal_mode", "WAL")?;
+    conn.execute_batch(MIGRATIONS_SQL)?;
+    Ok(conn)
+}
+
+/// Ensure a tag named `tag` exists and return its row id.
+///
+/// # Errors
+///
+/// Returns an error if either the insert or the follow-up lookup fails.
+pub fn ensure_tag(conn: &Connection, tag: &str) -> Result<i64> {
+    // `INSERT OR IGNORE` does nothing when the name is already present, so the id
+    // is always read back with a separate query instead of relying on the insert.
+    conn.execute(
+        "INSERT OR IGNORE INTO tags(name) VALUES (?1)",
+        params![tag],
+    )?;
+    let id: i64 = conn.query_row(
+        "SELECT id FROM tags WHERE name = ?1",
+        params![tag],
+        |row| row.get(0),
+    )?;
+    Ok(id)
+}
diff --git a/src/logging.rs b/src/logging.rs
new file mode 100644
index 0000000..a0141ed
--- /dev/null
+++ b/src/logging.rs
@@ -0,0 +1,13 @@
+use tracing_subscriber::{fmt, EnvFilter};
+
+/// Install the global tracing subscriber.
+///
+/// The filter is taken from the default environment variable (`RUST_LOG`); when
+/// that yields no usable filter, `info` is used instead.
+pub fn init() {
+    let filter = match EnvFilter::try_from_default_env() {
+        Ok(from_env) => from_env,
+        Err(_) => EnvFilter::new("info"),
+    };
+
+    let builder = fmt()
+        .with_target(false)
+        .with_level(true)
+        .with_env_filter(filter);
+    builder.init();
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..d67043f
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,60 @@
+mod cli;
+mod config;
+mod db;
+mod logging;
+mod scan;
+
+use anyhow::Result;
+use cli::{Cli, Commands};
+use glob::glob;
+use rusqlite::params;
+use tracing::{error, info};
+
+/// Program entry point: set up logging, parse the command line, open the
+/// database and dispatch to the selected sub-command.
+///
+/// # Errors
+///
+/// Propagates any error from loading the configuration, opening the database,
+/// scanning, or tagging.
+fn main() -> Result<()> {
+    logging::init();
+
+    // `parse` is provided by the `clap::Parser` trait, which this file does not
+    // import; naming the trait explicitly keeps the call resolvable.
+    let args = <Cli as clap::Parser>::parse();
+    let cfg = config::Config::load()?;
+    let conn = db::open(&cfg.db_path)?;
+
+    match args.command {
+        Commands::Init => {
+            // The database was already opened above, so only the report is left.
+            info!("database initialised at {}", cfg.db_path.display());
+        }
+        Commands::Scan { path } => {
+            scan::scan_directory(&conn, &path)?;
+        }
+        Commands::Tag { pattern, tag } => {
+            apply_tag(&conn, &pattern, &tag)?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Apply `tag` to every indexed file whose path matches the glob `pattern`.
+///
+/// The tag is created if it does not exist yet. A matched path with no row in
+/// `files` is logged and skipped, as are per-entry glob errors.
+///
+/// # Errors
+///
+/// Returns an error if `pattern` is not a valid glob or if any database
+/// operation fails.
+fn apply_tag(conn: &rusqlite::Connection, pattern: &str, tag: &str) -> Result<()> {
+    let tag_id = db::ensure_tag(conn, tag)?;
+    let mut stmt_file = conn.prepare("SELECT id FROM files WHERE path = ?1")?;
+    let mut stmt_insert = conn.prepare(
+        "INSERT OR IGNORE INTO file_tags(file_id, tag_id) VALUES (?1, ?2)",
+    )?;
+
+    for entry in glob(pattern)? {
+        let path = match entry {
+            Ok(path) => path,
+            Err(e) => {
+                error!(error = %e, "glob error");
+                continue;
+            }
+        };
+        let path_str = path.to_string_lossy();
+
+        match stmt_file.query_row(params![path_str], |row| row.get::<_, i64>(0)) {
+            Ok(file_id) => {
+                stmt_insert.execute(params![file_id, tag_id])?;
+                info!(file = %path_str, tag = tag, "tagged");
+            }
+            // Only a missing row means the file was never scanned.
+            Err(rusqlite::Error::QueryReturnedNoRows) => {
+                error!(file = %path_str, "file not in index – run `marlin scan` first");
+            }
+            // Any other failure is a real database error and must not be
+            // reported as "not in index".
+            Err(e) => return Err(e.into()),
+        }
+    }
+    Ok(())
+}
diff --git a/src/scan.rs b/src/scan.rs
new file mode 100644
index 0000000..8e0f4cb
--- /dev/null
+++ b/src/scan.rs
@@ -0,0 +1,41 @@
+use std::fs;
+use std::path::Path;
+
+use anyhow::Result;
+use rusqlite::{params, Connection};
+use tracing::{debug, info};
+use walkdir::WalkDir;
+
+/// Recursively walk `root` and upsert size and mtime for every regular file found.
+///
+/// All rows are written inside one transaction that is committed after the walk
+/// finishes. Entries that cannot be read during the walk, and files whose
+/// metadata cannot be fetched, are logged and skipped.
+///
+/// Returns the number of files written to the index.
+///
+/// # Errors
+///
+/// Returns an error if a database operation fails, if a file's modification
+/// time is unavailable, or if a size or timestamp does not fit in an `i64`.
+pub fn scan_directory(conn: &Connection, root: &Path) -> Result<usize> {
+    // `Connection::transaction` requires `&mut Connection`; only a shared borrow is
+    // available here, so the `unchecked_transaction` entry point is used instead.
+    let tx = conn.unchecked_transaction()?;
+    let mut count = 0usize;
+
+    // Inner scope: the prepared statement borrows `tx` and has to be dropped
+    // before `commit` can take the transaction by value.
+    {
+        let mut stmt = tx.prepare(
+            r#"
+            INSERT INTO files(path, size, mtime)
+            VALUES (?1, ?2, ?3)
+            ON CONFLICT(path) DO UPDATE
+            SET size = excluded.size,
+                mtime = excluded.mtime
+            "#,
+        )?;
+
+        for entry in WalkDir::new(root) {
+            let entry = match entry {
+                Ok(entry) => entry,
+                Err(err) => {
+                    // Keep the scan best-effort, but no longer silent.
+                    tracing::warn!(error = %err, "skipping unreadable entry");
+                    continue;
+                }
+            };
+            if !entry.file_type().is_file() {
+                continue;
+            }
+
+            // A file can vanish between the walk and this call; skip it rather
+            // than abort (and roll back) the whole scan.
+            let meta = match fs::metadata(entry.path()) {
+                Ok(meta) => meta,
+                Err(err) => {
+                    tracing::warn!(
+                        file = %entry.path().display(),
+                        error = %err,
+                        "skipping file: metadata unavailable"
+                    );
+                    continue;
+                }
+            };
+            let size = i64::try_from(meta.len())?;
+            // Timestamps before the Unix epoch are stored as negative seconds
+            // instead of failing the scan.
+            let mtime = match meta.modified()?.duration_since(std::time::UNIX_EPOCH) {
+                Ok(after) => i64::try_from(after.as_secs())?,
+                Err(before) => -i64::try_from(before.duration().as_secs())?,
+            };
+
+            let path_str = entry.path().to_string_lossy();
+            stmt.execute(params![path_str, size, mtime])?;
+            count += 1;
+            debug!(file = %path_str, "indexed");
+        }
+    }
+
+    tx.commit()?;
+    info!(indexed = count, "scan complete");
+    Ok(count)
+}