Merge pull request #7 from PR0M3TH3AN/beta

Beta
This commit is contained in:
thePR0M3TH3AN
2025-05-19 00:00:22 -04:00
committed by GitHub
13 changed files with 284 additions and 50 deletions

8
.gitignore vendored
View File

@@ -38,6 +38,10 @@ test.db
.env.* .env.*
# === Other === # === Other Files ===
repo-context.txt repo-context.txt
saved_config.yaml saved_config.yaml
# === Other Dirs ===
/bench/corpus
/bench/backups

4
bench/dirty-vs-full.md Normal file
View File

@@ -0,0 +1,4 @@
| Command | Mean [ms] | Min [ms] | Max [ms] | Relative |
|:---|---:|---:|---:|---:|
| `full-scan` | 427.0 ± 30.5 | 402.2 | 467.4 | 6.36 ± 0.49 |
| `dirty-scan` | 67.2 ± 2.1 | 64.7 | 71.6 | 1.00 |

91
bench/dirty-vs-full.sh Executable file
View File

@@ -0,0 +1,91 @@
#!/usr/bin/env bash
#
# bench/dirty-vs-full.sh
#
# Compare full-scan vs dirty-scan performance on a large corpus.
# Before every timed run the DB is wiped, re-populated with a full scan and a
# random set of files is flagged dirty, so both commands start from the same
# state. Reports corpus size, number of dirty files seeded, and the speedup.
#
# Environment overrides: MARLIN_BIN, CORPUS_DIR, DB_PATH, DIRTY_COUNT, WARMUPS.
# Requires: hyperfine, sqlite3.
#
# NOTE: MARLIN_BIN, CORPUS_DIR and DB_PATH are embedded in single quotes inside
# the command strings handed to hyperfine, so they must not themselves contain
# single-quote characters.
#
set -euo pipefail
IFS=$'\n\t'

# Path to the marlin binary (adjust if you build elsewhere)
MARLIN_BIN=${MARLIN_BIN:-target/release/marlin}
# Directory containing your test corpus (100k+ files)
CORPUS_DIR=${CORPUS_DIR:-bench/corpus}
# Where to put the ephemeral DB
DB_PATH=${DB_PATH:-bench/index.db}
# How many files to mark dirty before each dirty-scan run
DIRTY_COUNT=${DIRTY_COUNT:-100}
# Number of warmup runs
WARMUPS=${WARMUPS:-3}
# Where hyperfine writes its markdown report
RESULTS_MD=bench/dirty-vs-full.md

# Directory that will hold the DB (created on demand)
DB_DIR=$(dirname "$DB_PATH")

# Tell Marlin where to write its DB
export MARLIN_DB_PATH="$DB_PATH"

# Ensure the external tools are installed (sqlite3 is used to seed dirty rows)
for tool in hyperfine sqlite3; do
  if ! command -v "$tool" &>/dev/null; then
    echo "Error: $tool not found. Please install it and try again." >&2
    exit 1
  fi
done

# Ensure the marlin binary is runnable (works for both paths and PATH names)
if ! command -v "$MARLIN_BIN" &>/dev/null; then
  echo "Error: marlin binary '$MARLIN_BIN' not found or not executable." >&2
  exit 1
fi

# Ensure our corpus exists
if [ ! -d "$CORPUS_DIR" ]; then
  echo "Error: corpus directory '$CORPUS_DIR' not found." >&2
  exit 1
fi

# These values are spliced into SQL / passed as counts, so insist on integers
if ! [[ "$DIRTY_COUNT" =~ ^[0-9]+$ ]]; then
  echo "Error: DIRTY_COUNT must be a non-negative integer (got '$DIRTY_COUNT')." >&2
  exit 1
fi
if ! [[ "$WARMUPS" =~ ^[0-9]+$ ]]; then
  echo "Error: WARMUPS must be a non-negative integer (got '$WARMUPS')." >&2
  exit 1
fi

# Count corpus size
CORPUS_SIZE=$(find "$CORPUS_DIR" -type f | wc -l | tr -d ' ')
echo "→ Corpus size: $CORPUS_SIZE files"
echo "→ Will mark $DIRTY_COUNT files dirty per dirty-scan run"
echo

# Clean up any old database, including WAL/SHM siblings left by a crashed run
mkdir -p "$DB_DIR"
rm -f "$DB_PATH" "$DB_PATH-wal" "$DB_PATH-shm"

# Populate the DB once up front; this also surfaces a broken binary or corpus
# early instead of failing silently inside hyperfine's prepare step.
echo "→ Initial full scan to populate DB"
if ! "$MARLIN_BIN" scan "$CORPUS_DIR" >/dev/null 2>&1; then
  echo "Error: initial scan of '$CORPUS_DIR' failed." >&2
  exit 1
fi

# Prepare step, run by hyperfine before every timed run:
#   1. wipe the DB (and its WAL/SHM siblings)
#   2. re-populate it with a full scan
#   3. seed $DIRTY_COUNT random files as 'dirty'
# The steps are chained with && so a failing step aborts the benchmark.
PREPARE="rm -f '$DB_PATH' '$DB_PATH-wal' '$DB_PATH-shm' \
&& mkdir -p '$DB_DIR' \
&& MARLIN_DB_PATH='$DB_PATH' '$MARLIN_BIN' scan '$CORPUS_DIR' >/dev/null 2>&1 \
&& sqlite3 '$DB_PATH' \"INSERT OR IGNORE INTO file_changes(file_id, marked_at) SELECT id, strftime('%s','now') FROM files ORDER BY RANDOM() LIMIT $DIRTY_COUNT;\""

echo
echo "→ Benchmarking full vs dirty scan with hyperfine"
hyperfine \
  --warmup "$WARMUPS" \
  --prepare "$PREPARE" \
  --command-name "full-scan" "MARLIN_DB_PATH='$DB_PATH' '$MARLIN_BIN' scan '$CORPUS_DIR' >/dev/null 2>&1" \
  --command-name "dirty-scan" "MARLIN_DB_PATH='$DB_PATH' '$MARLIN_BIN' scan --dirty '$CORPUS_DIR' >/dev/null 2>&1" \
  --export-markdown "$RESULTS_MD"

echo
echo "Results written to $RESULTS_MD"

# Extract the speedup factor from the markdown table:
# the "Relative" column on the full-scan row tells us how many times
# slower full-scan is relative to dirty-scan (baseline = 1.00).
# Splitting a row on '|' yields an empty field 1 (before the leading pipe), so
# Command is field 2 and Relative is field 6. The row is matched as a literal
# string: in GNU grep a backslash-backtick is a start-of-buffer anchor, not a
# literal backtick, so a regex such as '\`full-scan\`' never matches.
SPEEDUP=$(awk -F'|' 'index($2, "`full-scan`") {
  gsub(/^[ \t]+|[ \t]+$/, "", $6)
  print $6
  exit
}' "$RESULTS_MD")

echo
echo "→ Summary:"
echo " Corpus size: $CORPUS_SIZE files"
echo " Dirty files seeded: $DIRTY_COUNT"
echo " Dirtyscan speedup: dirty-scan ran ${SPEEDUP:-an unknown number of} times faster than full-scan"

30
bench/gen-corpus.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# bench/gen-corpus.sh
#
# Generate a synthetic corpus of COUNT files in nested directories.
# Defaults to 100 000 files; set COUNT=1000 for a quick, laptop-friendly run.
#
# Environment overrides:
#   COUNT   number of files to create (non-negative integer)
#   TARGET  directory to (re)create; it is deleted recursively first
#
set -euo pipefail
IFS=$'\n\t'

# How many files? (default: 100 000)
COUNT=${COUNT:-100000}
# Where to put them
TARGET=${TARGET:-bench/corpus}

if ! [[ "$COUNT" =~ ^[0-9]+$ ]]; then
  echo "Error: COUNT must be a non-negative integer (got '$COUNT')." >&2
  exit 1
fi
# Force base 10 so a value with leading zeros is not read as octal
COUNT=$((10#$COUNT))

# TARGET is removed with rm -rf below; refuse obviously catastrophic values
case "$TARGET" in
  /|.|..)
    echo "Error: refusing to wipe TARGET='$TARGET'." >&2
    exit 1
    ;;
esac

# Wipe any old corpus
rm -rf "$TARGET"
mkdir -p "$TARGET"

echo "🚀 Generating $COUNT files under $TARGET"

# Bucket into at most 100 sub-dirs so walkdir has some structure.
# The bucket size is loop-invariant, and because i only grows, each directory
# needs to be created once: when the bucket index changes.
bucket_size=$(( COUNT / 100 + 1 ))
last_dir_index=-1
for (( i = 1; i <= COUNT; i++ )); do
  dir_index=$(( (i - 1) / bucket_size ))
  if (( dir_index != last_dir_index )); then
    # printf -v avoids forking a subshell per directory name
    printf -v subdir '%s/dir%03d' "$TARGET" "$dir_index"
    mkdir -p "$subdir"
    last_dir_index=$dir_index
  fi
  echo "This is file #$i" > "$subdir/file_$i.txt"
done

# Some wc implementations pad the count with spaces; strip them
CREATED=$(find "$TARGET" -type f | wc -l | tr -d ' ')
echo "✅ Done: $CREATED files created."

View File

@@ -1,4 +1,5 @@
// src/cli.rs // src/cli.rs
pub mod link; pub mod link;
pub mod coll; pub mod coll;
pub mod view; pub mod view;
@@ -42,6 +43,10 @@ pub enum Commands {
/// Scan one or more directories and populate the file index /// Scan one or more directories and populate the file index
Scan { Scan {
/// Only re-index files marked dirty by `marlin watch`
#[arg(long)]
dirty: bool,
/// Directories to scan (defaults to cwd) /// Directories to scan (defaults to cwd)
paths: Vec<std::path::PathBuf>, paths: Vec<std::path::PathBuf>,
}, },

View File

@@ -16,6 +16,7 @@ use libmarlin::{
scan, scan,
utils::determine_scan_root, utils::determine_scan_root,
}; };
use libmarlin::db::take_dirty;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use clap::{CommandFactory, Parser}; use clap::{CommandFactory, Parser};
@@ -83,13 +84,31 @@ fn main() -> Result<()> {
} }
/* ---- scan ------------------------------------------------ */ /* ---- scan ------------------------------------------------ */
Commands::Scan { paths } => { Commands::Scan { dirty, paths } => {
let scan_paths = if paths.is_empty() { // Determine full-scan roots
let scan_paths: Vec<std::path::PathBuf> = if paths.is_empty() {
vec![env::current_dir()?] vec![env::current_dir()?]
} else { paths }; } else {
paths.into_iter().collect()
};
for p in scan_paths { if dirty {
scan::scan_directory(&mut conn, &p)?; // Incremental: only re-index the files marked dirty
let dirty_ids = take_dirty(&conn)?;
for id in dirty_ids {
// look up each path by its file_id
let path: String = conn.query_row(
"SELECT path FROM files WHERE id = ?1",
[id],
|r| r.get(0),
)?;
scan::scan_directory(&mut conn, Path::new(&path))?;
}
} else {
// Full rescan of the given directories
for p in scan_paths {
scan::scan_directory(&mut conn, &p)?;
}
} }
} }

View File

@@ -12,23 +12,34 @@ Weve landed a basic SQLite-backed `files` table and a contentless FTS5 index.
- **Custom attributes** (`attributes`) - **Custom attributes** (`attributes`)
- **File-to-file relationships** (`links`) - **File-to-file relationships** (`links`)
- **Named collections** (`collections` + `collection_files`) - **Named collections** (`collections` + `collection_files`)
- **Saved views** (`views`) - **Saved views** (`saved_views`)
Locking this schema now lets downstream CLI & GUI work against a stable model and ensures our migrations stay easy to reason about. Locking this schema now lets downstream CLI & GUI work against a stable model and ensures our migrations stay easy to reason about.
*Note: Tag aliases and their `canonical_id` support are deferred to DP-006 (v1.5).*
## 2. Decision ## 2. Decision
1. **Bump to schema version 1.1** in our migration table. Each migration will begin by enabling foreign-key enforcement and WAL journaling:
```sql
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
````
All foreign keys use `ON DELETE CASCADE` so deleting a file, tag, etc. automatically cleans up dependents.
1. **Bump to schema version 1.1** in our `schema_version` table.
2. Provide four migration scripts, applied in order: 2. Provide four migration scripts, applied in order:
1. `0001_initial_schema.sql` create `files`, `tags`, `file_tags`, `attributes`, `files_fts`, core FTS triggers.
2. `0002_update_fts_and_triggers.sql` replace old tag/attr FTS triggers with `INSERT OR REPLACE` semantics for full-row refresh. 1. **0001\_initial\_schema.sql** create core tables (`files`, `tags`, `file_tags`, `attributes`), a contentless FTS5 table (`files_fts`), core FTS triggers, and performance-critical indexes.
3. `0003_create_links_collections_views.sql` introduce `links`, `collections`, `collection_files`, `views` tables. 2. **0002\_update\_fts\_and\_triggers.sql** replace old tag/attr FTS triggers with `INSERT OR REPLACE` semantics for full-row refresh.
4. `0004_fix_hierarchical_tags_fts.sql` refine FTS triggers to index full hierarchical tag-paths via a recursive CTE. 3. **0003\_create\_links\_collections\_saved\_views.sql** introduce `links`, `collections`, `collection_files`, and `saved_views` tables.
4. **0004\_fix\_hierarchical\_tags\_fts.sql** refine FTS triggers to index full hierarchical tag-paths via a recursive CTE.
3. Expose this schema through our library (`libmarlin::db::open`) so any client sees a v1.1 store. 3. Expose this schema through our library (`libmarlin::db::open`) so any client sees a v1.1 store.
## 3. ER Diagram ## 3. ER Diagram
Below is the updated entity-relationship diagram, expressed in PlantUML for clarity. It shows all of the core metadata domains and their relationships: Below is the updated entity-relationship diagram (PlantUML):
```plantuml ```plantuml
@startuml @startuml
@@ -42,11 +53,10 @@ entity files {
} }
entity tags { entity tags {
* id : INTEGER <<PK>> * id : INTEGER <<PK>>
-- --
name : TEXT name : TEXT
parent_id : INTEGER <<FK>> parent_id : INTEGER <<FK>>
canonical_id : INTEGER <<FK>>
} }
entity file_tags { entity file_tags {
@@ -63,7 +73,7 @@ entity attributes {
} }
entity links { entity links {
* id : INTEGER <<PK>> * id : INTEGER <<PK>>
-- --
src_file_id : INTEGER <<FK>> src_file_id : INTEGER <<FK>>
dst_file_id : INTEGER <<FK>> dst_file_id : INTEGER <<FK>>
@@ -81,7 +91,7 @@ entity collection_files {
* file_id : INTEGER <<FK>> * file_id : INTEGER <<FK>>
} }
entity views { entity saved_views {
* id : INTEGER <<PK>> * id : INTEGER <<PK>>
-- --
name : TEXT name : TEXT
@@ -99,11 +109,11 @@ files ||--o{ links : "dst_file_id"
collections ||--o{ collection_files collections ||--o{ collection_files
files ||--o{ collection_files files ||--o{ collection_files
views ||..|| files : "smart queries (via FTS)" saved_views ||..|| files : "exec via FTS"
@enduml @enduml
```` ```
*(If you prefer a plainASCII sketch, you can replace the above PlantUML block with:)* Or in plain-ASCII:
```ascii ```ascii
┌────────┐ ┌────────────┐ ┌───────┐ ┌────────┐ ┌────────────┐ ┌───────┐
@@ -124,19 +134,28 @@ views ||..|| files : "smart queries (via FTS)"
│ collections │1──*─│ collection_files │*──1─│ files │ │ collections │1──*─│ collection_files │*──1─│ files │
└─────────────┘ └──────────────────┘ └────────┘ └─────────────┘ └──────────────────┘ └────────┘
┌───────┐ ┌─────────────
│ views │ saved_views │
└───────┘ │ (exec FTS) │
└─────────────┘
``` ```
## 4. Migration Summary ## 4. Migration Summary
| File | Purpose | | File | Purpose |
| ----------------------------------------------- | ------------------------------------------------------- | | ------------------------------------------------------ | ------------------------------------------------------------- |
| **0001\_initial\_schema.sql** | Core tables + contentless FTS + path/triggers | | **0001\_initial\_schema.sql** | Core tables + contentless FTS + core triggers + indexes |
| **0002\_update\_fts\_and\_triggers.sql** | Full-row FTS refresh on tag/attr changes | | **0002\_update\_fts\_and\_triggers.sql** | Full-row FTS refresh on tag/attr changes |
| **0003\_create\_links\_collections\_views.sql** | Add `links`, `collections`, `collection_files`, `views` | | **0003\_create\_links\_collections\_saved\_views.sql** | Add `links`, `collections`, `collection_files`, `saved_views` |
| **0004\_fix\_hierarchical\_tags\_fts.sql** | Recursive CTE for full path tag indexing | | **0004\_fix\_hierarchical\_tags\_fts.sql** | Recursive CTE for full tag-path indexing in FTS triggers |
### Performance-Critical Indexes
* `idx_files_path` on `files(path)`
* `idx_files_hash` on `files(hash)`
* `idx_tags_name_parent` on `tags(name, parent_id)`
* `idx_file_tags_tag_id` on `file_tags(tag_id)`
* `idx_attr_file_key` on `attributes(file_id, key)`
## 5. Example CLI Session ## 5. Example CLI Session
@@ -156,6 +175,10 @@ Saved view 'tasks' = tag:project AND TODO
$ marlin view list $ marlin view list
tasks: tag:project AND TODO tasks: tag:project AND TODO
$ marlin view exec tasks
~/Projects/Alpha/draft1.md
~/Projects/Beta/final.md
``` ```
## 6. Consequences ## 6. Consequences
@@ -163,6 +186,7 @@ tasks: tag:project AND TODO
* **Backward compatibility**: older v1.0 stores will be migrated on first open. * **Backward compatibility**: older v1.0 stores will be migrated on first open.
* **Stability**: downstream features (TUI, VS Code, web UI) can depend on a stable v1.1 schema. * **Stability**: downstream features (TUI, VS Code, web UI) can depend on a stable v1.1 schema.
* **Simplicity**: by consolidating metadata domains now, future migrations remain small and focused. * **Simplicity**: by consolidating metadata domains now, future migrations remain small and focused.
* **Performance**: v1.1 schema meets our cold-start P95 ≤ 3 s on a 100 k-file corpus (with CI-enforced benchmarks and the indexes above).
--- ---

View File

@@ -0,0 +1,8 @@
-- Adds the `file_changes` table: a queue of files that need re-indexing.
--
-- Connection-level settings requested at the top of the migration.
-- NOTE(review): SQLite documents `PRAGMA foreign_keys` as a no-op inside a
-- multi-statement transaction and does not change journal_mode while a
-- transaction is active. If the migration runner executes this script inside
-- a transaction, these two lines have no effect - confirm against the runner.
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
-- Track which files need re-indexing
-- IF NOT EXISTS keeps the statement safe to re-run.
CREATE TABLE IF NOT EXISTS file_changes (
-- file_id is both the primary key and a reference to files(id), so a file has
-- at most one pending row; ON DELETE CASCADE drops that row when the file row
-- is deleted (requires foreign-key enforcement on the connection).
file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
marked_at INTEGER NOT NULL -- UNIX timestamp of when the file was flagged (presumably seconds - confirm against writers)
);

View File

@@ -15,7 +15,7 @@ use rusqlite::{
Connection, Connection,
OpenFlags, OpenFlags,
OptionalExtension, OptionalExtension,
TransactionBehavior, TransactionBehavior,
}; };
use tracing::{debug, info, warn}; use tracing::{debug, info, warn};
@@ -26,6 +26,7 @@ const MIGRATIONS: &[(&str, &str)] = &[
("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")), ("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")), ("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")), ("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
("0005_add_dirty_table.sql", include_str!("migrations/0005_add_dirty_table.sql")),
]; ];
/* ─── connection bootstrap ────────────────────────────────────────── */ /* ─── connection bootstrap ────────────────────────────────────────── */
@@ -39,13 +40,12 @@ pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
conn.pragma_update(None, "foreign_keys", "ON")?; conn.pragma_update(None, "foreign_keys", "ON")?;
// Wait up to 30 s for a competing writer before giving up // Wait up to 30 s for a competing writer before giving up
conn.busy_timeout(std::time::Duration::from_secs(30))?; // ← tweaked conn.busy_timeout(std::time::Duration::from_secs(30))?;
apply_migrations(&mut conn)?; apply_migrations(&mut conn)?;
Ok(conn) Ok(conn)
} }
/* ─── migration runner ────────────────────────────────────────────── */ /* ─── migration runner ────────────────────────────────────────────── */
pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> { pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> {
@@ -85,7 +85,7 @@ pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> {
info!("applying migration {}", fname); info!("applying migration {}", fname);
tx.execute_batch(sql) tx.execute_batch(sql)
.with_context(|| format!("could not apply migration {fname}"))?; .with_context(|| format!("could not apply migration {}", fname))?;
tx.execute( tx.execute(
"INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)", "INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
@@ -158,7 +158,12 @@ pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> R
/* ─── links ───────────────────────────────────────────────────────── */ /* ─── links ───────────────────────────────────────────────────────── */
pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> { pub fn add_link(
conn: &Connection,
src_file_id: i64,
dst_file_id: i64,
link_type: Option<&str>,
) -> Result<()> {
conn.execute( conn.execute(
"INSERT INTO links(src_file_id, dst_file_id, type) "INSERT INTO links(src_file_id, dst_file_id, type)
VALUES (?1, ?2, ?3) VALUES (?1, ?2, ?3)
@@ -168,7 +173,12 @@ pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type
Ok(()) Ok(())
} }
pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> { pub fn remove_link(
conn: &Connection,
src_file_id: i64,
dst_file_id: i64,
link_type: Option<&str>,
) -> Result<()> {
conn.execute( conn.execute(
"DELETE FROM links "DELETE FROM links
WHERE src_file_id = ?1 WHERE src_file_id = ?1
@@ -190,8 +200,10 @@ pub fn list_links(
// Files matching pattern // Files matching pattern
let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?; let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
let rows = stmt let rows = stmt
.query_map(params![like_pattern], |r| Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)))? .query_map(params![like_pattern], |r| {
.collect::<Result<Vec<_>, _>>()?; Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?))
})?
.collect::<StdResult<Vec<_>, _>>()?;
let mut out = Vec::new(); let mut out = Vec::new();
for (fid, fpath) in rows { for (fid, fpath) in rows {
@@ -210,8 +222,10 @@ pub fn list_links(
let mut stmt2 = conn.prepare(&sql)?; let mut stmt2 = conn.prepare(&sql)?;
let links = stmt2 let links = stmt2
.query_map(params![fid, link_type], |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)))? .query_map(params![fid, link_type], |r| {
.collect::<Result<Vec<_>, _>>()?; Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
})?
.collect::<StdResult<Vec<_>, _>>()?;
for (other, typ) in links { for (other, typ) in links {
out.push((fpath.clone(), other, typ)); out.push((fpath.clone(), other, typ));
@@ -238,11 +252,11 @@ pub fn find_backlinks(
Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)) Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
})?; })?;
let out = rows.collect::<StdResult<Vec<_>, _>>()?; // rusqlite → anyhow via `?` let out = rows.collect::<StdResult<Vec<_>, _>>()?;
Ok(out) Ok(out)
} }
/* ─── NEW: collections helpers ────────────────────────────────────── */ /* ─── collections helpers ────────────────────────────────────────── */
pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> { pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> {
conn.execute( conn.execute(
@@ -281,7 +295,7 @@ pub fn list_collection(conn: &Connection, name: &str) -> Result<Vec<String>> {
Ok(list) Ok(list)
} }
/* ─── NEW: saved views (smart folders) ────────────────────────────── */ /* ─── saved views (smart folders) ───────────────────────────────── */
pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> { pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
conn.execute( conn.execute(
@@ -295,7 +309,6 @@ pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> { pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> {
let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?; let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?;
let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?; let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
let list = rows.collect::<StdResult<Vec<_>, _>>()?; let list = rows.collect::<StdResult<Vec<_>, _>>()?;
Ok(list) Ok(list)
@@ -307,7 +320,32 @@ pub fn view_query(conn: &Connection, name: &str) -> Result<String> {
[name], [name],
|r| r.get::<_, String>(0), |r| r.get::<_, String>(0),
) )
.context(format!("no view called '{name}'")) .context(format!("no view called '{}'", name))
}
/* ─── dirtyscan helpers ─────────────────────────────────────────── */
/// Flag `file_id` as dirty by recording it in the `file_changes` table.
///
/// The row is stamped with the current UNIX time (`strftime('%s','now')`).
/// Because the statement is `INSERT OR IGNORE`, flagging a file that already
/// has a pending row is a no-op: the existing row and its `marked_at` value
/// are left untouched.
///
/// # Errors
///
/// Returns the underlying database error if the insert fails.
pub fn mark_dirty(conn: &Connection, file_id: i64) -> Result<()> {
    // The SQL text is kept exactly as before (including the embedded newline).
    let insert_sql = "INSERT OR IGNORE INTO file_changes(file_id, marked_at)
VALUES (?1, strftime('%s','now'))";
    let _rows_inserted = conn.execute(insert_sql, [file_id])?;
    Ok(())
}
/// Take and clear all dirty file IDs for incremental re-scan.
pub fn take_dirty(conn: &Connection) -> Result<Vec<i64>> {
let mut ids = Vec::new();
{
let mut stmt = conn.prepare("SELECT file_id FROM file_changes")?;
for row in stmt.query_map([], |r| r.get(0))? {
ids.push(row?);
}
}
conn.execute("DELETE FROM file_changes", [])?;
Ok(ids)
} }
/* ─── backup / restore helpers ────────────────────────────────────── */ /* ─── backup / restore helpers ────────────────────────────────────── */

View File

@@ -11,7 +11,10 @@ use walkdir::WalkDir;
/// Recursively walk `root` and upsert file metadata. /// Recursively walk `root` and upsert file metadata.
/// Triggers keep the FTS table in sync. /// Triggers keep the FTS table in sync.
pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> { pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
// Begin a transaction so we batch many inserts/updates together
let tx = conn.transaction()?; let tx = conn.transaction()?;
// Prepare the upsert statement once
let mut stmt = tx.prepare( let mut stmt = tx.prepare(
r#" r#"
INSERT INTO files(path, size, mtime) INSERT INTO files(path, size, mtime)
@@ -23,12 +26,15 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
)?; )?;
let mut count = 0usize; let mut count = 0usize;
// Walk the directory recursively
for entry in WalkDir::new(root) for entry in WalkDir::new(root)
.into_iter() .into_iter()
.filter_map(Result::ok) .filter_map(Result::ok)
.filter(|e| e.file_type().is_file()) .filter(|e| e.file_type().is_file())
{ {
let path = entry.path(); let path = entry.path();
// Skip the database file and its WAL/SHM siblings // Skip the database file and its WAL/SHM siblings
if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.ends_with(".db") || name.ends_with("-wal") || name.ends_with("-shm") { if name.ends_with(".db") || name.ends_with("-wal") || name.ends_with("-shm") {
@@ -36,6 +42,7 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
} }
} }
// Gather file metadata
let meta = fs::metadata(path)?; let meta = fs::metadata(path)?;
let size = meta.len() as i64; let size = meta.len() as i64;
let mtime = meta let mtime = meta
@@ -43,14 +50,18 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
.duration_since(std::time::UNIX_EPOCH)? .duration_since(std::time::UNIX_EPOCH)?
.as_secs() as i64; .as_secs() as i64;
// Execute the upsert
let path_str = path.to_string_lossy(); let path_str = path.to_string_lossy();
stmt.execute(params![path_str, size, mtime])?; stmt.execute(params![path_str, size, mtime])?;
count += 1; count += 1;
debug!(file = %path_str, "indexed"); debug!(file = %path_str, "indexed");
} }
// Finalize and commit
drop(stmt); drop(stmt);
tx.commit()?; tx.commit()?;
info!(indexed = count, "scan complete"); info!(indexed = count, "scan complete");
Ok(count) Ok(count)
} }

View File

@@ -1 +1 @@
{"rustc_fingerprint":17558195974417946175,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/user/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\nfmt_debug=\"full\"\noverflow_checks\npanic=\"unwind\"\nproc_macro\nrelocation_model=\"pic\"\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"x87\"\ntarget_has_atomic\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_has_reliable_f128\ntarget_has_reliable_f16\ntarget_has_reliable_f16_math\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"unknown\"\nub_checks\nunix\n","stderr":""},"10431901537437931773":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/user/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\nfmt_debug=\"full\"\noverflow_checks\npanic=\"unwind\"\nproc_macro\nrelocation_model=\"pic\"\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"x87\"\ntarget_has_atomic\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget
_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_has_reliable_f128\ntarget_has_reliable_f16\ntarget_has_reliable_f16_math\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"unknown\"\ntarpaulin\nub_checks\nunix\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.89.0-nightly (777d37277 2025-05-17)\nbinary: rustc\ncommit-hash: 777d372772aa3b39ba7273fcb8208a89f2ab0afd\ncommit-date: 2025-05-17\nhost: x86_64-unknown-linux-gnu\nrelease: 1.89.0-nightly\nLLVM version: 20.1.4\n","stderr":""}},"successes":{}} {"rustc_fingerprint":10768506583288887294,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/user/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.86.0 (05f9846f8 2025-03-31)\nbinary: rustc\ncommit-hash: 05f9846f893b09a1be1fc8560e33fc3c815cfecb\ncommit-date: 2025-03-31\nhost: 
x86_64-unknown-linux-gnu\nrelease: 1.86.0\nLLVM version: 19.1.7\n","stderr":""}},"successes":{}}

Binary file not shown.

View File

@@ -1 +1 @@
/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/cli-bin/build.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/main.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/config.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/lib.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/logging.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/scan.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/utils.rs /home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/cli-bin/build.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/state.rs 
/home/user/Documents/GitHub/Marlin/cli-bin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/main.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/config.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0005_add_dirty_table.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/lib.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/logging.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/scan.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/utils.rs