Mirror of https://github.com/PR0M3TH3AN/Marlin.git (synced 2025-09-08 07:08:44 +00:00)
.gitignore (vendored)

@@ -38,6 +38,10 @@ test.db
 .env.*
 
-# === Other ===
+# === Other Files ===
 repo-context.txt
 saved_config.yaml
 
+# === Other Dirs ===
+/bench/corpus
+/bench/backups
bench/dirty-vs-full.md (new file)

@@ -0,0 +1,4 @@
+| Command | Mean [ms] | Min [ms] | Max [ms] | Relative |
+|:---|---:|---:|---:|---:|
+| `full-scan` | 427.0 ± 30.5 | 402.2 | 467.4 | 6.36 ± 0.49 |
+| `dirty-scan` | 67.2 ± 2.1 | 64.7 | 71.6 | 1.00 |
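Hyperfine's `Relative` column is the ratio of each command's mean to the fastest command's mean (the 1.00 baseline row). A quick sanity check of the numbers above:

```bash
# 427.0 ms (full-scan mean) / 67.2 ms (dirty-scan mean) ≈ 6.35,
# matching the 6.36 ± 0.49 hyperfine reports, up to rounding.
echo "scale=2; 427.0 / 67.2" | bc
```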
bench/dirty-vs-full.sh (new executable file)

@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+#
+# bench/dirty-vs-full.sh
+#
+# Compare full-scan vs dirty-scan performance on a large corpus,
+# simulating a random set of file modifications before each dirty scan,
+# and reporting corpus size, number of dirty files, and speedup.
+#
+
+set -euo pipefail
+IFS=$'\n\t'
+
+# Path to the marlin binary (adjust if you build elsewhere)
+MARLIN_BIN=${MARLIN_BIN:-target/release/marlin}
+
+# Directory containing your test corpus (100k+ files)
+CORPUS_DIR=${CORPUS_DIR:-bench/corpus}
+
+# Where to put the ephemeral DB
+DB_PATH=${DB_PATH:-bench/index.db}
+
+# How many files to mark dirty before each dirty-scan run
+DIRTY_COUNT=${DIRTY_COUNT:-100}
+
+# Number of warm-up runs
+WARMUPS=${WARMUPS:-3}
+
+# Tell Marlin where to write its DB
+export MARLIN_DB_PATH="$DB_PATH"
+
+# Ensure hyperfine is installed
+if ! command -v hyperfine &>/dev/null; then
+  echo "Error: hyperfine not found. Please install it and try again." >&2
+  exit 1
+fi
+
+# Ensure our corpus exists
+if [ ! -d "$CORPUS_DIR" ]; then
+  echo "Error: corpus directory '$CORPUS_DIR' not found." >&2
+  exit 1
+fi
+
+# Count corpus size
+CORPUS_SIZE=$(find "$CORPUS_DIR" -type f | wc -l | tr -d ' ')
+echo "→ Corpus size: $CORPUS_SIZE files"
+echo "→ Will mark $DIRTY_COUNT files dirty per dirty-scan run"
+echo
+
+# Clean up any old database
+rm -f "$DB_PATH"
+
+# First, populate the DB once so that dirty-scan has something to do
+echo "→ Initial full scan to populate DB"
+"$MARLIN_BIN" scan "$CORPUS_DIR" >/dev/null 2>&1
+
+echo
+echo "→ Benchmarking full vs dirty scan with hyperfine"
+hyperfine \
+  --warmup "$WARMUPS" \
+  --prepare "
+    # wipe and re-populate
+    rm -f '$DB_PATH'
+    mkdir -p bench
+    export MARLIN_DB_PATH='$DB_PATH'
+    $MARLIN_BIN scan '$CORPUS_DIR' >/dev/null 2>&1
+
+    # seed $DIRTY_COUNT random files as 'dirty' in the DB
+    sqlite3 '$DB_PATH' \"INSERT OR IGNORE INTO file_changes(file_id, marked_at)
+                         SELECT id, strftime('%s','now') FROM files
+                         ORDER BY RANDOM()
+                         LIMIT $DIRTY_COUNT;\"
+  " \
+  --command-name "full-scan"  "MARLIN_DB_PATH='$DB_PATH' $MARLIN_BIN scan '$CORPUS_DIR' >/dev/null 2>&1" \
+  --command-name "dirty-scan" "MARLIN_DB_PATH='$DB_PATH' $MARLIN_BIN scan --dirty '$CORPUS_DIR' >/dev/null 2>&1" \
+  --export-markdown bench/dirty-vs-full.md
+
+echo
+echo "Results written to bench/dirty-vs-full.md"
+
+# Extract the speedup factor from the markdown table: the "Relative"
+# column on the full-scan row tells us how many times slower full-scan
+# is relative to dirty-scan (baseline = 1.00). Because each table row
+# starts with '|', the Relative column is awk field 6, not 5.
+SPEEDUP=$(grep '`full-scan`' bench/dirty-vs-full.md \
+  | awk -F'|' '{print $6}' \
+  | xargs)
+
+echo
+echo "→ Summary:"
+echo "   Corpus size:        $CORPUS_SIZE files"
+echo "   Dirty files seeded: $DIRTY_COUNT"
+echo "   Dirty-scan speedup: dirty-scan ran $SPEEDUP times faster than full-scan"
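A minimal invocation sketch, assuming the release binary has been built and the corpus generated first (the variables are the env overrides defined at the top of the script):

```bash
cargo build --release
COUNT=100000 bench/gen-corpus.sh            # see gen-corpus.sh below
DIRTY_COUNT=500 WARMUPS=1 bench/dirty-vs-full.sh
```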
bench/gen-corpus.sh (new executable file)

@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+#
+# bench/gen-corpus.sh
+#
+# Generate a synthetic corpus of N files in nested directories.
+# Defaults to 100 000 files to match the dirty-vs-full benchmark;
+# set COUNT lower for a laptop-friendly smoke test.
+#
+
+set -euo pipefail
+IFS=$'\n\t'
+
+# How many files? (default: 100 000)
+COUNT=${COUNT:-100000}
+# Where to put them
+TARGET=${TARGET:-bench/corpus}
+
+# Wipe any old corpus
+rm -rf "$TARGET"
+mkdir -p "$TARGET"
+
+echo "🚀 Generating $COUNT files under $TARGET…"
+for i in $(seq 1 "$COUNT"); do
+  # bucket into ~100 sub-dirs so walkdir has some structure
+  dir_index=$(( (i - 1) / (COUNT / 100 + 1) ))
+  subdir="$TARGET/dir$(printf "%03d" "$dir_index")"
+  mkdir -p "$subdir"
+  echo "This is file #$i" > "$subdir/file_$i.txt"
+done
+
+echo "✅ Done: $(find "$TARGET" -type f | wc -l) files created."
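Example overrides, using the `COUNT`/`TARGET` variables the script reads; the `find` check mirrors the script's own final count:

```bash
# Smaller corpus for a quick smoke test
COUNT=1000 TARGET=/tmp/corpus bench/gen-corpus.sh
find /tmp/corpus -type f | wc -l    # should print 1000
```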
@@ -1,4 +1,5 @@
 // src/cli.rs
+
 pub mod link;
 pub mod coll;
 pub mod view;
@@ -42,6 +43,10 @@ pub enum Commands {
 
     /// Scan one or more directories and populate the file index
     Scan {
+        /// Only re-index files marked dirty by `marlin watch`
+        #[arg(long)]
+        dirty: bool,
+
         /// Directories to scan (defaults to cwd)
         paths: Vec<std::path::PathBuf>,
     },
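With the new flag wired in, the two scan modes look like this from the shell (a sketch; the paths are placeholders):

```bash
marlin scan ~/Documents ~/Projects   # full rescan of the listed roots
marlin scan                          # full rescan of the current directory
marlin scan --dirty                  # re-index only files queued in file_changes
```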
@@ -16,6 +16,7 @@ use libmarlin::{
     scan,
     utils::determine_scan_root,
 };
+use libmarlin::db::take_dirty;
 
 use anyhow::{Context, Result};
 use clap::{CommandFactory, Parser};
@@ -83,13 +84,31 @@ fn main() -> Result<()> {
         }
 
         /* ---- scan ------------------------------------------------ */
-        Commands::Scan { paths } => {
-            let scan_paths = if paths.is_empty() {
+        Commands::Scan { dirty, paths } => {
+            // Determine full-scan roots
+            let scan_paths: Vec<std::path::PathBuf> = if paths.is_empty() {
                 vec![env::current_dir()?]
-            } else { paths };
+            } else {
+                paths.into_iter().collect()
+            };
 
-            for p in scan_paths {
-                scan::scan_directory(&mut conn, &p)?;
+            if dirty {
+                // Incremental: only re-index the files marked dirty
+                let dirty_ids = take_dirty(&conn)?;
+                for id in dirty_ids {
+                    // look up each path by its file_id
+                    let path: String = conn.query_row(
+                        "SELECT path FROM files WHERE id = ?1",
+                        [id],
+                        |r| r.get(0),
+                    )?;
+                    scan::scan_directory(&mut conn, Path::new(&path))?;
+                }
+            } else {
+                // Full rescan of the given directories
+                for p in scan_paths {
+                    scan::scan_directory(&mut conn, &p)?;
+                }
             }
         }
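One way to watch the dirty queue drain, assuming `MARLIN_DB_PATH` points at the index (a sketch using the `sqlite3` CLI, as the bench script does):

```bash
sqlite3 "$MARLIN_DB_PATH" 'SELECT COUNT(*) FROM file_changes;'   # pending files
marlin scan --dirty
sqlite3 "$MARLIN_DB_PATH" 'SELECT COUNT(*) FROM file_changes;'   # 0: take_dirty clears the queue
```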
@@ -12,23 +12,34 @@ We’ve landed a basic SQLite-backed `files` table and a contentless FTS5 index.
 - **Custom attributes** (`attributes`)
 - **File-to-file relationships** (`links`)
 - **Named collections** (`collections` + `collection_files`)
-- **Saved views** (`views`)
+- **Saved views** (`saved_views`)
 
 Locking this schema now lets downstream CLI & GUI work against a stable model and ensures our migrations stay easy to reason about.
 
+*Note: Tag aliases and their `canonical_id` support are deferred to DP-006 (v1.5).*
+
 ## 2. Decision
 
-1. **Bump to schema version 1.1** in our migration table.
+Each migration will begin by enabling foreign-key enforcement and WAL journaling:
+
+```sql
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+```
+
+All foreign keys use `ON DELETE CASCADE` so deleting a file, tag, etc. automatically cleans up dependents.
+
+1. **Bump to schema version 1.1** in our `schema_version` table.
 2. Provide four migration scripts, applied in order:
-   1. `0001_initial_schema.sql` – create `files`, `tags`, `file_tags`, `attributes`, `files_fts`, core FTS triggers.
-   2. `0002_update_fts_and_triggers.sql` – replace old tag/attr FTS triggers with `INSERT OR REPLACE` semantics for full-row refresh.
-   3. `0003_create_links_collections_views.sql` – introduce `links`, `collections`, `collection_files`, `views` tables.
-   4. `0004_fix_hierarchical_tags_fts.sql` – refine FTS triggers to index full hierarchical tag-paths via a recursive CTE.
+   1. **0001\_initial\_schema.sql** – create core tables (`files`, `tags`, `file_tags`, `attributes`), a contentless FTS5 table (`files_fts`), core FTS triggers, and performance-critical indexes.
+   2. **0002\_update\_fts\_and\_triggers.sql** – replace old tag/attr FTS triggers with `INSERT OR REPLACE` semantics for full-row refresh.
+   3. **0003\_create\_links\_collections\_saved\_views.sql** – introduce `links`, `collections`, `collection_files`, and `saved_views` tables.
+   4. **0004\_fix\_hierarchical\_tags\_fts.sql** – refine FTS triggers to index full hierarchical tag-paths via a recursive CTE.
 3. Expose this schema through our library (`libmarlin::db::open`) so any client sees a v1.1 store.
 
 ## 3. ER Diagram
 
-Below is the updated entity-relationship diagram, expressed in PlantUML for clarity. It shows all of the core metadata domains and their relationships:
+Below is the updated entity-relationship diagram (PlantUML):
 
 ```plantuml
 @startuml
@@ -42,11 +53,10 @@ entity files {
 }
 
 entity tags {
   * id : INTEGER <<PK>>
   --
   name : TEXT
   parent_id : INTEGER <<FK>>
-  canonical_id : INTEGER <<FK>>
 }
 
 entity file_tags {
@@ -63,7 +73,7 @@ entity attributes {
 }
 
 entity links {
   * id : INTEGER <<PK>>
   --
   src_file_id : INTEGER <<FK>>
   dst_file_id : INTEGER <<FK>>
@@ -81,7 +91,7 @@ entity collection_files {
   * file_id : INTEGER <<FK>>
 }
 
-entity views {
+entity saved_views {
   * id : INTEGER <<PK>>
   --
   name : TEXT
@@ -99,11 +109,11 @@ files ||--o{ links : "dst_file_id"
 collections ||--o{ collection_files
 files ||--o{ collection_files
 
-views ||..|| files : "smart queries (via FTS)"
+saved_views ||..|| files : "exec via FTS"
 @enduml
 ```
 
-*(If you prefer a plain-ASCII sketch, you can replace the above PlantUML block with:)*
+Or in plain-ASCII:
 
 ```ascii
 ┌────────┐      ┌────────────┐      ┌───────┐
@@ -124,19 +134,28 @@ views ||..|| files : "smart queries (via FTS)"
 │ collections │1──*─│ collection_files │*──1─│ files │
 └─────────────┘     └──────────────────┘     └────────┘
 
-┌───────┐
-│ views │
-└───────┘
+┌─────────────┐
+│ saved_views │
+│ (exec FTS)  │
+└─────────────┘
 ```
 
 ## 4. Migration Summary
 
 | File | Purpose |
-| ----------------------------------------------- | ------------------------------------------------------- |
+| ------------------------------------------------------ | --------------------------------------------------------------- |
-| **0001\_initial\_schema.sql** | Core tables + contentless FTS + path/triggers |
+| **0001\_initial\_schema.sql** | Core tables + contentless FTS + core triggers + indexes |
 | **0002\_update\_fts\_and\_triggers.sql** | Full-row FTS refresh on tag/attr changes |
-| **0003\_create\_links\_collections\_views.sql** | Add `links`, `collections`, `collection_files`, `views` |
+| **0003\_create\_links\_collections\_saved\_views.sql** | Add `links`, `collections`, `collection_files`, `saved_views` |
-| **0004\_fix\_hierarchical\_tags\_fts.sql** | Recursive CTE for full path tag indexing |
+| **0004\_fix\_hierarchical\_tags\_fts.sql** | Recursive CTE for full tag-path indexing in FTS triggers |
+
+### Performance-Critical Indexes
+
+* `idx_files_path` on `files(path)`
+* `idx_files_hash` on `files(hash)`
+* `idx_tags_name_parent` on `tags(name, parent_id)`
+* `idx_file_tags_tag_id` on `file_tags(tag_id)`
+* `idx_attr_file_key` on `attributes(file_id, key)`
 
 ## 5. Example CLI Session
 
@@ -156,6 +175,10 @@ Saved view 'tasks' = tag:project AND TODO
 
 $ marlin view list
 tasks: tag:project AND TODO
+
+$ marlin view exec tasks
+~/Projects/Alpha/draft1.md
+~/Projects/Beta/final.md
 ```
 
 ## 6. Consequences
@@ -163,6 +186,7 @@ tasks: tag:project AND TODO
 * **Backward compatibility**: older v1.0 stores will be migrated on first open.
 * **Stability**: downstream features (TUI, VS Code, web UI) can depend on a stable v1.1 schema.
 * **Simplicity**: by consolidating metadata domains now, future migrations remain small and focused.
+* **Performance**: v1.1 schema meets our cold-start P95 ≤ 3 s on a 100 k-file corpus (with CI-enforced benchmarks and the indexes above).
 
 ---
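To confirm the performance-critical indexes listed above actually landed after migration, a sketch against SQLite's `sqlite_master` catalog (`MARLIN_DB_PATH` assumed to point at a migrated store):

```bash
sqlite3 "$MARLIN_DB_PATH" \
  "SELECT name FROM sqlite_master WHERE type='index' AND name LIKE 'idx_%' ORDER BY name;"
# Per the ADR's index list, expect: idx_attr_file_key, idx_file_tags_tag_id,
# idx_files_hash, idx_files_path, idx_tags_name_parent
```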
libmarlin/src/db/migrations/0005_add_dirty_table.sql (new file)

@@ -0,0 +1,8 @@
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+
+-- Track which files need re-indexing
+CREATE TABLE IF NOT EXISTS file_changes (
+    file_id   INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
+    marked_at INTEGER NOT NULL  -- UNIX timestamp
+);
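The table can be exercised directly from the shell; this sketch seeds a few rows the same way the benchmark's `--prepare` step does, then reads them back:

```bash
sqlite3 "$MARLIN_DB_PATH" \
  "INSERT OR IGNORE INTO file_changes(file_id, marked_at)
     SELECT id, strftime('%s','now') FROM files LIMIT 5;"
sqlite3 "$MARLIN_DB_PATH" \
  "SELECT file_id, datetime(marked_at, 'unixepoch') FROM file_changes;"
```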
@@ -15,7 +15,7 @@ use rusqlite::{
     Connection,
     OpenFlags,
     OptionalExtension,
     TransactionBehavior,
 };
 use tracing::{debug, info, warn};
 
@@ -26,6 +26,7 @@ const MIGRATIONS: &[(&str, &str)] = &[
     ("0002_update_fts_and_triggers.sql", include_str!("migrations/0002_update_fts_and_triggers.sql")),
     ("0003_create_links_collections_views.sql", include_str!("migrations/0003_create_links_collections_views.sql")),
     ("0004_fix_hierarchical_tags_fts.sql", include_str!("migrations/0004_fix_hierarchical_tags_fts.sql")),
+    ("0005_add_dirty_table.sql", include_str!("migrations/0005_add_dirty_table.sql")),
 ];
 
 /* ─── connection bootstrap ────────────────────────────────────────── */
@@ -39,13 +40,12 @@ pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Connection> {
     conn.pragma_update(None, "foreign_keys", "ON")?;
 
     // Wait up to 30 s for a competing writer before giving up
-    conn.busy_timeout(std::time::Duration::from_secs(30))?; // ← tweaked
+    conn.busy_timeout(std::time::Duration::from_secs(30))?;
 
     apply_migrations(&mut conn)?;
     Ok(conn)
 }
 
-
 /* ─── migration runner ────────────────────────────────────────────── */
 
 pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> {
@@ -85,7 +85,7 @@ pub(crate) fn apply_migrations(conn: &mut Connection) -> Result<()> {
 
         info!("applying migration {}", fname);
         tx.execute_batch(sql)
-            .with_context(|| format!("could not apply migration {fname}"))?;
+            .with_context(|| format!("could not apply migration {}", fname))?;
 
         tx.execute(
             "INSERT INTO schema_version (version, applied_on) VALUES (?1, ?2)",
@@ -158,7 +158,12 @@ pub fn upsert_attr(conn: &Connection, file_id: i64, key: &str, value: &str) -> R
 
 /* ─── links ───────────────────────────────────────────────────────── */
 
-pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+pub fn add_link(
+    conn: &Connection,
+    src_file_id: i64,
+    dst_file_id: i64,
+    link_type: Option<&str>,
+) -> Result<()> {
     conn.execute(
         "INSERT INTO links(src_file_id, dst_file_id, type)
          VALUES (?1, ?2, ?3)
@@ -168,7 +173,12 @@ pub fn add_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type
     Ok(())
 }
 
-pub fn remove_link(conn: &Connection, src_file_id: i64, dst_file_id: i64, link_type: Option<&str>) -> Result<()> {
+pub fn remove_link(
+    conn: &Connection,
+    src_file_id: i64,
+    dst_file_id: i64,
+    link_type: Option<&str>,
+) -> Result<()> {
     conn.execute(
         "DELETE FROM links
          WHERE src_file_id = ?1
@@ -190,8 +200,10 @@ pub fn list_links(
     // Files matching pattern
     let mut stmt = conn.prepare("SELECT id, path FROM files WHERE path LIKE ?1")?;
     let rows = stmt
-        .query_map(params![like_pattern], |r| Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)))?
-        .collect::<Result<Vec<_>, _>>()?;
+        .query_map(params![like_pattern], |r| {
+            Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?))
+        })?
+        .collect::<StdResult<Vec<_>, _>>()?;
 
     let mut out = Vec::new();
     for (fid, fpath) in rows {
@@ -210,8 +222,10 @@ pub fn list_links(
 
     let mut stmt2 = conn.prepare(&sql)?;
     let links = stmt2
-        .query_map(params![fid, link_type], |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?)))?
-        .collect::<Result<Vec<_>, _>>()?;
+        .query_map(params![fid, link_type], |r| {
+            Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
+        })?
+        .collect::<StdResult<Vec<_>, _>>()?;
 
     for (other, typ) in links {
         out.push((fpath.clone(), other, typ));
@@ -238,11 +252,11 @@ pub fn find_backlinks(
         Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
     })?;
 
-    let out = rows.collect::<StdResult<Vec<_>, _>>()?; // rusqlite → anyhow via `?`
+    let out = rows.collect::<StdResult<Vec<_>, _>>()?;
     Ok(out)
 }
 
-/* ─── NEW: collections helpers ────────────────────────────────────── */
+/* ─── collections helpers ─────────────────────────────────────────── */
 
 pub fn ensure_collection(conn: &Connection, name: &str) -> Result<i64> {
     conn.execute(
@@ -281,7 +295,7 @@ pub fn list_collection(conn: &Connection, name: &str) -> Result<Vec<String>> {
     Ok(list)
 }
 
-/* ─── NEW: saved views (smart folders) ────────────────────────────── */
+/* ─── saved views (smart folders) ─────────────────────────────────── */
 
 pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
     conn.execute(
@@ -295,7 +309,6 @@ pub fn save_view(conn: &Connection, name: &str, query: &str) -> Result<()> {
 
 pub fn list_views(conn: &Connection) -> Result<Vec<(String, String)>> {
     let mut stmt = conn.prepare("SELECT name, query FROM views ORDER BY name")?;
-
     let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
     let list = rows.collect::<StdResult<Vec<_>, _>>()?;
     Ok(list)
@@ -307,7 +320,32 @@ pub fn view_query(conn: &Connection, name: &str) -> Result<String> {
         [name],
         |r| r.get::<_, String>(0),
     )
-    .context(format!("no view called '{name}'"))
+    .context(format!("no view called '{}'", name))
+}
+
+/* ─── dirty-scan helpers ──────────────────────────────────────────── */
+
+/// Mark a file as “dirty” so it’ll be picked up by `marlin scan --dirty`.
+pub fn mark_dirty(conn: &Connection, file_id: i64) -> Result<()> {
+    conn.execute(
+        "INSERT OR IGNORE INTO file_changes(file_id, marked_at)
+         VALUES (?1, strftime('%s','now'))",
+        params![file_id],
+    )?;
+    Ok(())
+}
+
+/// Take and clear all dirty file IDs for incremental re-scan.
+pub fn take_dirty(conn: &Connection) -> Result<Vec<i64>> {
+    let mut ids = Vec::new();
+    {
+        let mut stmt = conn.prepare("SELECT file_id FROM file_changes")?;
+        for row in stmt.query_map([], |r| r.get(0))? {
+            ids.push(row?);
+        }
+    }
+    conn.execute("DELETE FROM file_changes", [])?;
+    Ok(ids)
 }
 
 /* ─── backup / restore helpers ────────────────────────────────────── */
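After `apply_migrations` runs, the `schema_version` bookkeeping table (written by the `INSERT` in the hunk above) records one row per applied script, so a migrated store can be sanity-checked from the shell:

```bash
sqlite3 "$MARLIN_DB_PATH" \
  'SELECT version, applied_on FROM schema_version ORDER BY version;'
# The newest row should correspond to 0005_add_dirty_table.sql,
# assuming versions track the migration filename prefixes.
```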
@@ -11,7 +11,10 @@ use walkdir::WalkDir;
 /// Recursively walk `root` and upsert file metadata.
 /// Triggers keep the FTS table in sync.
 pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
+    // Begin a transaction so we batch many inserts/updates together
     let tx = conn.transaction()?;
 
+    // Prepare the upsert statement once
     let mut stmt = tx.prepare(
         r#"
         INSERT INTO files(path, size, mtime)
@@ -23,12 +26,15 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
     )?;
 
     let mut count = 0usize;
 
+    // Walk the directory recursively
     for entry in WalkDir::new(root)
         .into_iter()
         .filter_map(Result::ok)
         .filter(|e| e.file_type().is_file())
     {
         let path = entry.path();
 
         // Skip the database file and its WAL/SHM siblings
         if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
             if name.ends_with(".db") || name.ends_with("-wal") || name.ends_with("-shm") {
@@ -36,6 +42,7 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
             }
         }
 
+        // Gather file metadata
         let meta = fs::metadata(path)?;
         let size = meta.len() as i64;
         let mtime = meta
@@ -43,14 +50,18 @@ pub fn scan_directory(conn: &mut Connection, root: &Path) -> Result<usize> {
             .duration_since(std::time::UNIX_EPOCH)?
             .as_secs() as i64;
 
+        // Execute the upsert
         let path_str = path.to_string_lossy();
         stmt.execute(params![path_str, size, mtime])?;
         count += 1;
 
         debug!(file = %path_str, "indexed");
     }
 
+    // Finalize and commit
     drop(stmt);
     tx.commit()?;
 
     info!(indexed = count, "scan complete");
     Ok(count)
 }
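Since the scanner writes inside a single transaction and deliberately skips `*.db`/`-wal`/`-shm` files, it is still safest to keep the index outside the tree being scanned; a sketch:

```bash
export MARLIN_DB_PATH=/tmp/marlin-index.db   # index lives outside the corpus
target/release/marlin scan ~/Documents
```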
@@ -1 +1 @@
-{"rustc_fingerprint":17558195974417946175,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/user/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\nfmt_debug=\"full\"\noverflow_checks\npanic=\"unwind\"\nproc_macro\nrelocation_model=\"pic\"\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"x87\"\ntarget_has_atomic\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_has_reliable_f128\ntarget_has_reliable_f16\ntarget_has_reliable_f16_math\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"unknown\"\nub_checks\nunix\n","stderr":""},"10431901537437931773":{"success":true,"status":"","code":0,"stdout":"(same cfg list as above, plus tarpaulin)","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.89.0-nightly (777d37277 2025-05-17)\nbinary: rustc\ncommit-hash: 777d372772aa3b39ba7273fcb8208a89f2ab0afd\ncommit-date: 2025-05-17\nhost: x86_64-unknown-linux-gnu\nrelease: 1.89.0-nightly\nLLVM version: 20.1.4\n","stderr":""}},"successes":{}}
+{"rustc_fingerprint":10768506583288887294,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/user/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.86.0 (05f9846f8 2025-03-31)\nbinary: rustc\ncommit-hash: 05f9846f893b09a1be1fc8560e33fc3c815cfecb\ncommit-date: 2025-03-31\nhost: x86_64-unknown-linux-gnu\nrelease: 1.86.0\nLLVM version: 19.1.7\n","stderr":""}},"successes":{}}
Binary file not shown.
@@ -1 +1 @@
-/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/cli-bin/build.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/main.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/config.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/lib.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/logging.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/scan.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/utils.rs
+/home/user/Documents/GitHub/Marlin/target/release/marlin: /home/user/Documents/GitHub/Marlin/cli-bin/build.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/annotate.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/coll.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/event.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/link.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/remind.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/state.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/task.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/version.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli/view.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/cli.rs /home/user/Documents/GitHub/Marlin/cli-bin/src/main.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/config.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0001_initial_schema.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0002_update_fts_and_triggers.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0003_create_links_collections_views.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0004_fix_hierarchical_tags_fts.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/migrations/0005_add_dirty_table.sql /home/user/Documents/GitHub/Marlin/libmarlin/src/db/mod.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/lib.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/logging.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/scan.rs /home/user/Documents/GitHub/Marlin/libmarlin/src/utils.rs