mirror of
https://github.com/PR0M3TH3AN/Marlin.git
synced 2025-09-08 15:18:44 +00:00
31 lines
753 B
Bash
Executable File
31 lines
753 B
Bash
Executable File
#!/usr/bin/env bash
|
|
#
|
|
# bench/gen-corpus.sh
|
|
#
|
|
# Generate a synthetic corpus of N files in nested directories.
|
|
# Defaults to 1 000 files so it stays laptop-friendly.
|
|
#
|
|
|
|
set -euo pipefail
|
|
IFS=$'\n\t'
|
|
|
|
# How many files? (default: 1 000)
|
|
COUNT=${COUNT:-100000}
|
|
# Where to put them
|
|
TARGET=${TARGET:-bench/corpus}
|
|
|
|
# Wipe any old corpus
|
|
rm -rf "$TARGET"
|
|
mkdir -p "$TARGET"
|
|
|
|
echo "🚀 Generating $COUNT files under $TARGET…"
|
|
for i in $(seq 1 "$COUNT"); do
|
|
# bucket into 100 sub-dirs so walkdir has some structure
|
|
dir_index=$(( (i - 1) / (COUNT / 100 + 1) ))
|
|
subdir="$TARGET/dir$(printf "%03d" "$dir_index")"
|
|
mkdir -p "$subdir"
|
|
echo "This is file #$i" > "$subdir/file_$i.txt"
|
|
done
|
|
|
|
echo "✅ Done: $(find "$TARGET" -type f | wc -l) files created."
|