#!/usr/bin/env bash
set -euo pipefail

# ============================================================
# make_audiobook.sh
# One-shot installer + large-text → MP3 audiobook using Piper
# Works on Linux Mint / Ubuntu (needs sudo for apt installs)
#
# Usage:
#   make_audiobook.sh INPUT.txt [OUTPUT.mp3] [--model /path/to/voice.onnx]
#
# Pipeline:
#   1. Make sure python3, ffmpeg, wget, pipx and the Piper CLI are available.
#   2. Make sure a Piper voice model (.onnx + .onnx.json) is available.
#   3. Split the input text into sentence-aligned chunks, synthesize each
#      chunk to WAV with Piper, and join the WAVs (with optional silence).
#   4. Encode the joined WAV to MP3 with ffmpeg/libmp3lame.
# ============================================================

# -----------------------
# CONFIG (edit as needed)
# -----------------------
# Piper model (en_US "hfc_male" voice by default). Pass --model /path/to/voice.onnx
# to use a local .onnx you already downloaded. Otherwise, the script will try to
# download the URLs below (you can swap them with your preferred voice).
MODEL_NAME="en_US-hfc_male-medium"
MODEL_DIR="${HOME}/.local/share/piper/voices"
MODEL_FILE="${MODEL_DIR}/${MODEL_NAME}.onnx"
MODEL_JSON="${MODEL_FILE}.json"
MODEL_ONNX_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/hfc_male/medium/en_US-hfc_male-medium.onnx"
MODEL_JSON_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/hfc_male/medium/en_US-hfc_male-medium.onnx.json"

# Piper synthesis knobs (tune to taste)
LENGTH_SCALE="1.0"   # >1.0 = slower, <1.0 = faster
NOISE_SCALE="0.60"   # lower = flatter; higher = more expressive
MAX_CHARS="3000"     # characters per chunk sent to Piper
SILENCE_MS="300"     # silence between chunks (ms)

# MP3 quality (VBR via -q:a: 0=best, 2~high, 5=medium)
MP3_Q="2"
# -----------------------
# END CONFIG
# -----------------------

# --- helpers ---
# say: green status line on stdout.
say() { printf "\n\033[1;32m%s\033[0m\n" "$*"; }
# warn: yellow notice on stdout.
warn() { printf "\n\033[1;33m%s\033[0m\n" "$*"; }
# die: red error on stderr, then exit 1.
die() {
  printf "\n\033[1;31mERROR:\033[0m %s\n" "$*" >&2
  exit 1
}

# usage: print the command-line synopsis to stderr.
usage() {
  cat >&2 <<EOF
Usage: ${0##*/} INPUT.txt [OUTPUT.mp3] [--model /path/to/voice.onnx]

  INPUT.txt     UTF-8 text file to read aloud
  OUTPUT.mp3    where to write the audiobook (default: INPUT with .mp3 extension)
  --model PATH  use a local Piper .onnx voice instead of the configured default
EOF
}

# download URL DEST: fetch URL into DEST via a temporary ".part" file so that a
# failed or interrupted download never leaves a truncated file at DEST (which a
# later run would mistake for a complete model). Returns non-zero on failure.
download() {
  local url=$1
  local dest=$2
  local tmp="${dest}.part"
  if wget -O "${tmp}" "${url}"; then
    mv -f -- "${tmp}" "${dest}"
  else
    rm -f -- "${tmp}"
    return 1
  fi
}

# --- args ---
# NOTE(review): the original text between the usage heredoc and the pipx check
# was lost from this file. The argument parsing and apt-install steps below are
# a reconstruction based on the variables the rest of the script uses
# (INPUT_TXT, OUTPUT_MP3, CUSTOM_MODEL) and on the "--model" tip printed at the
# end — confirm against the intended CLI.
if [[ $# -lt 1 ]]; then
  usage
  exit 1
fi

INPUT_TXT=""
OUTPUT_MP3=""
CUSTOM_MODEL=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --model)
      [[ $# -ge 2 ]] || die "--model requires a path argument"
      CUSTOM_MODEL="$2"
      shift 2
      ;;
    --model=*)
      CUSTOM_MODEL="${1#--model=}"
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    -*)
      die "Unknown option: $1"
      ;;
    *)
      if [[ -z "${INPUT_TXT}" ]]; then
        INPUT_TXT="$1"
      elif [[ -z "${OUTPUT_MP3}" ]]; then
        OUTPUT_MP3="$1"
      else
        die "Unexpected extra argument: $1"
      fi
      shift
      ;;
  esac
done

if [[ -z "${INPUT_TXT}" ]]; then
  usage
  exit 1
fi
[[ -f "${INPUT_TXT}" ]] || die "Input file not found: ${INPUT_TXT}"
INPUT_TXT="$(realpath "${INPUT_TXT}")"

if [[ -z "${OUTPUT_MP3}" ]]; then
  # Default: same directory and base name as the input, with an .mp3 extension.
  input_dir="$(dirname "${INPUT_TXT}")"
  input_base="$(basename "${INPUT_TXT}")"
  OUTPUT_MP3="${input_dir}/${input_base%.*}.mp3"
fi

# --- system dependencies ---
# Only call apt (and therefore sudo) when something is actually missing.
missing_pkgs=()
command -v python3 >/dev/null 2>&1 || missing_pkgs+=(python3)
command -v ffmpeg >/dev/null 2>&1 || missing_pkgs+=(ffmpeg)
command -v wget >/dev/null 2>&1 || missing_pkgs+=(wget)
command -v pipx >/dev/null 2>&1 || missing_pkgs+=(pipx)
if ((${#missing_pkgs[@]} > 0)); then
  say "Installing system packages: ${missing_pkgs[*]} (sudo required)…"
  sudo apt-get update
  sudo apt-get install -y "${missing_pkgs[@]}"
fi

if ! command -v pipx >/dev/null 2>&1; then
  die "pipx not found after install. Please re-open your terminal and re-run this script."
fi

# Best effort: persist pipx's bin dir in the user's shell profile for future
# sessions. Failure here is harmless for this run, hence "|| true".
pipx ensurepath >/dev/null 2>&1 || true

# Make pipx-installed apps visible to THIS process. The profile is deliberately
# not sourced: it is user-controlled code and, under "set -u", a single unset
# variable in it would abort the whole script.
case ":${PATH}:" in
  *":${HOME}/.local/bin:"*) ;;
  *) export PATH="${HOME}/.local/bin:${PATH}" ;;
esac
hash -r

# Install Piper CLI via pipx if missing
if ! command -v piper >/dev/null 2>&1; then
  say "Installing Piper CLI with pipx…"
  pipx install piper-tts
  hash -r
fi
command -v piper >/dev/null 2>&1 || die "Piper CLI not found on PATH after install."

# --- model setup ---
mkdir -p "${MODEL_DIR}"

if [[ -n "${CUSTOM_MODEL}" ]]; then
  [[ -f "${CUSTOM_MODEL}" ]] || die "Custom model not found: $CUSTOM_MODEL"
  MODEL_FILE="$(realpath "${CUSTOM_MODEL}")"
  MODEL_JSON="${MODEL_FILE}.json"
  [[ -f "${MODEL_JSON}" ]] || warn "Note: ${MODEL_JSON} not found. Piper usually works without it, but voice metadata will be missing."
else
  if [[ ! -f "${MODEL_FILE}" ]]; then
    say "Downloading Piper model: ${MODEL_NAME}"
    download "${MODEL_ONNX_URL}" "${MODEL_FILE}" || die "Failed to download model .onnx"
  fi
  if [[ ! -f "${MODEL_JSON}" ]]; then
    say "Downloading Piper model metadata (.json)…"
    download "${MODEL_JSON_URL}" "${MODEL_JSON}" || warn "Could not download ${MODEL_JSON}. Continuing without metadata."
  fi
fi

# --- temp working dir ---
WORKDIR="$(mktemp -d -t piperbook-XXXXXX)"
# Remove all intermediate WAVs and the worker script on any exit path.
trap 'rm -rf -- "$WORKDIR"' EXIT

# --- python chunk + synth ---
say "Starting synthesis with Piper (this can take a while)…"

PY_SCRIPT="${WORKDIR}/run_piper.py"
# Quoted delimiter: the Python source below is written verbatim (no shell expansion).
cat >"${PY_SCRIPT}" <<'PYCODE'
"""Chunk a text file, synthesize each chunk with Piper, and join the WAVs.

Prints the path of the joined WAV on stdout (and nothing else on stdout);
all diagnostics go to stderr.
"""
import argparse
import os
import re
import shutil
import subprocess
import sys
import wave


def split_sentences(text: str):
    """Collapse all whitespace runs, then split into sentences.

    A split happens after '.', '!' or '?' when followed by whitespace and an
    uppercase letter, digit or '('.
    """
    text = re.sub(r'\s+', ' ', text.strip())
    # basic sentence split: keep punctuation; avoid splitting on common decimals/abbrevs heuristically
    pieces = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])', text)
    return [p.strip() for p in pieces if p.strip()]


def chunk(sentences, max_chars=3000):
    """Greedily pack sentences into space-joined chunks of about max_chars.

    A single sentence longer than max_chars is emitted as its own chunk
    rather than being cut mid-sentence.
    """
    buf, n = [], 0
    for s in sentences:
        if n + len(s) + 1 > max_chars and buf:
            yield ' '.join(buf)
            buf, n = [], 0
        buf.append(s)
        n += len(s) + 1
    if buf:
        yield ' '.join(buf)


def wav_format(path):
    """Return (sample_rate, channels) of a PCM WAV file.

    Falls back to (22050, 1) with a warning if the header cannot be parsed.
    """
    try:
        with wave.open(path, "rb") as w:
            return w.getframerate(), w.getnchannels()
    except (wave.Error, EOFError, OSError) as exc:
        sys.stderr.write(
            f"warning: could not read WAV header of {path} ({exc}); "
            "assuming 22050 Hz mono\n")
        return 22050, 1


def concat_entry(path):
    """Format one 'file' line for ffmpeg's concat demuxer.

    Single quotes inside the path are escaped as '\\'' so that any path is safe.
    """
    return "file '{}'\n".format(path.replace("'", "'\\''"))


def run_ffmpeg(ff_args, what):
    """Run ffmpeg quietly; on failure show its error output and exit."""
    proc = subprocess.run(
        ["ffmpeg", "-nostdin", "-hide_banner", "-loglevel", "error", "-y"] + ff_args,
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    if proc.returncode != 0:
        sys.stderr.write(proc.stderr.decode("utf-8", errors="ignore"))
        sys.exit(f"ffmpeg failed while {what}")


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True)
    ap.add_argument("--model", required=True)
    ap.add_argument("--outdir", required=True)
    ap.add_argument("--length_scale", type=float, default=1.0)
    ap.add_argument("--noise_scale", type=float, default=0.667)
    ap.add_argument("--max_chars", type=int, default=3000)
    ap.add_argument("--silence_ms", type=int, default=300)
    args = ap.parse_args()

    if not shutil.which("piper"):
        sys.exit("piper not found in PATH")
    if not shutil.which("ffmpeg"):
        sys.exit("ffmpeg not found in PATH")

    with open(args.input, "r", encoding="utf-8") as f:
        raw = f.read()

    sents = split_sentences(raw)
    chunks = list(chunk(sents, max_chars=args.max_chars))
    if not chunks:
        sys.exit("Nothing to synthesize.")

    os.makedirs(args.outdir, exist_ok=True)

    # Synthesize each chunk to its own WAV; the chunk text is fed on stdin.
    wavs = []
    for i, text in enumerate(chunks):
        wav_path = os.path.join(args.outdir, f"part_{i:05d}.wav")
        p = subprocess.run(
            ["piper", "--model", args.model,
             "--length_scale", str(args.length_scale),
             "--noise_scale", str(args.noise_scale),
             "--output_file", wav_path],
            input=text.encode("utf-8"),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if p.returncode != 0 or not os.path.exists(wav_path):
            sys.stderr.write(p.stderr.decode("utf-8", errors="ignore"))
            sys.exit(f"Piper failed on chunk {i}")
        wavs.append(wav_path)

    # One shared silence clip, generated in the SAME format as the synthesized
    # audio: the join below is a stream copy, so every input must match.
    sil = None
    if args.silence_ms > 0 and len(wavs) > 1:
        rate, channels = wav_format(wavs[0])
        layout = "mono" if channels == 1 else "stereo"
        sil = os.path.join(args.outdir, "silence.wav")
        run_ffmpeg(
            ["-f", "lavfi", "-i", f"anullsrc=r={rate}:cl={layout}",
             "-t", str(args.silence_ms / 1000.0),
             "-c:a", "pcm_s16le", sil],
            "generating silence")

    # Build concat list with optional silence between (not after) chunks
    list_path = os.path.join(args.outdir, "concat.txt")
    with open(list_path, "w", encoding="utf-8") as lf:
        for i, w in enumerate(wavs):
            lf.write(concat_entry(w))
            if sil is not None and i != len(wavs) - 1:
                lf.write(concat_entry(sil))

    # Concatenate WAVs losslessly
    joined_wav = os.path.join(args.outdir, "joined.wav")
    run_ffmpeg(
        ["-f", "concat", "-safe", "0", "-i", list_path, "-c", "copy", joined_wav],
        "joining audio chunks")

    # The calling shell script captures this line as the result path.
    print(joined_wav)


if __name__ == "__main__":
    main()
PYCODE

# Run the Python worker to synthesize chunks → joined.wav
# (its stdout is exactly the joined WAV path; diagnostics go to stderr).
JOINED_WAV="$(
  python3 "${PY_SCRIPT}" \
    --input "${INPUT_TXT}" \
    --model "${MODEL_FILE}" \
    --outdir "${WORKDIR}/audio" \
    --length_scale "${LENGTH_SCALE}" \
    --noise_scale "${NOISE_SCALE}" \
    --max_chars "${MAX_CHARS}" \
    --silence_ms "${SILENCE_MS}"
)" || die "Synthesis failed (see messages above)."

[[ -f "${JOINED_WAV}" ]] || die "Synthesis did not produce ${JOINED_WAV}"

# Convert to MP3
say "Encoding MP3 → ${OUTPUT_MP3}"
# Only errors are shown, so a failed encode is reported instead of silently
# aborting under "set -e".
ffmpeg -nostdin -hide_banner -loglevel error -y \
  -i "${JOINED_WAV}" -codec:a libmp3lame -q:a "${MP3_Q}" "${OUTPUT_MP3}" ||
  die "ffmpeg failed to encode ${OUTPUT_MP3}"

say "Done! MP3 written to:"
echo "  ${OUTPUT_MP3}"
echo
warn "Tip: To use a different voice, edit the CONFIG section at the top or pass --model /path/to/voice.onnx"
warn "Latest voice list (copy links): https://github.com/rhasspy/piper/blob/master/VOICES.md"