From bca4d14480b70d43f0b003dbbda2742b659cab65 Mon Sep 17 00:00:00 2001
From: thePR0M3TH3AN <53631862+PR0M3TH3AN@users.noreply.github.com>
Date: Fri, 8 Aug 2025 11:49:19 -0400
Subject: [PATCH] update

---
 README.md         | 172 ++++++++++++++++++++++++++++++++++
 make_audiobook.sh | 233 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 405 insertions(+)
 create mode 100644 README.md
 create mode 100755 make_audiobook.sh

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4fbd83a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,172 @@
+# Piper-TTS Audiobook Script
+
+This project is a **one-command solution** to convert large `.txt` documents into high-quality **MP3 audiobooks** entirely **offline** using [Piper](https://github.com/rhasspy/piper) — a fast, local, open-source text-to-speech engine.
+
+The script:
+- Installs all prerequisites (`pipx`, `ffmpeg`, `piper-tts`)
+- Downloads a chosen Piper voice model (default: **female UK English — en_GB-cori-high**)
+- Splits large text into manageable chunks for Piper
+- Synthesizes each chunk into audio
+- Stitches the chunks together into a single **MP3** file
+
+---
+
+## 📂 Project Structure
+
+```
+
+Piper-TTS-Script/
+├── make_audiobook.sh   # Main script
+└── README.md           # This file
+
+```
+
+---
+
+## ⚙️ Requirements
+
+- **Linux Mint / Ubuntu** (or other Debian-based distro)
+- Internet connection for the first run (to install packages and download the voice model)
+- A `.txt` file containing your book or text to be converted
+
+---
+
+## 📥 Installation
+
+1. **Clone or download this repo**:
+   ```bash
+   cd ~/Documents/GitHub
+   git clone https://github.com/yourusername/Piper-TTS-Script.git
+   ```
+
+*(Or just manually create the folder and place `make_audiobook.sh` inside)*
+
+2. **Make the script executable**:
+
+   ```bash
+   chmod +x ~/Documents/GitHub/Piper-TTS-Script/make_audiobook.sh
+   ```
+
+---
+
+## 🗂 Preparing Your Text File
+
+Place your text file somewhere accessible, e.g.:
+
+```
+~/Documents/audiobooks/mybook.txt
+```
+
+If you have a PDF or EPUB, convert it to plain text first:
+
+```bash
+# PDF to TXT
+pdftotext mybook.pdf mybook.txt
+
+# EPUB to TXT (requires calibre)
+ebook-convert mybook.epub mybook.txt
+```
+
+---
+
+## 🚀 Usage
+
+Basic usage:
+
+```bash
+~/Documents/GitHub/Piper-TTS-Script/make_audiobook.sh ~/Documents/audiobooks/mybook.txt
+```
+
+* **First run**: Installs all dependencies and downloads the default voice model.
+* **Output**: An MP3 file in the same directory as your text:
+
+  ```
+  ~/Documents/audiobooks/mybook.mp3
+  ```
+
+---
+
+## 🔧 Configuration
+
+At the top of `make_audiobook.sh` there is a **CONFIG** section where you can adjust:
+
+| Setting                             | Description                                            |
+| ----------------------------------- | ------------------------------------------------------ |
+| `MODEL_NAME`                        | Name of the Piper voice model                          |
+| `MODEL_FILE`                        | Path to your `.onnx` model file                        |
+| `MODEL_ONNX_URL` / `MODEL_JSON_URL` | Download URLs for the model and metadata               |
+| `LENGTH_SCALE`                      | Speech speed (1.0 = normal, <1 faster, >1 slower)      |
+| `NOISE_SCALE`                       | Expressiveness (lower = flatter, higher = more varied) |
+| `MAX_CHARS`                         | Max characters per chunk sent to Piper                 |
+| `SILENCE_MS`                        | Silence gap between chunks in milliseconds             |
+| `MP3_Q`                             | MP3 quality (VBR: 0=best, 2=high, 5=medium)            |
+
+---
+
+## 🎤 Changing the Voice
+
+This script defaults to:
+
+* **Female UK English** — `en_GB-cori-high`
+
+To change:
+
+1. Visit the official Piper voice list:
+   [https://github.com/rhasspy/piper/blob/master/VOICES.md](https://github.com/rhasspy/piper/blob/master/VOICES.md)
+
+2. Copy the `.onnx` and `.onnx.json` URLs for your chosen voice.
+
+3. Edit the **CONFIG** section in `make_audiobook.sh` with the new:
+
+   * `MODEL_NAME`
+   * `MODEL_ONNX_URL`
+   * `MODEL_JSON_URL`
+
+Example for **Female US English (Lessac)**:
+
+```bash
+MODEL_NAME="en_US-lessac-high"
+MODEL_ONNX_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.onnx"
+MODEL_JSON_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.onnx.json"
+```
+
+---
+
+## 🛠 Advanced Usage
+
+Specify a custom voice model:
+
+```bash
+~/Documents/GitHub/Piper-TTS-Script/make_audiobook.sh ~/Documents/audiobooks/mybook.txt \
+  --model ~/.local/share/piper/voices/en_US-lessac-high.onnx
+```
+
+Name the output file:
+
+```bash
+~/Documents/GitHub/Piper-TTS-Script/make_audiobook.sh ~/Documents/audiobooks/mybook.txt \
+  --name my_custom_audiobook.mp3
+```
+
+---
+
+## ⚠️ Limitations
+
+* For **extremely large books** (>200k characters), processing can still take a while even with chunking.
+* If the voice model URLs change, you’ll need to update them from [VOICES.md](https://github.com/rhasspy/piper/blob/master/VOICES.md).
+* Piper’s pronunciation is very good but not perfect; some technical or foreign words may be read oddly.
+
+---
+
+## 📄 License
+
+This script is released under the MIT License. Piper itself is licensed under the Mozilla Public License 2.0.
+
+---
+
+## 🙋‍♂️ Credits
+
+* [Piper TTS](https://github.com/rhasspy/piper) — Open-source text-to-speech engine
+* [ffmpeg](https://ffmpeg.org) — Audio conversion
+* Script by **Your Name**
+
diff --git a/make_audiobook.sh b/make_audiobook.sh
new file mode 100755
index 0000000..e19fb54
--- /dev/null
+++ b/make_audiobook.sh
@@ -0,0 +1,359 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ============================================================
+# make_audiobook.sh
+# One-shot installer + large-text → MP3 audiobook using Piper
+# Works on Linux Mint / Ubuntu (needs sudo for apt installs)
+#
+# Usage:
+#   make_audiobook.sh /path/to/book.txt [--model VOICE.onnx] [--name OUT.mp3]
+# ============================================================
+
+# -----------------------
+# CONFIG (edit as needed)
+# -----------------------
+# Piper model (female by default). Set MODEL_FILE to a local .onnx if you already downloaded one.
+# Otherwise, the script will try to download the URLs below (you can swap them with your preferred voice).
+MODEL_NAME="en_GB-cori-high"   # Example: UK English, female, high quality
+MODEL_DIR="${HOME}/.local/share/piper/voices"
+MODEL_FILE="${MODEL_DIR}/${MODEL_NAME}.onnx"
+MODEL_JSON="${MODEL_FILE}.json"
+
+# If MODEL_FILE doesn't exist, try to fetch from these URLs:
+MODEL_ONNX_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_GB/cori/high/en_GB-cori-high.onnx"
+MODEL_JSON_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_GB/cori/high/en_GB-cori-high.onnx.json"
+
+# Piper synthesis knobs (tune to taste)
+LENGTH_SCALE="1.0"   # >1.0 = slower, <1.0 = faster
+NOISE_SCALE="0.60"   # lower = flatter; higher = more expressive
+MAX_CHARS="3000"     # characters per chunk sent to Piper
+SILENCE_MS="300"     # silence between chunks (ms)
+
+# MP3 quality (VBR via -q:a: 0=best, 2~high, 5=medium)
+MP3_Q="2"
+
+# -----------------------
+# END CONFIG
+# -----------------------
+
+# --- helpers ---
+# say/warn print a green/yellow status line; die prints a red error to stderr and exits 1.
+say()  { printf "\n\033[1;32m%s\033[0m\n" "$*"; }
+warn() { printf "\n\033[1;33m%s\033[0m\n" "$*"; }
+die()  { printf "\n\033[1;31mERROR:\033[0m %s\n" "$*" >&2; exit 1; }
+
+# Print the command-line synopsis to stderr.
+usage() {
+  cat >&2 <<EOF
+Usage: $(basename "$0") /path/to/book.txt [--model /path/to/voice.onnx] [--name output.mp3]
+EOF
+}
+
+# fetch URL DEST: download via a temporary ".part" file and rename on success,
+# so an interrupted or failed download never leaves a truncated file at DEST
+# that a later run would mistake for a complete one.
+fetch() {
+  local url="$1" dest="$2"
+  if wget -O "${dest}.part" "${url}"; then
+    mv -f "${dest}.part" "${dest}"
+  else
+    rm -f "${dest}.part"
+    return 1
+  fi
+}
+
+# --- args ---
+# NOTE(review): the original argument parsing and apt install steps were lost
+# from this patch (the text between "<<" and ">" was stripped). This section is
+# reconstructed from the README and from the variables used further down
+# (INPUT_TXT, OUTPUT_MP3, CUSTOM_MODEL) - confirm against the original script.
+if [[ $# -lt 1 ]]; then
+  usage
+  exit 1
+fi
+
+INPUT_TXT=""
+CUSTOM_MODEL=""
+OUTPUT_NAME=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --model)
+      [[ $# -ge 2 ]] || die "--model requires a path to a .onnx file"
+      CUSTOM_MODEL="$2"
+      shift 2
+      ;;
+    --name)
+      [[ $# -ge 2 ]] || die "--name requires an output file name"
+      OUTPUT_NAME="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    -*)
+      die "Unknown option: $1"
+      ;;
+    *)
+      [[ -z "${INPUT_TXT}" ]] || die "Only one input file may be given."
+      INPUT_TXT="$1"
+      shift
+      ;;
+  esac
+done
+
+[[ -n "${INPUT_TXT}" ]] || { usage; exit 1; }
+[[ -f "${INPUT_TXT}" ]] || die "Input file not found: ${INPUT_TXT}"
+INPUT_TXT="$(realpath "${INPUT_TXT}")"
+
+# The MP3 is written next to the input text unless --name includes a directory.
+INPUT_DIR="$(dirname "${INPUT_TXT}")"
+INPUT_BASE="$(basename "${INPUT_TXT}")"
+if [[ -z "${OUTPUT_NAME}" ]]; then
+  OUTPUT_MP3="${INPUT_DIR}/${INPUT_BASE%.*}.mp3"
+elif [[ "${OUTPUT_NAME}" == */* ]]; then
+  OUTPUT_MP3="${OUTPUT_NAME}"
+else
+  OUTPUT_MP3="${INPUT_DIR}/${OUTPUT_NAME}"
+fi
+
+# --- dependencies ---
+# Only call apt (and therefore sudo) when something is actually missing.
+MISSING_PKGS=()
+for cmd in pipx ffmpeg wget python3; do
+  command -v "${cmd}" >/dev/null 2>&1 || MISSING_PKGS+=("${cmd}")
+done
+if (( ${#MISSING_PKGS[@]} > 0 )); then
+  say "Installing prerequisites: ${MISSING_PKGS[*]}"
+  sudo apt-get update
+  sudo apt-get install -y "${MISSING_PKGS[@]}"
+fi
+
+# pipx puts its command shims in ~/.local/bin by default, which may not be on
+# PATH in this shell yet. Extending PATH directly avoids sourcing ~/.profile,
+# which can abort the script under `set -u` if it reads an unset variable.
+export PATH="${PIPX_BIN_DIR:-${HOME}/.local/bin}:${PATH}"
+hash -r
+
+if ! command -v pipx >/dev/null 2>&1; then
+  die "pipx not found after install. Please re-open your terminal and re-run this script."
+fi
+pipx ensurepath >/dev/null 2>&1 || true
+
+# Install Piper CLI via pipx if missing
+if ! command -v piper >/dev/null 2>&1; then
+  say "Installing Piper CLI with pipx…"
+  pipx install piper-tts
+  hash -r
+fi
+
+command -v piper >/dev/null 2>&1 || die "Piper CLI not found on PATH after install."
+command -v ffmpeg >/dev/null 2>&1 || die "ffmpeg not found on PATH after install."
+
+# --- model setup ---
+mkdir -p "${MODEL_DIR}"
+
+if [[ -n "${CUSTOM_MODEL}" ]]; then
+  [[ -f "$CUSTOM_MODEL" ]] || die "Custom model not found: $CUSTOM_MODEL"
+  MODEL_FILE="$(realpath "$CUSTOM_MODEL")"
+  MODEL_JSON="${MODEL_FILE}.json"
+  [[ -f "$MODEL_JSON" ]] || warn "Note: ${MODEL_JSON} not found. Piper usually works without it, but voice metadata will be missing."
+else
+  # -s rather than -f: an empty file left by a failed download counts as missing.
+  if [[ ! -s "$MODEL_FILE" ]]; then
+    say "Downloading Piper model: ${MODEL_NAME}"
+    fetch "${MODEL_ONNX_URL}" "${MODEL_FILE}" || die "Failed to download model .onnx"
+  fi
+  if [[ ! -s "$MODEL_JSON" ]]; then
+    say "Downloading Piper model metadata (.json)…"
+    fetch "${MODEL_JSON_URL}" "${MODEL_JSON}" || warn "Could not download ${MODEL_JSON}. Continuing without metadata."
+  fi
+fi
+
+# --- temp working dir ---
+WORKDIR="$(mktemp -d -t piperbook-XXXXXX)"
+trap 'rm -rf "$WORKDIR"' EXIT
+
+# --- python chunk + synth ---
+say "Starting synthesis with Piper (this can take a while)…"
+PY_SCRIPT="${WORKDIR}/run_piper.py"
+cat > "${PY_SCRIPT}" <<'PYCODE'
+"""Chunk a text file, synthesize each chunk with Piper, and join the WAVs.
+
+Progress and errors go to stderr. The only line written to stdout is the path
+of the joined WAV, which the calling shell script captures.
+"""
+import argparse
+import os
+import re
+import shutil
+import subprocess
+import sys
+import textwrap
+import wave
+
+
+def split_sentences(text: str):
+    """Collapse whitespace runs and split *text* into a list of sentences.
+
+    A split happens after '.', '!' or '?' only when the next token starts with
+    an uppercase letter, a digit or '(' - a heuristic, not a real tokenizer.
+    """
+    text = re.sub(r'\s+', ' ', text.strip())
+    pieces = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])', text)
+    return [p.strip() for p in pieces if p.strip()]
+
+
+def chunk(sentences, max_chars=3000):
+    """Yield space-joined groups of sentences, each at most *max_chars* long.
+
+    A single sentence longer than *max_chars* is wrapped on whitespace first,
+    so unpunctuated text can no longer produce an over-long chunk.
+    """
+    buf, size = [], 0
+    for sentence in sentences:
+        for piece in textwrap.wrap(sentence, width=max_chars, break_on_hyphens=False):
+            if buf and size + len(piece) + 1 > max_chars:
+                yield ' '.join(buf)
+                buf, size = [], 0
+            buf.append(piece)
+            size += len(piece) + 1
+    if buf:
+        yield ' '.join(buf)
+
+
+def synthesize(chunks, args):
+    """Run Piper once per chunk; return the WAV paths in reading order."""
+    wavs = []
+    for i, text in enumerate(chunks):
+        print(f"[{i + 1}/{len(chunks)}] synthesizing {len(text)} characters",
+              file=sys.stderr, flush=True)
+        wav_path = os.path.join(args.outdir, f"part_{i:05d}.wav")
+        p = subprocess.run(
+            ["piper",
+             "--model", args.model,
+             "--length_scale", str(args.length_scale),
+             "--noise_scale", str(args.noise_scale),
+             "--output_file", wav_path],
+            input=text.encode("utf-8"),
+            stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
+        if p.returncode != 0 or not os.path.exists(wav_path):
+            sys.stderr.write(p.stderr.decode("utf-8", errors="ignore"))
+            sys.exit(f"Piper failed on chunk {i}")
+        wavs.append(wav_path)
+    return wavs
+
+
+def write_silence(path, like, silence_ms):
+    """Write *silence_ms* of silence to *path* in the audio format of *like*.
+
+    Copying channels, sample width and sample rate from a synthesized part
+    keeps the stream-copy concat valid for voices that are not 22.05 kHz.
+    """
+    with wave.open(like, "rb") as ref:
+        channels = ref.getnchannels()
+        width = ref.getsampwidth()
+        rate = ref.getframerate()
+    frames = int(rate * silence_ms / 1000)
+    # 8-bit PCM is unsigned (silence is 0x80); wider samples are signed (zero).
+    zero = b"\x80" if width == 1 else b"\x00"
+    with wave.open(path, "wb") as out:
+        out.setnchannels(channels)
+        out.setsampwidth(width)
+        out.setframerate(rate)
+        out.writeframes(zero * (frames * channels * width))
+
+
+def concat_entry(path):
+    """Return one ffmpeg concat-list line, escaping single quotes in *path*."""
+    escaped = path.replace("'", "'\\''")
+    return f"file '{escaped}'\n"
+
+
+def main():
+    """Parse arguments, synthesize every chunk and print the joined WAV path."""
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--input", required=True)
+    ap.add_argument("--model", required=True)
+    ap.add_argument("--outdir", required=True)
+    ap.add_argument("--length_scale", type=float, default=1.0)
+    ap.add_argument("--noise_scale", type=float, default=0.667)
+    ap.add_argument("--max_chars", type=int, default=3000)
+    ap.add_argument("--silence_ms", type=int, default=300)
+    args = ap.parse_args()
+
+    if args.max_chars < 1:
+        sys.exit("--max_chars must be a positive integer")
+    for tool in ("piper", "ffmpeg"):
+        if not shutil.which(tool):
+            sys.exit(f"{tool} not found in PATH")
+
+    # utf-8-sig drops a leading BOM; undecodable bytes are replaced rather
+    # than aborting a long run on one stray character.
+    with open(args.input, "r", encoding="utf-8-sig", errors="replace") as f:
+        raw = f.read()
+    chunks = list(chunk(split_sentences(raw), max_chars=args.max_chars))
+    if not chunks:
+        sys.exit("Nothing to synthesize.")
+
+    os.makedirs(args.outdir, exist_ok=True)
+    wavs = synthesize(chunks, args)
+
+    # One shared silence file is enough: the concat list may repeat a path.
+    silence = None
+    if args.silence_ms > 0 and len(wavs) > 1:
+        silence = os.path.join(args.outdir, "silence.wav")
+        write_silence(silence, wavs[0], args.silence_ms)
+
+    list_path = os.path.join(args.outdir, "concat.txt")
+    with open(list_path, "w", encoding="utf-8") as lf:
+        for i, w in enumerate(wavs):
+            if i and silence:
+                lf.write(concat_entry(silence))
+            lf.write(concat_entry(w))
+
+    # Concatenate WAVs losslessly (stream copy, no re-encode)
+    joined_wav = os.path.join(args.outdir, "joined.wav")
+    p = subprocess.run(
+        ["ffmpeg", "-f", "concat", "-safe", "0", "-i", list_path,
+         "-c", "copy", "-y", joined_wav],
+        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
+    if p.returncode != 0:
+        sys.stderr.write(p.stderr.decode("utf-8", errors="ignore"))
+        sys.exit("ffmpeg failed to join the chunks")
+
+    print(joined_wav)
+
+
+if __name__ == "__main__":
+    main()
+PYCODE
+
+# Run the Python worker to synthesize chunks → joined.wav
+# (stdout carries only the joined WAV path; progress is printed on stderr)
+JOINED_WAV="$(
+  python3 "${PY_SCRIPT}" \
+    --input "${INPUT_TXT}" \
+    --model "${MODEL_FILE}" \
+    --outdir "${WORKDIR}/audio" \
+    --length_scale "${LENGTH_SCALE}" \
+    --noise_scale "${NOISE_SCALE}" \
+    --max_chars "${MAX_CHARS}" \
+    --silence_ms "${SILENCE_MS}"
+)" || die "Synthesis failed."
+
+[[ -f "${JOINED_WAV}" ]] || die "Synthesis did not produce ${JOINED_WAV}"
+
+# Convert to MP3
+say "Encoding MP3 → ${OUTPUT_MP3}"
+ffmpeg -hide_banner -loglevel error -y -i "${JOINED_WAV}" \
+  -codec:a libmp3lame -q:a "${MP3_Q}" "${OUTPUT_MP3}" \
+  || die "ffmpeg failed to encode ${OUTPUT_MP3}"
+
+say "Done! MP3 written to:"
+echo "  ${OUTPUT_MP3}"
+
+echo
+warn "Tip: To use a different voice, edit the CONFIG section at the top or pass --model /path/to/voice.onnx"
+warn "Latest voice list (copy links): https://github.com/rhasspy/piper/blob/master/VOICES.md"