mirror of
https://github.com/PR0M3TH3AN/VoxVera.git
synced 2025-09-07 14:38:42 +00:00
118 lines
4.0 KiB
Bash
118 lines
4.0 KiB
Bash
#!/bin/bash
|
|
|
|
# Define the input PDF, output text file, and output JSON
|
|
pdf_file="./from_client/submission_form.pdf"
|
|
text_file="submission_form.txt"
|
|
json_file="config.json"
|
|
|
|
# Log the start of the script
|
|
echo "Starting script..."
|
|
echo "Looking for PDF at: $pdf_file"
|
|
echo "Config JSON: $json_file"
|
|
|
|
# Convert PDF to text
|
|
echo "Converting PDF to text..."
|
|
pdftotext -layout "$pdf_file" "$text_file"
|
|
|
|
# Extract individual fields using grep, logging each step
|
|
echo "Extracting fields from the text file..."
|
|
|
|
# Adjusting the extraction of the name field
|
|
name=$(grep -A 3 'name (max 25 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted name: '$name'"
|
|
|
|
subdomain=$(grep -A 1 'subdomain (max 30 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted subdomain: '$subdomain'"
|
|
|
|
title=$(grep -A 1 'title (max 30 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted title: '$title'"
|
|
|
|
subtitle=$(grep -A 1 'subtitle (max 45 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted subtitle: '$subtitle'"
|
|
|
|
headline=$(grep -A 1 'headline (max 50 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted headline: '$headline'"
|
|
|
|
# Process the content to remove soft wraps while keeping hard returns
|
|
content=$(grep -A 100 'content (max 1,400 characters):' "$text_file" | sed -n '2,/url_message/p' | sed '$d' | awk '
|
|
{
|
|
if (NR == 1) {
|
|
prev = $0;
|
|
next;
|
|
}
|
|
|
|
# Handle paragraph breaks explicitly
|
|
if ($0 ~ /^[[:space:]]*$/) {
|
|
if (prev) print prev "\n";
|
|
prev = "";
|
|
}
|
|
# Handle sentences that should stay together
|
|
else if (prev ~ /[.?!]$/ && $0 ~ /^[[:upper:]]/) {
|
|
prev = prev " " $0;
|
|
}
|
|
# Join lines that are part of the same sentence (soft wrap)
|
|
else if (prev !~ /[.?!]$/) {
|
|
prev = prev " " $0;
|
|
}
|
|
# Otherwise, treat as a new sentence/paragraph
|
|
else {
|
|
if (prev) print prev;
|
|
prev = $0;
|
|
}
|
|
}
|
|
END { if (prev) print prev }' | sed -E 's/\n\n[[:space:]]+/\n\n/g' | sed -E 's/^[[:space:]]+//g')
|
|
|
|
echo "Extracted content: '$content'"
|
|
|
|
url_message=$(grep -A 1 'url_message (max 60 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted url_message: '$url_message'"
|
|
|
|
url=$(grep -A 1 'url (max 90 characters):' "$text_file" | tail -n 1 | awk '{$1=$1;print}')
|
|
echo "Extracted url: '$url'"
|
|
|
|
# Construct the URLs with the subdomain
|
|
onion_base="6dshf2gnj7yzxlfcaczlyi57up4mvbtd5orinuj5bjsfycnhz2w456yd.onion"
|
|
constructed_url="http://$subdomain.$onion_base"
|
|
|
|
# Use an existing tear_off_link from the config if provided; otherwise default
|
|
# to the constructed onion URL.
|
|
existing_tear_off_link=$(jq -r '.tear_off_link // empty' "$json_file")
|
|
tear_off_link="${existing_tear_off_link:-http://$subdomain.$onion_base}"
|
|
|
|
echo "Constructed URL: '$constructed_url'"
|
|
echo "Using Tear-off Link: '$tear_off_link'"
|
|
|
|
# Check if the extracted fields are not empty
|
|
if [ -z "$name" ] || [ -z "$subdomain" ] || [ -z "$title" ] || [ -z "$subtitle" ] || [ -z "$headline" ] || [ -z "$content" ] || [ -z "$url_message" ] || [ -z "$url" ]; then
|
|
echo "Error: One or more extracted fields are empty. Please check the PDF form and try again."
|
|
exit 1
|
|
fi
|
|
|
|
# Update the JSON using jq and log the operation
|
|
echo "Updating config.json..."
|
|
jq --arg name "$name" \
|
|
--arg subdomain "$subdomain" \
|
|
--arg title "$title" \
|
|
--arg subtitle "$subtitle" \
|
|
--arg headline "$headline" \
|
|
--arg content "$content" \
|
|
--arg url_message "$url_message" \
|
|
--arg url "$url" \
|
|
--arg tear_off_link "$tear_off_link" \
|
|
'.name = $name |
|
|
.subdomain = $subdomain |
|
|
.title = $title |
|
|
.subtitle = $subtitle |
|
|
.headline = $headline |
|
|
.content = $content |
|
|
.url_message = $url_message |
|
|
.url = $url |
|
|
.tear_off_link = $tear_off_link' "$json_file" > tmp.json && mv tmp.json "$json_file"
|
|
|
|
if [ $? -eq 0 ]; then
|
|
echo "Config file updated successfully."
|
|
else
|
|
echo "Error: Failed to update config file."
|
|
exit 1
|
|
fi
|