This commit is contained in:
thePR0M3TH3AN
2025-05-16 15:45:48 -04:00
parent 9bbb797634
commit 56012c0594

generate_repo_context.py

@@ -1,331 +1,264 @@
#!/usr/bin/env python3
"""
Script Name: generate_repo_context.py
Description: Generates a context file (``repo-context.txt``) for AI coding assistants.
    This revised version **only shows the directory structure for files that
    are explicitly whitelisted** via the Streamlit UI (``important_files`` in
    ``config.yaml``). All other paths are ignored, so the directory tree is
    minimal and focused on the user-selected scope.

Usage:
    Streamlit writes an updated ``config.yaml`` that includes:
        source_directory: <absolute path to repo>
        important_files:   # list[str] paths *relative* to the repo root
        exclude_dirs:      # (optional) patterns to skip entirely
    Then it invokes this script.

    See README or Streamlit UI for full workflow.
"""
from __future__ import annotations

import logging
import os
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Set

import yaml

# ─── Configuration Constants ────────────────────────────────────────────────────
CONFIG_FILE = "config.yaml"
OUTPUT_FILE = "repo-context.txt"

# Static text sections that can be dropped in verbatim
STATIC_FILES = [
    {"file": "overview.txt", "section_title": "Overview"},
    {"file": "important_info.txt", "section_title": "Important Information"},
    {"file": "to-do_list.txt", "section_title": "To-Do List"},
]

# File-extension → syntax-highlight language map for fenced code blocks
LANGUAGE_MAP = {
    ".py": "python",
    ".json": "json",
    ".env": "bash",
    ".js": "javascript",
    ".html": "html",
    ".css": "css",
    ".csv": "csv",
    ".md": "markdown",
    ".txt": "",  # plain text
    ".xml": "xml",
}

# Binary extensions (skipped - we don't dump binary blobs into markdown)
BINARY_EXTENSIONS = [
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".ico",
    ".db",
    ".exe",
    ".bin",
]

# ─── Helpers ────────────────────────────────────────────────────────────────────


def setup_logging() -> None:
    """Configure basic colourless log output."""
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

def load_config(config_path: Path) -> Dict:
    """Load YAML config or abort if it's missing/invalid."""
    if not config_path.exists():
        logging.error(f"Configuration file {config_path} not found.")
        sys.exit(1)
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            cfg = yaml.safe_load(f) or {}
        logging.info("Loaded configuration.")
        return cfg
    except yaml.YAMLError as exc:
        logging.error(f"Error parsing configuration file: {exc}")
        sys.exit(1)

# ─── Directory-tree generation (whitelist-aware) ───────────────────────────────


def build_whitelist_tree(
    repo_root: Path, *, included_files: List[str], exclude_dirs: List[str] | None = None
) -> List[str]:
    """Return a list of tree-view lines that *only* contain
    directories + files present in ``included_files``.

    Args:
        repo_root: absolute path to repo root (``source_directory``)
        included_files: list of *relative* paths (as written by Streamlit)
        exclude_dirs: optional patterns to ignore entirely (even if they'd be
            parents of an included file)
    """
    exclude_dirs = exclude_dirs or []
    repo_root = repo_root.resolve()

    # Normalise the whitelist: resolve against the repo root, de-duplicate,
    # and ensure each entry actually exists on disk
    norm_files: Set[Path] = set()
    for rel in included_files:
        p = (repo_root / rel).resolve()
        if not p.exists():
            logging.warning(f"Whitelisted file missing on disk - skipped: {rel}")
            continue
        norm_files.add(p)

    if not norm_files:
        logging.warning("No valid whitelisted files - directory tree will be empty.")
        return []

    # Collect every ancestor directory for each whitelisted file
    keep_dirs: Set[Path] = {repo_root}
    for file_path in norm_files:
        for parent in file_path.parents:  # includes repo_root eventually
            if any(parent.match(excl) or excl in parent.name for excl in exclude_dirs):
                break  # stop climbing when encountering an excluded dir
            keep_dirs.add(parent)

    # Build a children mapping for deterministic ordering
    children: Dict[Path, List[Path]] = defaultdict(list)
    for d in keep_dirs:
        children[d.parent].append(d)
    for fp in norm_files:
        children[fp.parent].append(fp)
    for kid_list in children.values():
        kid_list.sort(key=lambda p: (not p.is_dir(), p.name.lower()))  # dirs first

    # Depth-first traversal to emit tree lines
    tree_lines: List[str] = ["."]

    def recurse(dir_path: Path, depth: int) -> None:
        for entry in children.get(dir_path, []):
            if entry == repo_root:
                continue  # the root is already represented by '.'
            indent = "    " * depth
            connector = "├── "
            if entry.is_dir():
                tree_lines.append(f"{indent}{connector}{entry.name}/")
                recurse(entry, depth + 1)
            else:
                tree_lines.append(f"{indent}{connector}{entry.name}")

    recurse(repo_root, 1)
    logging.info("Whitelist-filtered directory tree generated.")
    return tree_lines
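# For the hypothetical whitelist sketched near the top of this file
# (app/main.py and app/utils/helpers.py), the emitted tree lines would be
# (directories sorted before files at each level):
#
#     .
#         ├── app/
#             ├── utils/
#                 ├── helpers.py
#             ├── main.py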
# ─── Markdown writers ──────────────────────────────────────────────────────────


def write_directory_tree(tree_lines: List[str], out_path: Path) -> None:
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write("## Directory Tree (Whitelist Only)\n\n")
        fh.write("```\n")
        fh.writelines(line + "\n" for line in tree_lines)
        fh.write("```\n\n")

def write_file_content(file_path: Path, out_path: Path) -> None:
    ext = file_path.suffix
    lang = LANGUAGE_MAP.get(ext, "")
    try:
        relative_display = file_path.relative_to(file_path.parents[1])
    except ValueError:
        relative_display = file_path

    with out_path.open("a", encoding="utf-8") as fh:
        fh.write(f"## {relative_display}\n")
        fh.write(f"```{lang}\n" if lang else "```\n")
        if ext in BINARY_EXTENSIONS:
            fh.write(f"*Binary file ({ext}) cannot be displayed.*\n")
        else:
            try:
                fh.write(file_path.read_text(encoding="utf-8", errors="ignore"))
            except Exception as exc:
                fh.write(f"*Error reading file: {exc}*\n")
        fh.write("\n```\n\n")
def write_static_file(src: Path, out_path: Path, section_title: str) -> None:
    if not src.exists():
        logging.warning(f"Static file missing - skipped: {src}")
        return
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write(f"## {section_title}\n\n")
        fh.write(src.read_text(encoding="utf-8", errors="ignore") + "\n\n")

def write_custom_sections(sections: List[Dict], script_dir: Path, out_path: Path) -> None:
    for entry in sections:
        file_name = entry.get("file")
        title = entry.get("section_title", "Custom Section")
        write_static_file(script_dir / "static_files" / file_name, out_path, title)

# ─── Main ──────────────────────────────────────────────────────────────────────

def main() -> None:
    setup_logging()
    script_dir = Path(__file__).parent.resolve()
    cfg = load_config(script_dir / CONFIG_FILE)

    # Resolve repo root (can be absolute or relative)
    source_dir_cfg = cfg.get("source_directory", "src")
    repo_root = Path(source_dir_cfg).expanduser()
    if not repo_root.is_absolute():
        repo_root = (script_dir.parent / repo_root).resolve()
    if not repo_root.exists():
        logging.error(f"Source directory does not exist: {repo_root}")
        sys.exit(1)

    important_files: List[str] = cfg.get("important_files", [])
    exclude_dirs: List[str] = cfg.get("exclude_dirs", [])
    custom_sections: List[Dict] = cfg.get("custom_sections", [])

    out_path = script_dir / OUTPUT_FILE
    out_path.unlink(missing_ok=True)

    # ── Header ───────────────────────────────────────────────────────────────
    with out_path.open("w", encoding="utf-8") as fh:
        fh.write("# Repository Context\n\n")
        fh.write(f"Generated on: {datetime.now():%Y-%m-%d}\n\n")
    # ── Static boilerplate docs ──────────────────────────────────────────────
    for static in STATIC_FILES:
        write_static_file(script_dir / "static_files" / static["file"], out_path, static["section_title"])

    # ── Directory tree (whitelist only) ──────────────────────────────────────
    tree_lines = build_whitelist_tree(repo_root, included_files=important_files, exclude_dirs=exclude_dirs)
    write_directory_tree(tree_lines, out_path)

    # ── Important file dumps ─────────────────────────────────────────────────
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write("## Important Files\n\n")
    for rel_path in important_files:
        abs_path = repo_root / rel_path
        if abs_path.exists():
            write_file_content(abs_path, out_path)
        else:
            with out_path.open("a", encoding="utf-8") as fh:
                fh.write(f"*File `{rel_path}` not found - skipped.*\n\n")
            logging.warning(f"Important file not found on disk: {rel_path}")

    # ── Custom sections ──────────────────────────────────────────────────────
    if custom_sections:
        write_custom_sections(custom_sections, script_dir, out_path)

    logging.info(f"Context file created → {out_path}")

if __name__ == "__main__":
    main()
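
For context, a minimal sketch of how the Streamlit side might drive this script. The driver below is an assumption for illustration (the file layout, paths, and the choice to shell out via subprocess are not taken from this commit); it only demonstrates the contract the docstring describes: write config.yaml next to the script, then run it.

import subprocess
import sys
from pathlib import Path

import yaml

script_dir = Path("tools")  # hypothetical: directory holding generate_repo_context.py
config = {
    "source_directory": "/home/user/projects/my-repo",  # hypothetical repo path
    "important_files": ["app/main.py", "app/utils/helpers.py"],
    "exclude_dirs": [".git", "node_modules"],
}
# Write the config the script expects, then invoke it; repo-context.txt
# appears next to the script on success.
(script_dir / "config.yaml").write_text(yaml.safe_dump(config), encoding="utf-8")
subprocess.run([sys.executable, str(script_dir / "generate_repo_context.py")], check=True)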