commit 56012c0594
parent 9bbb797634
Author: thePR0M3TH3AN
Date:   2025-05-16 15:45:48 -04:00

#!/usr/bin/env python3
"""
Script Name: generate_repo_context.py
Description: Generates a context file (`repo-context.txt`) for AI coding assistants.
Includes an overview, important information, a directory tree with exclusions,
content of important files with syntax highlighting, and a to-do list.
Description: Generates a context file (``repo-context.txt``) for AI coding assistants.
This revised version **only shows the directory structure for files that
are explicitly whitelisted** via the Streamlit UI (``important_files`` in
``config.yaml``). All other paths are ignored, so the directory tree is
minimal and focused on the user-selected scope.
Usage:
1. Ensure you have Python 3.7 or higher installed.
Streamlit writes an updated ``config.yaml`` that includes:
source_directory: <absolute path to repo>
important_files: # list[str] paths *relative* to the repo root
exclude_dirs: # (optional) patterns to skip entirely
Then it invokes this script.
2. (Optional) Set up a Python virtual environment:
python -m venv venv
source venv/bin/activate # On Unix or MacOS
venv\Scripts\activate.bat # On Windows (Command Prompt)
venv\Scripts\Activate.ps1 # On Windows (PowerShell)
3. Install the required Python packages:
pip install -r requirements.txt
4. Configure `config.yaml` as needed.
5. Place `overview.txt`, `important_info.txt`, and `to-do_list.txt` in the `static_files` directory.
6. Run the script:
./generate_repo_context.py # Unix-like systems
python generate_repo_context.py # Windows
The script will create `repo-context.txt` with the specified structure.
See README or Streamlit UI for full workflow.
"""
from __future__ import annotations

import logging
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Set

import yaml

# ─── Configuration Constants ────────────────────────────────────────────────────
CONFIG_FILE = "config.yaml"
OUTPUT_FILE = "repo-context.txt"

# Static text sections that can be dropped in verbatim
STATIC_FILES = [
    {"file": "overview.txt", "section_title": "Overview"},
    {"file": "important_info.txt", "section_title": "Important Information"},
    {"file": "to-do_list.txt", "section_title": "To-Do List"},
]

# File-extension → syntax-highlight language map for fenced code blocks
LANGUAGE_MAP = {
    ".py": "python",
    ".json": "json",
    ".env": "bash",
    ".js": "javascript",
    ".html": "html",
    ".css": "css",
    ".csv": "csv",
    ".md": "markdown",
    ".txt": "",  # plain text
    ".xml": "xml",
}
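
# For example, LANGUAGE_MAP.get(".py", "") yields "python", so that dump is
# fenced as ```python; unmapped extensions fall back to "" and get a plain,
# language-less fence (see write_file_content below).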

# Binary extensions (skipped; we don't dump binary blobs into markdown)
BINARY_EXTENSIONS = [
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".ico",
    ".db",
    ".exe",
    ".bin",
]

# ─── Helpers ────────────────────────────────────────────────────────────────────


def setup_logging() -> None:
    """Configure basic colourless log output."""
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")


def load_config(config_path: Path) -> Dict:
"""
Loads configuration from a YAML file.
Args:
config_path (Path): Path to the YAML configuration file.
Returns:
dict: Configuration dictionary containing 'exclude_dirs', 'important_files', and 'custom_sections'.
"""
"""Load YAML config or abort if its missing/invalid."""
if not config_path.exists():
logging.error(f"Configuration file {config_path} not found.")
sys.exit(1)
try:
with open(config_path, 'r') as f:
config = yaml.safe_load(f)
logging.info(f"Loaded configuration from {config_path}.")
return config
except yaml.YAMLError as e:
logging.error(f"Error parsing configuration file: {e}")
with open(config_path, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
logging.info("Loaded configuration.")
return cfg
except yaml.YAMLError as exc:
logging.error(f"Error parsing configuration file: {exc}")
sys.exit(1)


# ─── Directory-tree generation (whitelist-aware) ───────────────────────────────


def build_whitelist_tree(
    repo_root: Path, *, included_files: List[str], exclude_dirs: List[str] | None = None
) -> List[str]:
    """Return a list of tree-view lines that *only* contain
    directories + files present in ``included_files``.

    Args:
        repo_root: absolute path to repo root (``source_directory``)
        included_files: list of *relative* paths (as written by Streamlit)
        exclude_dirs: optional patterns to ignore entirely (even if they'd be
            parents of an included file)
    """
    exclude_dirs = exclude_dirs or []
    repo_root = repo_root.resolve()

    # Normalise the whitelist: POSIX style, unique, ensure the files exist
    norm_files: Set[Path] = set()
    for rel in included_files:
        p = (repo_root / rel).resolve()
        if not p.exists():
            logging.warning(f"Whitelisted file missing on disk, skipped: {rel}")
            continue
        norm_files.add(p)

    if not norm_files:
        logging.warning("No valid whitelisted files; directory tree will be empty.")
        return []

    # Collect every ancestor directory for each whitelisted file
    keep_dirs: Set[Path] = set([repo_root])
    for file_path in norm_files:
        for parent in [*file_path.parents]:  # includes repo_root eventually
            if any(parent.match(excl) or excl in parent.name for excl in exclude_dirs):
                break  # stop climbing when encountering an excluded dir
            keep_dirs.add(parent)

    # Build children mapping for deterministic ordering
    children: Dict[Path, List[Path]] = defaultdict(list)
    for d in keep_dirs:
        children[d.parent].append(d)
    for fp in norm_files:
        children[fp.parent].append(fp)
    for kid_list in children.values():
        kid_list.sort(key=lambda p: (not p.is_dir(), p.name.lower()))  # dirs first

    # Depth-first traversal to emit tree lines
    tree_lines: List[str] = ["."]

    def recurse(dir_path: Path, depth: int) -> None:
        for entry in children.get(dir_path, []):
            if entry == repo_root:
                continue  # skip root, as it is already represented by '.'
            indent = "    " * depth
            connector = "├── "
            if entry.is_dir():
                tree_lines.append(f"{indent}{connector}{entry.name}/")
                recurse(entry, depth + 1)
            else:
                tree_lines.append(f"{indent}{connector}{entry.name}")

    recurse(repo_root, 1)
    logging.info("Whitelist-filtered directory tree generated.")
    return tree_lines
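

# Illustrative sketch: assuming a hypothetical whitelist of ["src/app.py",
# "docs/guide.md"] where both files exist under the repo root, the function
# returns lines rendering as (directories sort before files):
#   .
#       ├── docs/
#           ├── guide.md
#       ├── src/
#           ├── app.py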


# ─── Markdown writers ──────────────────────────────────────────────────────────


def write_directory_tree(tree_lines: List[str], out_path: Path) -> None:
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write("## Directory Tree (Whitelist Only)\n\n")
        fh.write("```\n")
        fh.writelines(line + "\n" for line in tree_lines)
        fh.write("```\n\n")


def write_file_content(file_path: Path, out_path: Path) -> None:
    ext = file_path.suffix
    lang = LANGUAGE_MAP.get(ext, "")
    try:
        relative_display = file_path.relative_to(file_path.parents[1])
    except ValueError:
        # If relative_to fails, fall back to the absolute path
        relative_display = file_path
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write(f"## {relative_display}\n")
        fh.write(f"```{lang}\n" if lang else "```\n")
        if ext in BINARY_EXTENSIONS:
            fh.write(f"*Binary file ({ext}) cannot be displayed.*\n")
        else:
            try:
                fh.write(file_path.read_text(encoding="utf-8", errors="ignore"))
            except Exception as exc:
                fh.write(f"*Error reading file: {exc}*\n")
        fh.write("\n```\n\n")


def write_static_file(src: Path, out_path: Path, section_title: str) -> None:
    if not src.exists():
        logging.warning(f"Static file missing, skipped: {src}")
        return
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write(f"## {section_title}\n\n")
        fh.write(src.read_text(encoding="utf-8", errors="ignore") + "\n\n")


def write_custom_sections(sections: List[Dict], script_dir: Path, out_path: Path) -> None:
    for entry in sections:
        file_name = entry.get("file")
        title = entry.get("section_title", "Custom Section")
        write_static_file(script_dir / "static_files" / file_name, out_path, title)


# ─── Main ──────────────────────────────────────────────────────────────────────


def main() -> None:
    setup_logging()
    script_dir = Path(__file__).parent.resolve()
    cfg = load_config(script_dir / CONFIG_FILE)

    # Resolve repo root (can be absolute or relative)
    source_dir_cfg = cfg.get("source_directory", "src")
    repo_root = Path(source_dir_cfg).expanduser()
    if not repo_root.is_absolute():
        repo_root = (script_dir.parent / repo_root).resolve()
    if not repo_root.exists():
        logging.error(f"Source directory does not exist: {repo_root}")
        sys.exit(1)

    important_files: List[str] = cfg.get("important_files", [])
    exclude_dirs: List[str] = cfg.get("exclude_dirs", [])
    custom_sections: List[Dict] = cfg.get("custom_sections", [])

    out_path = script_dir / OUTPUT_FILE
    out_path.unlink(missing_ok=True)

    # ── Header ───────────────────────────────────────────────────────────────
    with out_path.open("w", encoding="utf-8") as fh:
        fh.write("# Repository Context\n\n")
        fh.write(f"Generated on: {datetime.now():%Y-%m-%d}\n\n")

    # ── Static boilerplate docs ──────────────────────────────────────────────
    for static in STATIC_FILES:
        write_static_file(script_dir / "static_files" / static["file"], out_path, static["section_title"])

    # ── Directory tree (whitelist only) ──────────────────────────────────────
    tree_lines = build_whitelist_tree(repo_root, included_files=important_files, exclude_dirs=exclude_dirs)
    write_directory_tree(tree_lines, out_path)

    # ── Important file dumps ─────────────────────────────────────────────────
    with out_path.open("a", encoding="utf-8") as fh:
        fh.write("## Important Files\n\n")
    for rel_path in important_files:
        abs_path = repo_root / rel_path
        if abs_path.exists():
            write_file_content(abs_path, out_path)
        else:
            with out_path.open("a", encoding="utf-8") as fh:
                fh.write(f"*File `{rel_path}` not found, skipped.*\n\n")
            logging.warning(f"Important file not found on disk: {rel_path}")

    # ── Custom sections ──────────────────────────────────────────────────────
    if custom_sections:
        write_custom_sections(custom_sections, script_dir, out_path)

    logging.info(f"Context file created → {out_path}")


if __name__ == "__main__":
main()
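
# Typical run (the path in the last line is illustrative); log lines follow
# the "[%(levelname)s] %(message)s" format configured in setup_logging():
#   $ python generate_repo_context.py
#   [INFO] Loaded configuration.
#   [INFO] Whitelist-filtered directory tree generated.
#   [INFO] Context file created → /path/to/repo-context.txt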