# Source code for promptprep.formatters

"""Makes your code look nice in different output formats."""

import html
import os
import re
from abc import ABC, abstractmethod
from typing import Dict, Optional, List, Any

# Pygments is an optional dependency. Probe for it with find_spec first so a
# missing install never raises at import time.
import importlib.util

# True when the "pygments" package can be located on the import path.
PYGMENTS_AVAILABLE = importlib.util.find_spec("pygments") is not None

if PYGMENTS_AVAILABLE:
    from pygments import highlight
    from pygments.lexers import get_lexer_for_filename, TextLexer
    from pygments.formatters import HtmlFormatter as PygmentsHtmlFormatter
    from pygments.formatters import Terminal256Formatter
else:
    # Bind every optional name to None so that all of them exist at module
    # level whether or not pygments is installed. Callers must check
    # PYGMENTS_AVAILABLE before using any of them.
    highlight = None
    get_lexer_for_filename = None
    TextLexer = None
    PygmentsHtmlFormatter = None
    Terminal256Formatter = None


class BaseFormatter(ABC):
    """Abstract interface that every output formatter implements.

    Subclasses must provide all six ``format_*`` methods; each one takes the
    raw data for a section and returns that section as a string.
    """

    def __init__(self):
        """Create the formatter; the base class keeps no state."""

    @abstractmethod
    def format_directory_tree(self, tree: str) -> str:
        """Return the directory tree section for *tree*."""

    @abstractmethod
    def format_file_header(self, file_path: str) -> str:
        """Return the header that introduces the file at *file_path*."""

    @abstractmethod
    def format_code_content(self, content: str, file_path: str) -> str:
        """Return *content*, the text of the file at *file_path*, formatted."""

    @abstractmethod
    def format_metadata(self, metadata: Dict[str, Any]) -> str:
        """Return the metadata section built from the *metadata* mapping."""

    @abstractmethod
    def format_error(self, error_msg: str) -> str:
        """Return *error_msg* formatted as an error notice."""

    @abstractmethod
    def format_skipped_files(self, skipped_files: List[tuple]) -> str:
        """Return the section that lists the entries in *skipped_files*."""

    def get_file_extension(self, file_path: str) -> str:
        """Return the lower-cased extension of *file_path*, dot included.

        The result is an empty string when the path has no extension.
        """
        return os.path.splitext(file_path)[1].lower()
class PlainTextFormatter(BaseFormatter):
    """Formatter that emits plain text with ``#``-prefixed section banners."""

    # Horizontal rule used above and below every banner title.
    _RULE = "# ======================\n"

    def format_directory_tree(self, tree: str) -> str:
        """Return *tree* under a ``Directory Tree:`` label."""
        return f"Directory Tree:\n{tree}\n\n"

    def format_file_header(self, file_path: str) -> str:
        """Return a three-line banner naming *file_path*."""
        rule = self._RULE
        return f"\n\n{rule}# File: {file_path}\n{rule}\n"

    def format_code_content(self, content: str, file_path: str) -> str:
        """Return *content* unchanged; plain text needs no wrapping.

        No line numbers are added here.
        """
        return content

    def format_metadata(self, metadata: Dict[str, Any]) -> str:
        """Return a banner followed by one ``# Label: value`` line per entry.

        Keys are shown with underscores replaced by spaces and title-cased.
        A float ``comment_ratio`` is printed with two decimals.
        """
        rule = self._RULE
        pieces = [rule, "# Codebase Metadata\n", rule, "\n"]
        for key, value in metadata.items():
            label = key.replace("_", " ").title()
            if key == "comment_ratio" and isinstance(value, float):
                shown = f"{value:.2f}"
            else:
                shown = f"{value}"
            pieces.append(f"# {label}: {shown}\n")
        return "".join(pieces)

    def format_error(self, error_msg: str) -> str:
        """Return *error_msg* as a single ``#`` comment line."""
        return f"\n# {error_msg}\n"

    def format_skipped_files(self, skipped_files: List[tuple]) -> str:
        """Return a banner and one line per ``(path, size_mb)`` pair.

        An empty *skipped_files* yields an empty string.
        """
        if not skipped_files:
            return ""
        rule = self._RULE
        banner = f"\n\n{rule}# Files skipped due to size limit\n{rule}\n"
        rows = [
            f"# {file_path} ({size_mb:.2f} MB)\n"
            for file_path, size_mb in skipped_files
        ]
        return banner + "".join(rows)
class MarkdownFormatter(BaseFormatter):
    """Formatter that emits Markdown headings, fenced code and tables.

    Fenced blocks use a backtick fence that is always longer than any run of
    backticks inside the wrapped text, so content that itself contains a
    fence (for example a README) cannot terminate the block early. Table
    cells have ``|`` escaped and line breaks flattened so that a value cannot
    split a row.
    """

    @staticmethod
    def _fence_for(text: Any) -> str:
        """Return a backtick fence that cannot be closed by *text*.

        The fence is three backticks unless *text* contains a run of three or
        more, in which case it is one backtick longer than the longest run.
        """
        runs = re.findall(r"`+", f"{text}")
        longest_run = max((len(run) for run in runs), default=0)
        return "`" * max(3, longest_run + 1)

    @staticmethod
    def _cell(value: Any) -> str:
        """Return *value* as text that is safe inside one table cell."""
        text = f"{value}"
        # A raw pipe starts a new column and a newline ends the row.
        text = text.replace("|", "\\|")
        return text.replace("\r\n", " ").replace("\n", " ")

    def format_directory_tree(self, tree: str) -> str:
        """Return *tree* inside a fenced block under a level-2 heading."""
        fence = self._fence_for(tree)
        return f"## Directory Tree\n\n{fence}\n{tree}\n{fence}\n\n"

    def format_file_header(self, file_path: str) -> str:
        """Return a level-2 heading naming *file_path*."""
        return f"\n\n## File: {file_path}\n\n"

    def format_code_content(self, content: str, file_path: str) -> str:
        """Return *content* in a fenced code block tagged with its language.

        The language tag is the file extension without its leading dot, or
        empty when the path has no extension. No line numbers are added here.
        """
        ext = self.get_file_extension(file_path) or ""
        lang = ext[1:] if ext else ""
        fence = self._fence_for(content)
        return f"{fence}{lang}\n{content}\n{fence}"

    def format_metadata(self, metadata: Dict[str, Any]) -> str:
        """Return the metadata as a two-column ``Metric | Value`` table.

        Keys are shown with underscores replaced by spaces and title-cased.
        A float ``comment_ratio`` is printed with two decimals.
        """
        rows = [
            "## Codebase Metadata\n\n",
            "| Metric | Value |\n",
            "| ------ | ----- |\n",
        ]
        for key, value in metadata.items():
            label = self._cell(key.replace("_", " ").title())
            if key == "comment_ratio" and isinstance(value, float):
                shown = f"{value:.2f}"
            else:
                shown = self._cell(value)
            rows.append(f"| {label} | {shown} |\n")
        return "".join(rows)

    def format_error(self, error_msg: str) -> str:
        """Return *error_msg* as a block quote with a bold ``Error:`` label."""
        return f"\n> **Error:** {error_msg}\n"

    def format_skipped_files(self, skipped_files: List[tuple]) -> str:
        """Return a ``File | Size`` table of ``(path, size_mb)`` pairs.

        An empty *skipped_files* yields an empty string.
        """
        if not skipped_files:
            return ""
        rows = [
            "\n\n## Files skipped due to size limit\n\n",
            "| File | Size |\n",
            "| ---- | ---- |\n",
        ]
        for file_path, size_mb in skipped_files:
            rows.append(f"| {self._cell(file_path)} | {size_mb:.2f} MB |\n")
        return "".join(rows)
class HtmlFormatter(BaseFormatter):
    """Formatter that emits HTML fragments plus a full-page wrapper.

    All caller-supplied text (tree, paths, code, metadata, errors, title) is
    escaped with ``html.escape`` so that ``&``, ``<`` and ``>`` in the input
    are displayed literally instead of being interpreted as markup or
    character references.
    """

    def __init__(self):
        """Create the formatter and store the page stylesheet in ``self.css``."""
        super().__init__()
        self.css = (
            " <style> "
            "body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, "
            "Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; "
            "line-height: 1.6; margin: 0; padding: 20px; color: #333; "
            "background-color: #f8f8f8; } "
            "h1, h2 { color: #2c3e50; margin-top: 30px; margin-bottom: 15px; } "
            "pre { background-color: #f1f1f1; padding: 10px; border-radius: 5px; "
            "overflow-x: auto; font-family: 'SFMono-Regular', Consolas, "
            "'Liberation Mono', Menlo, Courier, monospace; font-size: 14px; "
            "white-space: pre-wrap; } "
            "pre.tree { font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', "
            "Menlo, Courier, monospace; } "
            ".line-number { color: #999; margin-right: 10px; user-select: none; } "
            ".file-header { background-color: #3498db; color: white; padding: 10px; "
            "border-radius: 5px 5px 0 0; font-weight: bold; margin-top: 25px; } "
            ".file-content { margin-top: 0; border-radius: 0 0 5px 5px; } "
            "table { border-collapse: collapse; width: 100%; margin: 20px 0; } "
            "th, td { border: 1px solid #ddd; padding: 8px; text-align: left; } "
            "th { background-color: #f2f2f2; } "
            ".error-message { color: #e74c3c; padding: 10px; margin: 10px 0; "
            "background-color: #fadbd8; border-left: 4px solid #e74c3c; } "
            "</style> "
        )

    @staticmethod
    def _escape(text: Any) -> str:
        """Return *text* with ``&``, ``<`` and ``>`` replaced by entities.

        Quotes are left alone because the result is only ever placed in
        element content, never inside an attribute value.
        """
        return html.escape(f"{text}", quote=False)

    def format_directory_tree(self, tree: str) -> str:
        """Return *tree*, escaped, in a ``<pre class='tree'>`` under an ``<h2>``."""
        return (
            f"<h2>Directory Tree</h2>\n"
            f"<pre class='tree'>{self._escape(tree)}</pre>\n\n"
        )

    def format_file_header(self, file_path: str) -> str:
        """Return a ``file-header`` ``<div>`` naming *file_path*."""
        return f"\n\n<div class='file-header'>File: {self._escape(file_path)}</div>\n"

    def format_code_content(self, content: str, file_path: str) -> str:
        """Return *content*, escaped, in a ``<pre class='file-content'>``.

        No line numbers are added here.
        """
        return f"<pre class='file-content'>{self._escape(content)}</pre>"

    def format_metadata(self, metadata: Dict[str, Any]) -> str:
        """Return the metadata as a two-column HTML table.

        Keys are shown with underscores replaced by spaces and title-cased.
        A float ``comment_ratio`` is printed with two decimals. Labels and
        values are escaped.
        """
        rows = [
            "<h2>Codebase Metadata</h2>\n\n<table>\n",
            "<tr><th>Metric</th><th>Value</th></tr>\n",
        ]
        for key, value in metadata.items():
            label = self._escape(key.replace("_", " ").title())
            if key == "comment_ratio" and isinstance(value, float):
                shown = f"{value:.2f}"
            else:
                shown = self._escape(value)
            rows.append(f"<tr><td>{label}</td><td>{shown}</td></tr>\n")
        rows.append("</table>\n")
        return "".join(rows)

    def format_error(self, error_msg: str) -> str:
        """Return *error_msg*, escaped, in an ``error-message`` ``<div>``."""
        return f"\n<div class='error-message'>Error: {self._escape(error_msg)}</div>\n"

    def format_skipped_files(self, skipped_files: List[tuple]) -> str:
        """Return an HTML table of ``(path, size_mb)`` pairs.

        An empty *skipped_files* yields an empty string.
        """
        if not skipped_files:
            return ""
        rows = [
            "\n\n<h2>Files skipped due to size limit</h2>\n\n<table>\n",
            "<tr><th>File</th><th>Size</th></tr>\n",
        ]
        for file_path, size_mb in skipped_files:
            rows.append(
                f"<tr><td>{self._escape(file_path)}</td>"
                f"<td>{size_mb:.2f} MB</td></tr>\n"
            )
        rows.append("</table>\n")
        return "".join(rows)

    def get_full_html(self, content: str, title: str = "Code Aggregation") -> str:
        """Return a complete HTML document around *content*.

        Args:
            content: Already-formatted HTML for the page body; inserted as is.
            title: Text for ``<title>`` and the top ``<h1>``; it is escaped.
        """
        safe_title = self._escape(title)
        return (
            '<!DOCTYPE html> <html lang="en"> <head> '
            '<meta charset="UTF-8"> '
            '<meta name="viewport" content="width=device-width, initial-scale=1.0"> '
            f"<title>{safe_title}</title> {self.css} </head> "
            f"<body> <h1>{safe_title}</h1> {content} </body> </html> "
        )
class HighlightedFormatter(BaseFormatter):
    """Formatter that adds pygments syntax highlighting to code content.

    Everything except code content is delegated to a base formatter:
    ``HtmlFormatter`` in HTML mode and ``PlainTextFormatter`` otherwise. Code
    is only highlighted in HTML mode with pygments installed; in every other
    case it is delegated to the base formatter as well.
    """

    def __init__(self, html_output: bool = True):
        """Set up the base formatter and, if possible, the pygments formatter.

        Args:
            html_output: True to produce HTML, False for terminal output.
        """
        super().__init__()
        self.html_output = html_output

        if html_output:
            self.base_formatter = HtmlFormatter()
        else:
            self.base_formatter = PlainTextFormatter()

        # Without pygments there is nothing to build a formatter from.
        if not PYGMENTS_AVAILABLE:
            self.pygments_formatter = None
        elif html_output:
            self.pygments_formatter = PygmentsHtmlFormatter(
                cssclass="source", wrapcode=True
            )
        else:
            self.pygments_formatter = Terminal256Formatter()

    def format_directory_tree(self, tree: str) -> str:
        """Delegate the directory tree to the base formatter."""
        return self.base_formatter.format_directory_tree(tree)

    def format_file_header(self, file_path: str) -> str:
        """Delegate the file header to the base formatter."""
        return self.base_formatter.format_file_header(file_path)

    def format_code_content(self, content: str, file_path: str) -> str:
        """Return *content* highlighted as HTML, preceded by its stylesheet.

        Falls back to the base formatter when pygments is missing or when the
        formatter is not in HTML mode. No line numbers are added here.
        """
        can_highlight = PYGMENTS_AVAILABLE and self.html_output
        if not can_highlight:
            return self.base_formatter.format_code_content(content, file_path)

        # Any failure to pick a lexer from the file name falls back to the
        # plain-text lexer.
        try:
            lexer = get_lexer_for_filename(file_path, stripall=True)
        except Exception:
            lexer = TextLexer()

        markup = highlight(content, lexer, self.pygments_formatter)
        # The style definitions are emitted alongside every highlighted block.
        style_defs = self.pygments_formatter.get_style_defs(".source")
        return f"<style>{style_defs}</style>\n{markup}"

    def format_metadata(self, metadata: Dict[str, Any]) -> str:
        """Delegate the metadata section to the base formatter."""
        return self.base_formatter.format_metadata(metadata)

    def format_error(self, error_msg: str) -> str:
        """Delegate error formatting to the base formatter."""
        return self.base_formatter.format_error(error_msg)

    def format_skipped_files(self, skipped_files: List[tuple]) -> str:
        """Delegate the skipped-files section to the base formatter."""
        return self.base_formatter.format_skipped_files(skipped_files)

    def get_full_html(self, content: str, title: str = "Code Aggregation") -> str:
        """Return *content* wrapped in a full HTML page when in HTML mode.

        Outside HTML mode, or when the base formatter has no
        ``get_full_html``, *content* is returned unchanged.
        """
        if not self.html_output:
            return content
        if not hasattr(self.base_formatter, "get_full_html"):
            return content
        return self.base_formatter.get_full_html(content, title)
class CustomTemplateFormatter(BaseFormatter):
    """Lets you design your own output format using a template file.

    Your template can use these placeholders:
    - ${DIRECTORY_TREE} - Shows your folder structure
    - ${FILE_HEADER:path} - Adds a header for each file
    - ${FILE_CONTENT:path} - Puts in the actual code
    - ${METADATA} - Adds stats about your codebase
    - ${SKIPPED_FILES} - Lists any files that were too big
    - ${FILES} - All your files with headers and content
    - ${TITLE} - The main title

    Placeholders are recognised only in the template text itself. Text that
    is substituted in (file content, the tree, the title, ...) is inserted
    verbatim and is never scanned for further placeholders.
    """

    # One pattern for every supported placeholder, so the template can be
    # rendered in a single left-to-right pass.
    _PLACEHOLDER_RE = re.compile(
        r"\$\{(TITLE|DIRECTORY_TREE|METADATA|SKIPPED_FILES|FILES"
        r"|FILE_HEADER:[^}]+|FILE_CONTENT:[^}]+)\}"
    )

    def __init__(self, template_file: str, base_format: str = "plain"):
        """Initialize custom template formatter.

        Args:
            template_file: Path to the template file
            base_format: The base format used to render each section
                (plain, markdown, html, highlighted). Any other value falls
                back to plain.

        Raises:
            IOError: If the template file cannot be read.
        """
        super().__init__()
        self.template_file = template_file
        self.template = self._load_template(template_file)
        self.base_format = base_format

        if base_format == "markdown":
            self.base_formatter = MarkdownFormatter()
        elif base_format == "html":
            self.base_formatter = HtmlFormatter()
        elif base_format == "highlighted":
            self.base_formatter = HighlightedFormatter()
        else:
            # "plain" and every unrecognised name use the plain formatter.
            self.base_formatter = PlainTextFormatter()

    def _load_template(self, template_file: str) -> str:
        """Return the text of *template_file*.

        Raises:
            IOError: If the file cannot be opened or read; the original error
                is attached as the cause.
        """
        try:
            with open(template_file, "r") as f:
                return f.read()
        except IOError as e:
            raise IOError(f"Could not read template file: {e}") from e

    def format_directory_tree(self, tree: str) -> str:
        """Format the directory tree using the base formatter."""
        return self.base_formatter.format_directory_tree(tree)

    def format_file_header(self, file_path: str) -> str:
        """Format a file header using the base formatter."""
        return self.base_formatter.format_file_header(file_path)

    def format_code_content(self, content: str, file_path: str) -> str:
        """Format code content using the base formatter."""
        return self.base_formatter.format_code_content(content, file_path)

    def format_metadata(self, metadata: Dict[str, Any]) -> str:
        """Format metadata section using the base formatter."""
        return self.base_formatter.format_metadata(metadata)

    def format_error(self, error_msg: str) -> str:
        """Format error messages using the base formatter."""
        return self.base_formatter.format_error(error_msg)

    def format_skipped_files(self, skipped_files: List[tuple]) -> str:
        """Format skipped files section using the base formatter."""
        return self.base_formatter.format_skipped_files(skipped_files)

    def render_template(
        self,
        directory_tree: str,
        files_content: Dict[str, str],
        metadata: Dict[str, Any],
        skipped_files: List[tuple],
        title: str = "Code Aggregation",
    ) -> str:
        """Render the template with the provided content.

        The template is scanned once. Each placeholder is replaced by its
        rendered section, and the replacement text is not scanned again, so
        placeholder-like text inside file content or other inputs is left
        untouched.

        Args:
            directory_tree: ASCII representation of the directory tree
            files_content: Dictionary mapping file paths to their content
            metadata: Dictionary of metadata about the codebase
            skipped_files: List of (file_path, size) tuples for skipped files
            title: Title of the output (default: "Code Aggregation")

        Returns:
            The rendered template content. A ``FILE_HEADER``/``FILE_CONTENT``
            placeholder whose path is not in *files_content* is replaced by a
            formatted "File not found" error.
        """
        # Each distinct placeholder is rendered at most once per call.
        rendered: Dict[str, str] = {}

        def resolve(token: str) -> str:
            """Return the replacement text for one placeholder name."""
            if token == "TITLE":
                return title
            if token == "DIRECTORY_TREE":
                return self.format_directory_tree(directory_tree)
            if token == "METADATA":
                return self.format_metadata(metadata)
            if token == "SKIPPED_FILES":
                return self.format_skipped_files(skipped_files)
            if token == "FILES":
                return "".join(
                    self.format_file_header(path)
                    + self.format_code_content(body, path)
                    for path, body in files_content.items()
                )
            # Remaining tokens are "FILE_HEADER:<path>" or
            # "FILE_CONTENT:<path>"; split on the first colon only so paths
            # that contain colons stay intact.
            kind, _, file_path = token.partition(":")
            if file_path not in files_content:
                return self.format_error(f"File not found: {file_path}")
            if kind == "FILE_HEADER":
                return self.format_file_header(file_path)
            return self.format_code_content(files_content[file_path], file_path)

        def substitute(match):
            token = match.group(1)
            if token not in rendered:
                rendered[token] = resolve(token)
            return rendered[token]

        return self._PLACEHOLDER_RE.sub(substitute, self.template)
# Factory for formatters; the "custom" format is served by CustomTemplateFormatter.
def get_formatter(
    output_format: str = "plain",
    template_file: Optional[str] = None,
    base_format: str = "plain",
) -> BaseFormatter:
    """Return a formatter instance for the requested output format.

    Args:
        output_format: One of plain, markdown, html, highlighted or custom.
        template_file: Path to the template; required for the custom format.
        base_format: Base format handed to the custom template formatter
            (defaults to plain).

    Returns:
        A new formatter. ``highlighted`` uses ``HighlightedFormatter`` with
        its default arguments, i.e. HTML output.

    Raises:
        ValueError: If the format is unknown, if ``custom`` is requested
            without a template file, or if the template cannot be loaded.
    """
    if output_format == "custom":
        if not template_file:
            raise ValueError("template_file is required for custom output format")
        try:
            return CustomTemplateFormatter(template_file, base_format=base_format)
        except IOError as exc:
            # Surface template read failures as ValueError, like the other
            # argument errors raised here.
            raise ValueError(f"Error loading template file: {exc}")

    builtin_formatters = (
        ("plain", PlainTextFormatter),
        ("markdown", MarkdownFormatter),
        ("html", HtmlFormatter),
        ("highlighted", HighlightedFormatter),
    )
    for name, formatter_cls in builtin_formatters:
        if output_format == name:
            return formatter_cls()

    raise ValueError(f"Unknown output format: {output_format}")