From 9eaef714125b4fa65ddd9972573864a86d589900 Mon Sep 17 00:00:00 2001 From: evans <58369673+Johnnyevans32@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:24:06 +0100 Subject: [PATCH] File type analyzer (#73) --- pyproject.toml | 1 + .../toolkits/filetype_analyzer.py | 166 ++++++++++++++++++ tests/toolkits/test_filetype_analyzer.py | 63 +++++++ 3 files changed, 230 insertions(+) create mode 100644 src/goose_plugins/toolkits/filetype_analyzer.py create mode 100644 tests/toolkits/test_filetype_analyzer.py diff --git a/pyproject.toml b/pyproject.toml index a57858a..d20c907 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ artify = "goose_plugins.toolkits.artify:VincentVanCode" todo = "goose_plugins.toolkits.todo:TodoToolkit" complexity_analyzer = "goose_plugins.toolkits.complexity_analyzer:CodeComplexityToolkit" dockerize_my_app = "goose_plugins.toolkits.dockerize_my_app:DockerizationToolkit" +filetype_analyzer = "goose_plugins.toolkits.filetype_analyzer:FileTypeAnalyzerToolkit" [build-system] diff --git a/src/goose_plugins/toolkits/filetype_analyzer.py b/src/goose_plugins/toolkits/filetype_analyzer.py new file mode 100644 index 0000000..7f81e94 --- /dev/null +++ b/src/goose_plugins/toolkits/filetype_analyzer.py @@ -0,0 +1,166 @@ +import os +import json +from goose.toolkit.base import Toolkit, tool + + +class FileTypeAnalyzerToolkit(Toolkit): + """Analyzes the percentage distribution of file types in a project.""" + + def __init__(self, *args: object, **kwargs: object) -> None: + super().__init__(*args, **kwargs) + + @tool + def analyze_file_types( + self, + project_dir: str, + include_subdirectories: bool = True, + exclude_paths: list[str] = [], + output_format: str = "json", + output_file: str | None = None, + visualize: bool = True, + ) -> dict: + """ + Analyze file types in a directory with explicit path exclusions. + + Args: + project_dir (str): Path to the project directory. + include_subdirectories (bool): Include subdirectories in the analysis. + exclude_paths (list[str]): List of file or directory paths to exclude. + output_format (str): Output format, either "json" or "txt". + output_file (str, optional): Output file for results. + visualize (bool): Whether to visualize results in CLI. + + Returns: + dict: Analysis results. + """ + try: + analyzer = FileTypeAnalyzer() + result = analyzer.analyze(project_dir, include_subdirectories, exclude_paths) + + if output_file: + reporter = ReportGenerator() + reporter.generate_report(result, output_format, output_file) + + if visualize: + visualizer = Visualizer() + visualizer.display_summary(result) + visualizer.display_bar_chart(result) + visualizer.display_pie_chart(result) + + return result + except Exception as e: + return {"status": "error", "message": str(e)} + + +class FileTypeAnalyzer: + """Performs file type analysis with explicit path exclusions.""" + + def analyze( + self, + directory: str, + recursive: bool = True, + exclude_paths: list[str] | None = None, + ) -> dict: + """ + Analyze file types in a directory. + + Args: + directory (str): Path to the directory to analyze. + recursive (bool): Whether to include subdirectories. + exclude_paths (list, optional): List of file or directory paths to exclude. + + Returns: + dict: Analysis results including file counts, percentages, and total files. + """ + + if not os.path.exists(directory): + raise FileNotFoundError(f"The directory '{directory}' does not exist.") + + file_counts = {} + total_files = 0 + + exclude_paths = [os.path.abspath(os.path.join(directory, path)) for path in (exclude_paths or [])] + + for root, _, files in os.walk(directory): + # Skip excluded directories and their subdirectories + if any(root.startswith(excluded) for excluded in exclude_paths): + continue + + for file in files: + file_path = os.path.abspath(os.path.join(root, file)) + + # Skip excluded files + if any(file_path.startswith(excluded) for excluded in exclude_paths): + continue + + # Get the file extension and count it + ext = os.path.splitext(file)[1].lower() + file_counts[ext] = file_counts.get(ext, 0) + 1 + total_files += 1 + + if not recursive: + break + + # Calculate percentages + percentages = {ext: (count / total_files) * 100 for ext, count in file_counts.items()} + + return { + "file_counts": file_counts, + "percentages": percentages, + "total_files": total_files, + } + + +class ReportGenerator: + """Generates analysis reports.""" + + def generate_report(self, data: dict, format: str, output_file: str) -> None: + if format == "json": + with open(output_file, "w") as f: + json.dump(data, f, indent=4) + elif format == "txt": + with open(output_file, "w") as f: + for ext, percent in data["percentages"].items(): + f.write(f"{ext}: {percent:.2f}%\n") + + +class Visualizer: + """Creates visual CLI representations for file type analysis.""" + + def display_bar_chart(self, data: dict) -> None: + """ + Display a bar chart showing the percentage of file types. + + Args: + data (dict): Analysis results containing percentages of file types. + """ + print("\nFile Type Distribution (Bar Chart):\n") + for ext, percent in sorted(data["percentages"].items(), key=lambda x: -x[1]): + bar = "█" * int(percent / 2) + print(f"{ext or 'Other':<10}: {bar} {percent:.2f}%") + + def display_pie_chart(self, data: dict) -> None: + """ + Display a pie chart-like visualization for file type percentages. + + Args: + data (dict): Analysis results containing percentages of file types. + """ + print("\nFile Type Distribution (Pie Chart):\n") + total = sum(data["percentages"].values()) + for ext, percent in sorted(data["percentages"].items(), key=lambda x: -x[1]): + segment = "○" * int((percent / total) * 20) + print(f"{ext or 'Other':<10}: {segment} {percent:.2f}%") + + def display_summary(self, data: dict) -> None: + """ + Display a summary of the analysis. + + Args: + data (dict): Analysis results containing total files and counts. + """ + print("\nFile Type Analysis Summary:\n") + print(f"Total Files Analyzed: {data['total_files']}") + print("File Counts by Type:") + for ext, count in sorted(data["file_counts"].items(), key=lambda x: -x[1]): + print(f" {ext or 'Other':<10}: {count}") diff --git a/tests/toolkits/test_filetype_analyzer.py b/tests/toolkits/test_filetype_analyzer.py new file mode 100644 index 0000000..8fb3126 --- /dev/null +++ b/tests/toolkits/test_filetype_analyzer.py @@ -0,0 +1,63 @@ +import pytest +from unittest.mock import MagicMock +from goose_plugins.toolkits.filetype_analyzer import FileTypeAnalyzerToolkit + + +@pytest.fixture +def toolkit(): + return FileTypeAnalyzerToolkit(notifier=MagicMock()) + + +@pytest.fixture +def mock_directory(tmp_path): + """Fixture to create a temporary directory structure for testing.""" + (tmp_path / ".git").mkdir() + (tmp_path / ".git" / "config").touch() + (tmp_path / ".git" / "objects").mkdir() + (tmp_path / ".git" / "objects" / "file1").touch() + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "doc1.txt").touch() + (tmp_path / "file1.py").touch() + (tmp_path / "file2.js").touch() + + return str(tmp_path) + + +def test_analyze_with_exclusions(toolkit, mock_directory): + exclude_paths = [".git"] + results = toolkit.analyze_file_types(mock_directory, exclude_paths=exclude_paths) + + assert results["total_files"] == 3 + assert results["file_counts"] == {".py": 1, ".js": 1, ".txt": 1} + assert ".git/config" not in results + + +def test_analyze_without_exclusions(toolkit, mock_directory): + results = toolkit.analyze_file_types(mock_directory) + + assert results["total_files"] == 5 + assert results["file_counts"] == { + ".py": 1, + ".js": 1, + ".txt": 1, + "": 2, + } + + +def test_empty_directory(toolkit, tmp_path): + results = toolkit.analyze_file_types(str(tmp_path)) + + assert results["total_files"] == 0 + assert results["file_counts"] == {} + + +def test_non_recursive_analysis(toolkit, mock_directory): + results = toolkit.analyze_file_types(mock_directory, include_subdirectories=False) + + assert results["total_files"] == 2 + assert results["file_counts"] == {".py": 1, ".js": 1} + + +def test_invalid_directory(toolkit): + result = toolkit.analyze_file_types("/nonexistent/path") + assert result["status"] == "error"