diff --git a/README_code_quality.md b/README_code_quality.md new file mode 100644 index 0000000000000000000000000000000000000000..b9da0e0a28d75711e76b658072b1850821bda16f --- /dev/null +++ b/README_code_quality.md @@ -0,0 +1,136 @@ +# Code Quality Analyzer + +This Python script analyzes code quality in the `src` folder using multiple code quality tools and generates visualizations of the issues found. + +## Features + +- Runs multiple code quality tools: + - **ruff**: Fast Python linter + - **flake8**: Style guide enforcement + - **mypy**: Static type checking + - **pylint**: Python code analysis + - **isort**: Import sorting + - **black**: Code formatting + +- Classifies issues into categories: + - Style issues (formatting, whitespace, etc.) + - Complexity issues (cyclomatic complexity, etc.) + - Import issues (unused imports, import order, etc.) + - Naming issues (variable naming conventions, etc.) + - Documentation issues (missing docstrings, etc.) + - Type checking issues (type annotations, etc.) + - Security issues (potential security vulnerabilities) + - Other (unclassified issues) + +- Generates visualizations: + - Pie chart showing distribution of issues by category + - Bar chart showing number of issues per subfolder + +## Prerequisites + +1. **Python 3.7+** +2. **Code quality tools** (install as needed): + ```bash + pip install ruff flake8 mypy pylint isort black + ``` +3. 
**Visualization dependencies**: + ```bash + pip install -r requirements_code_quality.txt + ``` + +## Usage + +### Basic Usage + +Run the script from the project root directory: + +```bash +python code_quality_analyzer.py +``` + +This will analyze the `src` folder and generate: +- A console summary of all issues found +- A visualization saved as `code_quality_report.png` + +### Command Line Options + +```bash +python code_quality_analyzer.py --help +``` + +Available options: +- `--src-path PATH`: Specify a different source directory (default: `src`) +- `--no-plot`: Skip generating plots (only console output) + +### Examples + +Analyze a different directory: +```bash +python code_quality_analyzer.py --src-path my_code +``` + +Run without generating plots: +```bash +python code_quality_analyzer.py --no-plot +``` + +## Output + +### Console Output + +The script provides a detailed summary including: +- Total number of issues found +- Breakdown by tool +- Breakdown by category with percentages +- Breakdown by subfolder + +### Visualization + +The script generates a PNG file (`code_quality_report.png`) containing: +1. **Pie Chart**: Shows the distribution of issues by category +2. **Bar Chart**: Shows the number of issues found in each subfolder + +## Issue Classification + +The script automatically classifies issues based on error codes and messages: + +- **Style**: Formatting, whitespace, line length, etc. +- **Complexity**: Cyclomatic complexity, function length, etc. +- **Imports**: Unused imports, import order, etc. +- **Naming**: Variable naming conventions, etc. +- **Documentation**: Missing docstrings, etc. +- **Type Checking**: Type annotation issues +- **Security**: Potential security vulnerabilities +- **Other**: Unclassified issues + +## Troubleshooting + +### Missing Tools + +If a tool is not installed, the script will skip it and continue with the available tools. 
Install missing tools with: + +```bash +pip install ruff flake8 mypy pylint isort black +``` + +### Missing Dependencies + +If visualization dependencies are missing: + +```bash +pip install matplotlib seaborn numpy +``` + +### Permission Issues + +Make sure the script has read access to the source directory and write access to create the output image. + +## Customization + +You can modify the script to: +- Add new code quality tools +- Adjust issue classification rules +- Change visualization styles +- Add new output formats + +The main configuration is in the `categories` dictionary in the `CodeQualityAnalyzer` class. \ No newline at end of file diff --git a/code_quality_analyzer.py b/code_quality_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..f6bdf94c1de779ec7c5bee7e1f77dd2c8732629a --- /dev/null +++ b/code_quality_analyzer.py @@ -0,0 +1,319 @@ +""" +Code Quality Analyzer + +This script runs multiple code quality tools on the 'src' folder and generates +visualizations showing the distribution of issues by category and by subfolder. 
import argparse
import importlib.util
import json
import os
import re
import subprocess
import sys
from collections import Counter, defaultdict
from pathlib import Path
from typing import Any, Dict, List, Tuple


class CodeQualityAnalyzer:
    """Run several code quality tools on a source tree and report the findings.

    The analyzer shells out to ruff, flake8, mypy, pylint, isort and black,
    parses the lines they print into issue dictionaries, assigns each issue a
    category and summarises the result on the console and (optionally) as a
    PNG with a pie chart and a bar chart.

    Attributes:
        src_path: Directory that is analysed.
        issues: Parsed issue dictionaries, filled by ``process_issues``.
        tool_results: Raw output lines per tool, filled by ``run_all_tools``.
        categories: Maps a category name to a list of issue-code prefixes.
            See ``classify_issue`` for the matching rules.
        message_keywords: Maps a category name to words looked up in the
            issue message when no code prefix matched.
    """

    # file:line:col: CODE message   (ruff concise output, flake8)
    _LINT_RE = re.compile(r'^(.+?):(\d+):(\d+):\s*([A-Z]+\d+)\s+(.+)$')
    # file:line:col: CODE: message  (pylint default message template)
    _PYLINT_RE = re.compile(r'^(.+?):(\d+):(\d+):\s*([A-Z]+\d+):\s*(.+)$')
    # file:line: error: message, optionally with a column after the line
    _MYPY_RE = re.compile(r'^(.+?):(\d+):(?:(\d+):)?\s*error:\s*(.+)$')
    # "would reformat <file>" as printed by black --check
    _REFORMAT_RE = re.compile(r'^would\s+(?:reformat|fix)\s+(.+)$')
    # "ERROR: <file> Imports are incorrectly sorted and/or formatted." (isort)
    _ISORT_RE = re.compile(r'^ERROR:\s+(.+?)\s+Imports are incorrectly sorted')

    # Label used in the per-subfolder statistics for files that live
    # directly in the source root rather than in a subfolder.
    _ROOT_LABEL = '(root)'

    def __init__(self, src_path: str = "src"):
        """Create an analyzer for the directory ``src_path``."""
        self.src_path = Path(src_path)
        self.issues = []
        self.tool_results = {}

        # Issue-code prefixes per category. The longest matching prefix wins,
        # so a specific code such as 'F401' overrides the generic 'F' rule.
        self.categories = {
            'style': [
                'E', 'W', 'C', 'F', 'UP', 'B', 'SIM', 'ARG', 'PIE', 'TCH',
                'Q', 'RSE', 'RET', 'SLF', 'SLOT', 'TID', 'FBT', 'COM', 'C4',
                'DTZ', 'T20', 'AIR', 'PERF', 'FURB', 'LOG', 'RUF', 'black',
            ],
            'complexity': [
                'C901', 'PLR0911', 'PLR0912', 'PLR0913', 'PLR0915',
                'R09',  # pylint design checker (too-many-*)
            ],
            'imports': [
                'E401', 'E402', 'F401', 'F403', 'F405', 'I', 'isort',
                'W0611', 'C0411', 'C0412', 'C0413',  # pylint import messages
            ],
            'naming': ['N', 'C0103'],
            'documentation': ['D', 'C0114', 'C0115', 'C0116'],
            'type_checking': ['mypy', 'ANN'],
            'security': ['S'],
            'other': [],  # Catch-all for unclassified issues
        }

        # Fallback used only when no code prefix matched at all.
        self.message_keywords = {
            'type_checking': ['type', 'annotation'],
        }

    def run_tool(self, tool_name: str, command: List[str]) -> List[str]:
        """Run one tool and return its non-empty output lines.

        The command runs in the current working directory so that the path
        argument built from ``src_path`` and the file paths the tool reports
        are relative to the same directory.

        Args:
            tool_name: Name used in progress messages.
            command: Argument vector passed to ``subprocess.run``.

        Returns:
            Stripped, non-empty lines of stdout followed by stderr. An empty
            list when the tool is missing, could not be started, or exited
            with status 0 without printing anything to stdout.
        """
        print(f"Running {tool_name}...")
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except FileNotFoundError:
            print(f"  {tool_name}: Tool not found, skipping...")
            return []
        except (OSError, subprocess.SubprocessError, UnicodeError) as e:
            # Best effort: one broken tool must not abort the whole analysis.
            print(f"  {tool_name}: Error running tool: {e}")
            return []

        stdout = result.stdout or ""
        stderr = result.stderr or ""
        if result.returncode == 0 and not stdout.strip():
            print(f"  {tool_name}: No issues found")
            return []

        all_lines = [
            line.strip()
            for stream in (stdout, stderr)
            for line in stream.splitlines()
            if line.strip()
        ]
        # These are raw lines (diffs, summaries, ...), not issues yet;
        # process_issues() keeps only the lines that parse as an issue.
        print(f"  {tool_name}: Collected {len(all_lines)} lines of output")
        return all_lines

    def run_all_tools(self):
        """Run every supported tool and store its output in ``tool_results``."""
        target = str(self.src_path)
        tools = {
            # Request one line per diagnostic; parse_issue() reads
            # "file:line:col: CODE message" lines only.
            'ruff': ['ruff', 'check', '--output-format=concise', target],
            'flake8': ['flake8', target],
            'mypy': ['mypy', target],
            'pylint': ['pylint', target],
            'isort': ['isort', '--check-only', '--diff', target],
            'black': ['black', '--check', '--diff', target],
        }

        for tool_name, command in tools.items():
            self.tool_results[tool_name] = self.run_tool(tool_name, command)

    def parse_issue(self, line: str, tool: str) -> Dict[str, Any]:
        """Parse one output line of ``tool`` into an issue dictionary.

        Args:
            line: A single line printed by the tool.
            tool: One of 'ruff', 'flake8', 'mypy', 'pylint', 'isort', 'black'.

        Returns:
            A dictionary with the keys 'tool', 'raw_line', 'file', 'line',
            'column', 'code', 'message' and 'category'. When the line is not
            recognised, 'file' stays empty and the other fields keep their
            defaults; 'category' is always 'other' at this stage.
        """
        issue = {
            'tool': tool,
            'raw_line': line,
            'file': '',
            'line': 0,
            'column': 0,
            'code': '',
            'message': '',
            'category': 'other',
        }

        if tool in ('ruff', 'flake8', 'pylint'):
            pattern = self._PYLINT_RE if tool == 'pylint' else self._LINT_RE
            match = pattern.match(line)
            if match:
                issue.update({
                    'file': match.group(1),
                    'line': int(match.group(2)),
                    'column': int(match.group(3)),
                    'code': match.group(4),
                    'message': match.group(5),
                })

        elif tool == 'mypy':
            match = self._MYPY_RE.match(line)
            if match:
                issue.update({
                    'file': match.group(1),
                    'line': int(match.group(2)),
                    'column': int(match.group(3) or 0),
                    'code': 'mypy',
                    'message': match.group(4),
                })

        elif tool in ('isort', 'black'):
            # These tools report whole files, not individual locations.
            match = self._REFORMAT_RE.match(line) or self._ISORT_RE.match(line)
            if match:
                issue.update({
                    'file': match.group(1),
                    'code': tool,
                    'message': f'Code formatting issue detected by {tool}',
                })

        return issue

    @staticmethod
    def _code_matches(code: str, pattern: str) -> bool:
        """Return True when ``pattern`` is a rule prefix of ``code``.

        Both arguments are expected in upper case. A pattern that ends in a
        letter must be followed by a digit or the end of the code, so 'S'
        matches 'S101' but not 'SIM102'. A pattern that ends in a digit is a
        plain prefix, so 'C4' matches 'C408'.
        """
        if not pattern or not code.startswith(pattern):
            return False
        if pattern[-1].isdigit():
            return True
        return not code[len(pattern):len(pattern) + 1].isalpha()

    def classify_issue(self, issue: Dict[str, Any]) -> str:
        """Return the category name for ``issue``.

        The issue code is compared case-insensitively with every prefix in
        ``categories``; the category owning the longest matching prefix wins.
        Only when no prefix matches is the message searched for words that
        start with one of the ``message_keywords``. Anything left over is
        'other'.
        """
        code = str(issue.get('code', '')).upper()

        best_category = 'other'
        best_length = 0
        for category, patterns in self.categories.items():
            for pattern in patterns:
                prefix = pattern.upper()
                if len(prefix) > best_length and self._code_matches(code, prefix):
                    best_category = category
                    best_length = len(prefix)
        if best_length:
            return best_category

        message = str(issue.get('message', '')).lower()
        for category, keywords in self.message_keywords.items():
            for keyword in keywords:
                if re.search(r'\b' + re.escape(keyword.lower()), message):
                    return category

        return 'other'

    def process_issues(self):
        """Parse every collected output line and append the recognised issues.

        Lines that do not yield a file path (diff hunks, summaries, banners)
        are ignored.
        """
        for tool, lines in self.tool_results.items():
            for line in lines:
                issue = self.parse_issue(line, tool)
                if issue['file']:  # Only include issues with valid file paths
                    issue['category'] = self.classify_issue(issue)
                    self.issues.append(issue)

    def get_subfolder_issues(self) -> Dict[str, int]:
        """Count issues per first-level subfolder of ``src_path``.

        Paths are resolved before they are compared, so absolute and relative
        paths are both handled. Files directly inside ``src_path`` are counted
        under '(root)'; files outside ``src_path`` are not counted.
        """
        root = self.src_path.resolve()
        subfolder_counts = defaultdict(int)

        for issue in self.issues:
            try:
                parts = Path(issue['file']).resolve().relative_to(root).parts
            except ValueError:
                # Not located below the analysed directory.
                continue
            if not parts:
                continue
            # A single part is the file name itself, i.e. no subfolder.
            subfolder = parts[0] if len(parts) > 1 else self._ROOT_LABEL
            subfolder_counts[subfolder] += 1

        return dict(subfolder_counts)

    def get_category_counts(self) -> Dict[str, int]:
        """Count issues by category."""
        return dict(Counter(issue['category'] for issue in self.issues))

    def create_visualizations(self, output_path: str = 'code_quality_report.png'):
        """Draw the category pie chart and the subfolder bar chart.

        The figure is written to ``output_path`` and then shown with
        ``plt.show()``.

        Raises:
            ImportError: If matplotlib, seaborn or numpy is not installed.
        """
        # Imported here so that everything else in this module works without
        # the plotting dependencies (e.g. when run with --no-plot).
        import matplotlib.pyplot as plt
        import numpy as np
        import seaborn as sns

        plt.style.use('default')
        sns.set_palette("husl")

        category_counts = self.get_category_counts()
        subfolder_counts = self.get_subfolder_issues()

        # Create figure with two subplots
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

        # Pie chart for issue categories
        if category_counts:
            labels = list(category_counts.keys())
            sizes = list(category_counts.values())
            colors = plt.cm.Set3(np.linspace(0, 1, len(labels)))

            ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
            ax1.axis('equal')
        else:
            ax1.text(0.5, 0.5, 'No issues found', ha='center', va='center',
                     transform=ax1.transAxes)
        ax1.set_title('Code Quality Issues by Category', fontsize=14, fontweight='bold')

        # Bar chart for issues per subfolder
        if subfolder_counts:
            subfolders = list(subfolder_counts.keys())
            counts = list(subfolder_counts.values())
            positions = range(len(subfolders))

            bars = ax2.bar(positions, counts, color='skyblue', alpha=0.7)
            ax2.set_xlabel('Subfolder', fontsize=12)
            ax2.set_ylabel('Number of Issues', fontsize=12)
            ax2.set_xticks(positions)
            ax2.set_xticklabels(subfolders, rotation=45, ha='right')

            # Add value labels on bars
            for bar, count in zip(bars, counts):
                ax2.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.1,
                         f'{count}', ha='center', va='bottom')
        else:
            ax2.text(0.5, 0.5, 'No issues found', ha='center', va='center',
                     transform=ax2.transAxes)
        ax2.set_title('Issues per Subfolder', fontsize=14, fontweight='bold')

        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.show()

        print(f"\nVisualization saved as '{output_path}'")

    def print_summary(self):
        """Print totals and the breakdown by tool, category and subfolder."""
        separator = "=" * 60
        print("\n" + separator)
        print("CODE QUALITY ANALYSIS SUMMARY")
        print(separator)

        total_issues = len(self.issues)
        print(f"Total issues found: {total_issues}")

        if total_issues > 0:
            print("\nIssues by tool:")
            tool_counts = Counter(issue['tool'] for issue in self.issues)
            for tool, count in tool_counts.most_common():
                print(f"  {tool}: {count}")

            print("\nIssues by category:")
            for category, count in Counter(self.get_category_counts()).most_common():
                percentage = (count / total_issues) * 100
                print(f"  {category}: {count} ({percentage:.1f}%)")

            print("\nIssues by subfolder:")
            for subfolder, count in Counter(self.get_subfolder_issues()).most_common():
                print(f"  {subfolder}: {count}")

        print("\n" + separator)

    def run_analysis(self, plot: bool = True):
        """Run the tools, process their output and report the results.

        Args:
            plot: When False, only the console summary is produced and no
                image is generated.
        """
        print("Starting code quality analysis...")
        print(f"Analyzing code in: {self.src_path}")

        if not self.src_path.exists():
            print(f"Error: Source path '{self.src_path}' does not exist!")
            return

        self.run_all_tools()
        self.process_issues()
        self.print_summary()
        if plot:
            self.create_visualizations()


def main():
    """Parse the command line and run the analysis."""
    parser = argparse.ArgumentParser(description='Analyze code quality using multiple tools')
    parser.add_argument('--src-path', default='src',
                        help='Path to source code directory (default: src)')
    parser.add_argument('--no-plot', action='store_true', help='Skip generating plots')

    args = parser.parse_args()
    plot = not args.no_plot

    if plot:
        # Fail early, before the (slow) tools run, if plotting cannot work.
        missing = [
            name for name in ('matplotlib', 'seaborn', 'numpy')
            if importlib.util.find_spec(name) is None
        ]
        if missing:
            print(
                f"Error: {', '.join(missing)} required for plotting. "
                f"Install with: pip install {' '.join(missing)} "
                "(or re-run with --no-plot)"
            )
            return

    analyzer = CodeQualityAnalyzer(args.src_path)
    analyzer.run_analysis(plot=plot)


if __name__ == "__main__":
    main()