Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ __pycache__/
results/
tests/config.py
.vscode/
settings.local.json
settings.local.json
*.stackdump
11 changes: 11 additions & 0 deletions git_log_analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
git_log_analysis package public API

Re-exports the most commonly used classes so callers can import directly
from the package root instead of from the internal mcp sub-package.
"""

from git_log_analysis.mcp.service import GitLogService
from git_log_analysis.mcp.query_engine import QueryParams

__all__ = ["GitLogService", "QueryParams"]
16 changes: 6 additions & 10 deletions git_log_analysis/cli/projects_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import argparse

from git_log_analysis.mcp import GitLogService
from git_log_analysis import GitLogService

from .types import SubParser

Expand Down Expand Up @@ -53,24 +53,20 @@ def _projects_command(args):

print(f"Available projects: {len(projects)}\n")

# Calculate commit count for each project
all_commits = service.load_commits()

# Count commits per project
project_stats = {}
for commit in all_commits:
project_stats[commit.project] = project_stats.get(commit.project, 0) + 1
# Get commit count per project (no file loading required)
project_commit_counts = service.project_commit_counts
total_commits = sum(project_commit_counts.values())

# Display results
print(f"{'Project':<20} {'Commits':>10}")
print("-" * 32)

for project_name in sorted(projects):
commit_count = project_stats.get(project_name, 0)
commit_count = project_commit_counts.get(project_name, 0)
print(f"{project_name:<20} {commit_count:>10}")

print("-" * 32)
print(f"{'Total':<20} {len(all_commits):>10}")
print(f"{'Total':<20} {total_commits:>10}")

except FileNotFoundError as e:
print(f"❌ Error: {e}")
Expand Down
11 changes: 8 additions & 3 deletions git_log_analysis/cli/search_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import argparse

from git_log_analysis.mcp import GitLogService, QueryParams
from git_log_analysis import GitLogService, QueryParams

from .formatters import get_formatter
from .types import SubParser
Expand Down Expand Up @@ -52,7 +52,10 @@ def setup_search_parser(subparsers: SubParser):
# Search filters
search_group = parser.add_argument_group("Search Filters")
search_group.add_argument(
"-k", "--keyword", default="", help="Keyword to search for (searches in commit messages)"
"-k",
"--keyword",
default="",
help="Keyword to search for (searches in commit messages)",
)
search_group.add_argument(
"-s", "--start-date", help="Start date (YYYY-MM-DD format). Example: 2024-01-01"
Expand Down Expand Up @@ -82,7 +85,9 @@ def setup_search_parser(subparsers: SubParser):
help="Output format (default: text)",
)
output_group.add_argument(
"-o", "--output", help="Save results to a file (outputs to stdout if not specified)"
"-o",
"--output",
help="Save results to a file (outputs to stdout if not specified)",
)

parser.set_defaults(func=_search_command)
Expand Down
46 changes: 24 additions & 22 deletions git_log_analysis/git_diff_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import re
import json
from datetime import datetime
from typing import List, Dict, Optional, Any
from dataclasses import dataclass, asdict
from typing import Any
from git_log_analysis.mcp.utils import parse_date_to_naive


@dataclass
Expand All @@ -23,19 +23,19 @@ class DiffChunk:
old_count: int
new_start: int
new_count: int
lines: List[DiffLine]
lines: list[DiffLine]


@dataclass
class FileChange:
"""File change information"""

old_file: Optional[str]
new_file: Optional[str]
file_mode: Optional[str]
old_file: str | None
new_file: str | None
file_mode: str | None
change_type: str # 'modified', 'added', 'deleted', 'renamed'
index: Optional[str]
chunks: List[DiffChunk]
index: str | None
chunks: list[DiffChunk]


@dataclass
Expand All @@ -47,13 +47,13 @@ class CommitData:
email: str
date: str
message: str
changes: List[FileChange]
changes: list[FileChange]


class DiffParser:
def extract_diff_section(
self, lines: List[str], start_index: int
) -> tuple[List[str], int]:
self, lines: list[str], start_index: int
) -> tuple[list[str], int]:
"""
Extracts a diff section.

Expand Down Expand Up @@ -82,7 +82,7 @@ def extract_diff_section(

return diff_lines, i - start_index

def parse_diff(self, diff_lines: List[str]) -> Optional[FileChange]:
def parse_diff(self, diff_lines: list[str]) -> FileChange | None:
"""Parses a diff."""
if not diff_lines:
return None
Expand Down Expand Up @@ -118,12 +118,16 @@ def parse_diff(self, diff_lines: List[str]) -> Optional[FileChange]:
change.index = line.replace("index ", "").strip()

elif line.startswith("--- "):
old_path = line.replace("--- ", "").replace("a/", "")
old_path = line[4:]
if old_path.startswith("a/"):
old_path = old_path[2:]
if old_path != "/dev/null":
change.old_file = old_path

elif line.startswith("+++ "):
new_path = line.replace("+++ ", "").replace("b/", "")
new_path = line[4:]
if new_path.startswith("b/"):
new_path = new_path[2:]
if new_path != "/dev/null":
change.new_file = new_path

Expand Down Expand Up @@ -174,10 +178,10 @@ class GitLogParser:
"""Git Log Diff parser"""

def __init__(self):
self.commits: List[CommitData] = []
self.commits: list[CommitData] = []
self.diff_parser = DiffParser()

def parse_file(self, file_path: str, encoding: str = "utf-8") -> List[CommitData]:
def parse_file(self, file_path: str, encoding: str = "utf-8") -> list[CommitData]:
"""Parses Git log from a file."""
try:
with open(file_path, "r", encoding=encoding) as f:
Expand All @@ -186,7 +190,7 @@ def parse_file(self, file_path: str, encoding: str = "utf-8") -> List[CommitData
except Exception as e:
raise Exception(f"Error parsing file: {e}")

def parse(self, content: str) -> List[CommitData]:
def parse(self, content: str) -> list[CommitData]:
"""Parses Git log content."""
# CRLF -> LF
normalized_content = content.replace("\r\n", "\n")
Expand All @@ -202,7 +206,7 @@ def parse(self, content: str) -> List[CommitData]:

return self.commits

def _parse_commit(self, commit_section: str) -> Optional[CommitData]:
def _parse_commit(self, commit_section: str) -> CommitData | None:
"""Parses an individual commit section."""
lines = commit_section.split("\n")

Expand Down Expand Up @@ -255,7 +259,7 @@ def _parse_commit(self, commit_section: str) -> Optional[CommitData]:
changes=commit_data["changes"],
)

def get_stats(self) -> Dict[str, Any]:
def get_stats(self) -> dict[str, Any]:
"""Returns statistics for parsed commits."""
if not self.commits:
return {}
Expand Down Expand Up @@ -286,9 +290,7 @@ def get_stats(self) -> Dict[str, Any]:
dates = []
for commit in self.commits:
try:
# Parse "2023-12-11 08:05:23 +0900" format
date_part = commit.date.split(" +")[0] # Remove timezone
dt = datetime.strptime(date_part, "%Y-%m-%d %H:%M:%S")
dt = parse_date_to_naive(commit.date)
dates.append(dt)
except Exception as e:
raise Exception(f"Error parsing date: {e}")
Expand Down
4 changes: 1 addition & 3 deletions git_log_analysis/git_monthly_log_generator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
import os
import subprocess
from datetime import datetime, timedelta
from pathlib import Path
Expand All @@ -16,7 +15,6 @@ def _month_range(start: datetime, end: datetime):


def generate_git_logs(repo_path: Path, start_year: int, start_month: int):
os.chdir(repo_path)
if not (repo_path / ".git").exists():
raise FileNotFoundError(f"'{repo_path}' is not a Git repository.")
if start_year <= 0:
Expand Down Expand Up @@ -52,7 +50,7 @@ def generate_git_logs(repo_path: Path, start_year: int, start_month: int):
]

with open(filepath, "w", encoding="utf-8") as f:
subprocess.run(git_log_cmd, stdout=f, check=True)
subprocess.run(git_log_cmd, stdout=f, cwd=repo_path, check=True)

print(f"Done! Monthly logs are saved in {output_dir}.")

Expand Down
53 changes: 35 additions & 18 deletions git_log_analysis/mcp/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import sys
import io
from pathlib import Path
from typing import Optional, Dict
from dataclasses import dataclass

utf8_stderr = io.TextIOWrapper(
Expand Down Expand Up @@ -50,8 +49,6 @@ def __init__(self, results_dir: str):
if not self.results_dir.exists():
raise FileNotFoundError(f"Directory not found: {results_dir}")

self._projects: list[str]
self._summary_file_info_list: list[FileInfo]
self._projects, self._summary_file_info_list = self._scan_projects()

def _scan_projects(self) -> tuple[list[str], list[FileInfo]]:
Expand All @@ -75,12 +72,16 @@ def _scan_projects(self) -> tuple[list[str], list[FileInfo]]:

projects: list[str] = []
all_file_metadata: list[FileInfo] = []
self._project_commit_counts: dict[str, int] = {}

for project_dir in sorted(subdirs):
project_name = project_dir.name
file_metadata = self._scan_project_files(project_dir, project_name)
file_metadata, commit_count = self._scan_project_files(
project_dir, project_name
)
all_file_metadata.extend(file_metadata)
projects.append(project_name)
self._project_commit_counts[project_name] = commit_count

logger.info(
f"Scan complete: {len(all_file_metadata)} files found. Projects: {len(projects)}"
Expand All @@ -90,9 +91,10 @@ def _scan_projects(self) -> tuple[list[str], list[FileInfo]]:

def _scan_project_files(
self, project_dir: Path, project_name: str
) -> list[FileInfo]:
) -> tuple[list[FileInfo], int]:
file_infos = []
summary_files = sorted(project_dir.glob("*_summary.json"))
total_commit_count = 0

for file_path in summary_files:
date_match = re.search(r"(\d{4}-\d{2})", file_path.name)
Expand All @@ -102,12 +104,20 @@ def _scan_project_files(
path=file_path, project=project_name, year_month=year_month
)
file_infos.append(file_info)
try:
with open(file_path, "r", encoding="utf-8") as f:
commits_data = json.load(f)
total_commit_count += len(commits_data)
except Exception as e:
logger.warning(f"Cannot count commits in {file_path.name}: {e}")
else:
logger.warning(f"Cannot parse date from file: {file_path.name}")

logger.info(f"[{project_name}] Summary: {len(summary_files)} files")
logger.info(
f"[{project_name}] Summary: {len(summary_files)} files, {total_commit_count} commits"
)

return file_infos
return file_infos, total_commit_count

def _load_file_as_commits(self, file_path: Path, project: str) -> list[Commit]:
try:
Expand All @@ -129,9 +139,9 @@ def _load_file_as_commits(self, file_path: Path, project: str) -> list[Commit]:

def load_commits(
self,
project: Optional[str] = None,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
project: str | None = None,
start_date: str | None = None,
end_date: str | None = None,
) -> list[Commit]:
"""Loads required summary files and returns the list of commits

Expand Down Expand Up @@ -168,9 +178,9 @@ def load_commits(

def _filter_summary_file_info_list(
self,
project: Optional[str],
start_date: Optional[str],
end_date: Optional[str],
project: str | None,
start_date: str | None,
end_date: str | None,
) -> list[FileInfo]:
filtered = []

Expand All @@ -191,7 +201,7 @@ def _filter_summary_file_info_list(

return filtered

def _create_error_response(self, commit: Commit, error_msg: str) -> Dict:
def _create_error_response(self, commit: Commit, error_msg: str) -> dict:
"""Creates an error response dictionary"""
return {
"commit_metadata": commit,
Expand All @@ -200,8 +210,8 @@ def _create_error_response(self, commit: Commit, error_msg: str) -> Dict:
}

def load_commit_changes(
self, commit_hash: str, project: Optional[str] = None
) -> Optional[Dict]:
self, commit_hash: str, project: str | None = None
) -> dict | None:
"""Loads the changes for a specific commit by its hash.

Returns:
Expand All @@ -220,7 +230,7 @@ def load_commit_changes(
files_to_search = self._summary_file_info_list
if project:
files_to_search = [
f for f in files_to_search if f.project == project.lower()
f for f in files_to_search if f.project.lower() == project.lower()
]

for file_info in files_to_search:
Expand All @@ -242,7 +252,9 @@ def load_commit_changes(
with open(changes_file, "r", encoding="utf-8") as f:
changes_map = json.load(f)
except Exception as e:
logger.error(f"Failed to load changes file: {changes_file} - {e}")
logger.error(
f"Failed to load changes file: {changes_file} - {e}"
)
return self._create_error_response(
commit, f"Error reading changes file: {e}"
)
Expand All @@ -257,3 +269,8 @@ def load_commit_changes(
def projects(self) -> list[str]:
"""Returns the list of projects"""
return self._projects or []

@property
def project_commit_counts(self) -> dict[str, int]:
    """Per-project commit counts, precomputed at scan time.

    Returns:
        Mapping of project name -> total number of commits found in that
        project's summary files. The mapping is built once during the
        directory scan at construction time, so reading this property
        performs no file I/O.
    """
    # NOTE(review): returns the internal dict itself (not a copy) —
    # callers should treat it as read-only.
    return self._project_commit_counts
Loading