From 6881e950662a53b6c5301d148007be41ddc57edd Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 23 Nov 2025 08:22:24 +0000 Subject: [PATCH 1/2] Fix forbidden file detection to check all commits Previously, only the latest commit was checked for forbidden file modifications, allowing students to bypass detection by making changes in earlier commits. Changes: - Add get_all_modified_files() to check all commits in repo - Change forbidden-modifications to forbidden-files config key - Make forbidden file check a warning instead of blocking error - Add yellow cell highlighting and note for violations - Show warning on frontend with list of violated files - Update tests and documentation This allows teachers to see suspicious activity while still grading. --- CLAUDE.md | 10 ++ .../components/registration-form/index.jsx | 33 +++- grading/__init__.py | 8 + grading/github_client.py | 88 +++++++++ grading/grader.py | 71 ++++---- grading/sheets_client.py | 81 +++++++++ main.py | 32 +++- tests/test_github_client.py | 168 ++++++++++++++++++ tests/test_grader.py | 53 +++--- tests/test_sheets_client.py | 29 +++ 10 files changed, 508 insertions(+), 65 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index ca4303c..de3b5ca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -79,8 +79,18 @@ labs: - cpplint files: # Required files in repo - lab2.cpp + forbidden-files: # Files students can't modify (warning, not blocking) + - test_main.py # Exact file match + - tests/ # Directory prefix match ``` +### Forbidden Files Detection + +When `forbidden-files` is configured, the system checks ALL commits in the repository: +- If violations found: grade is still written, but cell is highlighted yellow with a comment +- Warning is shown to student on frontend +- This helps detect students who modify test files to cheat + ## CI/CD - **Tests**: Run on every push via `.github/workflows/tests.yml` diff --git a/frontend/courses-front/src/components/registration-form/index.jsx b/frontend/courses-front/src/components/registration-form/index.jsx index da38849..5492a27 100644 --- a/frontend/courses-front/src/components/registration-form/index.jsx +++ b/frontend/courses-front/src/components/registration-form/index.jsx @@ -47,11 +47,12 @@ const handleSubmit = async () => { if (gradeResponse.status === "updated") { setCheckResult({ - type: "success", + type: gradeResponse.warning ? "warning" : "success", message: gradeResponse.message, result: gradeResponse.result, passed: gradeResponse.passed, checks: gradeResponse.checks, + warning: gradeResponse.warning, }); } else if (gradeResponse.status === "rejected") { setCheckResult({ @@ -208,6 +209,36 @@ const handleSubmit = async () => { ))} )} + + {checkResult.warning && ( +
+
+ {checkResult.warning.message} +
+ {checkResult.warning.files && checkResult.warning.files.length > 0 && ( +
+ {checkResult.warning.files.map((file, index) => ( +
• {file}
+ ))} +
+ )} +
+ )} )} diff --git a/grading/__init__.py b/grading/__init__.py index 2c5b9b6..268827e 100644 --- a/grading/__init__.py +++ b/grading/__init__.py @@ -38,6 +38,7 @@ GitHubClient, CommitInfo, check_forbidden_modifications, + check_forbidden_files_in_list, get_default_forbidden_patterns, ) @@ -49,6 +50,8 @@ prepare_grade_update, get_deadline_from_sheet, get_student_order, + set_cell_warning, + format_forbidden_files_note, StudentLocation, LabColumn, GradeUpdate, @@ -58,6 +61,7 @@ LabGrader, GradeResult, GradeStatus, + ForbiddenFilesWarning, ) __all__ = [ @@ -83,6 +87,7 @@ "GitHubClient", "CommitInfo", "check_forbidden_modifications", + "check_forbidden_files_in_list", "get_default_forbidden_patterns", # sheets_client "find_student_row", @@ -92,6 +97,8 @@ "prepare_grade_update", "get_deadline_from_sheet", "get_student_order", + "set_cell_warning", + "format_forbidden_files_note", "StudentLocation", "LabColumn", "GradeUpdate", @@ -99,4 +106,5 @@ "LabGrader", "GradeResult", "GradeStatus", + "ForbiddenFilesWarning", ] diff --git a/grading/github_client.py b/grading/github_client.py index 090cd1e..32a7932 100644 --- a/grading/github_client.py +++ b/grading/github_client.py @@ -14,6 +14,7 @@ class CommitInfo: """Information about a commit.""" sha: str files: list[dict[str, Any]] # List of {filename, status, ...} + all_modified_files: list[str] | None = None # All files modified across all commits @dataclass @@ -196,6 +197,57 @@ def get_job_logs(self, org: str, repo: str, job_id: int) -> str | None: return resp.text + def get_all_modified_files(self, org: str, repo: str) -> list[str]: + """ + Get all files that were modified across all commits in the repository. + + This iterates through all commits and collects unique file paths + that were added, modified, or removed. + + Args: + org: Organization or user name + repo: Repository name + + Returns: + List of unique file paths that were modified in any commit + """ + modified_files: set[str] = set() + + # Get all commits (paginated) + page = 1 + per_page = 100 + while True: + url = f"{self.BASE_URL}/repos/{org}/{repo}/commits" + params = {"page": page, "per_page": per_page} + resp = requests.get(url, headers=self.headers, params=params) + + if resp.status_code != 200: + break + + commits = resp.json() + if not commits: + break + + # Get details for each commit to see modified files + for commit_summary in commits: + commit_sha = commit_summary["sha"] + commit_url = f"{self.BASE_URL}/repos/{org}/{repo}/commits/{commit_sha}" + commit_resp = requests.get(commit_url, headers=self.headers) + + if commit_resp.status_code == 200: + commit_data = commit_resp.json() + for file_info in commit_data.get("files", []): + filename = file_info.get("filename", "") + if filename: + modified_files.add(filename) + + # Check if there are more pages + if len(commits) < per_page: + break + page += 1 + + return list(modified_files) + def check_forbidden_modifications( commit_files: list[dict[str, Any]], @@ -235,6 +287,42 @@ def check_forbidden_modifications( return violations +def check_forbidden_files_in_list( + all_files: list[str], + forbidden_patterns: list[str] +) -> list[str]: + """ + Check if any forbidden files are in the list of modified files. + + Unlike check_forbidden_modifications, this works with a simple list of filenames + without status information. Use this when checking all files modified across + all commits in a repository. + + Args: + all_files: List of file paths that were modified + forbidden_patterns: List of forbidden file paths or prefixes + + Returns: + List of forbidden files that were found + + Examples: + >>> check_forbidden_files_in_list(["test_main.py", "main.cpp"], ["test_main.py"]) + ['test_main.py'] + >>> check_forbidden_files_in_list(["tests/test1.py"], ["tests/"]) + ['tests/test1.py'] + """ + violations = [] + + for filename in all_files: + for pattern in forbidden_patterns: + # Exact match or prefix match (for directories like "tests/") + if filename == pattern or filename.startswith(pattern): + violations.append(filename) + break + + return violations + + def get_default_forbidden_patterns(required_files: list[str]) -> list[str]: """ Get default forbidden file patterns based on required files. diff --git a/grading/grader.py b/grading/grader.py index 8df67b2..227b406 100644 --- a/grading/grader.py +++ b/grading/grader.py @@ -13,6 +13,7 @@ from .github_client import ( GitHubClient, check_forbidden_modifications, + check_forbidden_files_in_list, get_default_forbidden_patterns, ) from .ci_checker import ( @@ -59,6 +60,13 @@ class CIEvaluation: latest_success_time: datetime | None = None # For penalty calculation +@dataclass +class ForbiddenFilesWarning: + """Warning about forbidden file modifications (not blocking).""" + violations: list[str] # List of forbidden files that were modified + message: str # User-facing warning message + + class LabGrader: """ Orchestrates lab grading operations. @@ -142,9 +150,12 @@ def check_forbidden_files( org: str, repo_name: str, lab_config: dict[str, Any] - ) -> GradeResult | None: + ) -> ForbiddenFilesWarning | None: """ - Check for forbidden file modifications. + Check for forbidden file modifications across all commits. + + This method checks ALL commits in the repository, not just the latest one, + to detect any forbidden file modifications. Args: org: GitHub organization @@ -152,49 +163,35 @@ def check_forbidden_files( lab_config: Lab configuration dict from YAML Returns: - GradeResult with error if violation found, None otherwise + ForbiddenFilesWarning with violations if found, None otherwise """ - commit = self.github.get_latest_commit(org, repo_name) - if commit is None: - return None + # Get forbidden patterns from config (new field: forbidden-files) + forbidden = lab_config.get("forbidden-files", []) - # Get forbidden patterns from config or defaults - required_files = lab_config.get("files", []) - forbidden = lab_config.get("forbidden-modifications", []).copy() if not forbidden: - forbidden = get_default_forbidden_patterns(required_files) + return None - if not forbidden: + # Get ALL modified files across all commits + all_modified = self.github.get_all_modified_files(org, repo_name) + + if not all_modified: return None - violations = check_forbidden_modifications(commit.files, forbidden) + violations = check_forbidden_files_in_list(all_modified, forbidden) if violations: - # Return error for first violation - if "test_main.py" in violations: - return GradeResult( - status=GradeStatus.ERROR, - result=None, - message="🚨 Нельзя изменять test_main.py", - passed=None, - error_code="FORBIDDEN_MODIFICATION", - ) - for v in violations: - if v.startswith("tests/"): - return GradeResult( - status=GradeStatus.ERROR, - result=None, - message="🚨 Нельзя изменять папку tests/", - passed=None, - error_code="FORBIDDEN_MODIFICATION", - ) - # Generic message for other forbidden files - return GradeResult( - status=GradeStatus.ERROR, - result=None, - message=f"🚨 Нельзя изменять файл {violations[0]}", - passed=None, - error_code="FORBIDDEN_MODIFICATION", + # Build warning message + if len(violations) == 1: + message = f"⚠️ Обнаружено изменение запрещённого файла: {violations[0]}" + else: + files_str = ", ".join(violations[:3]) + if len(violations) > 3: + files_str += f" и ещё {len(violations) - 3}" + message = f"⚠️ Обнаружены изменения запрещённых файлов: {files_str}" + + return ForbiddenFilesWarning( + violations=violations, + message=message, ) return None diff --git a/grading/sheets_client.py b/grading/sheets_client.py index e27c5ce..d45157e 100644 --- a/grading/sheets_client.py +++ b/grading/sheets_client.py @@ -11,6 +11,13 @@ logger = logging.getLogger(__name__) +# Yellow background color for warnings (RGB values 0-1) +WARNING_BACKGROUND_COLOR = { + "red": 1.0, + "green": 0.95, + "blue": 0.6, +} + # Cell protection rules: these values can be overwritten OVERWRITABLE_VALUES = {"", "x", "?"} OVERWRITABLE_PREFIXES = ("?",) # Cells starting with "?" can be overwritten @@ -307,3 +314,77 @@ def get_student_order( except Exception as e: logger.error(f"Error reading task ID: {e}") return None + + +def set_cell_warning( + worksheet, + row: int, + col: int, + note: str, +) -> bool: + """ + Set warning formatting on a cell: yellow background and note/comment. + + Uses Google Sheets API batch_update for formatting and note update. + + Args: + worksheet: gspread Worksheet object + row: 1-based row number + col: 1-based column number + note: Comment/note text to add to the cell + + Returns: + True if successful, False otherwise + """ + try: + spreadsheet = worksheet.spreadsheet + sheet_id = worksheet.id + + # Build batch update request for yellow background + requests = [ + { + "repeatCell": { + "range": { + "sheetId": sheet_id, + "startRowIndex": row - 1, + "endRowIndex": row, + "startColumnIndex": col - 1, + "endColumnIndex": col, + }, + "cell": { + "userEnteredFormat": { + "backgroundColor": WARNING_BACKGROUND_COLOR, + }, + "note": note, + }, + "fields": "userEnteredFormat.backgroundColor,note", + } + } + ] + + spreadsheet.batch_update({"requests": requests}) + logger.info(f"Set warning format on cell ({row}, {col})") + return True + + except Exception as e: + logger.error(f"Failed to set cell warning: {e}") + return False + + +def format_forbidden_files_note(violations: list[str]) -> str: + """ + Format a note/comment for forbidden file modifications. + + Args: + violations: List of forbidden file paths that were modified + + Returns: + Formatted note text + + Examples: + >>> format_forbidden_files_note(["test_main.py"]) + '⚠️ Изменены запрещённые файлы:\\n- test_main.py' + """ + header = "⚠️ Изменены запрещённые файлы:" + files_list = "\n".join(f"- {f}" for f in violations) + return f"{header}\n{files_list}" diff --git a/main.py b/main.py index 460be15..ca809e7 100644 --- a/main.py +++ b/main.py @@ -19,6 +19,7 @@ LabGrader, GitHubClient, GradeStatus, + ForbiddenFilesWarning, find_student_row, find_lab_column_by_name, calculate_lab_column, @@ -26,6 +27,8 @@ get_deadline_from_sheet, get_student_order, calculate_expected_taskid, + set_cell_warning, + format_forbidden_files_note, ) # Configure logging to both file and console @@ -616,11 +619,10 @@ def grade_lab(course_id: str, group_id: str, lab_id: str, request: GradeRequest) status_code = 404 if repo_error.error_code == "NO_COMMITS" else 400 raise HTTPException(status_code=status_code, detail=repo_error.message) - # Step 2: Check forbidden file modifications - forbidden_error = grader.check_forbidden_files(org, repo_name, lab_config_dict) - if forbidden_error: - logger.warning(f"Forbidden modification: {forbidden_error.message}") - raise HTTPException(status_code=403, detail=forbidden_error.message) + # Step 2: Check forbidden file modifications (warning, not blocking) + forbidden_warning = grader.check_forbidden_files(org, repo_name, lab_config_dict) + if forbidden_warning: + logger.warning(f"Forbidden files modified: {forbidden_warning.violations}") # Step 3: Evaluate CI results ci_evaluation = grader._evaluate_ci_internal(org, repo_name, lab_config_dict) @@ -756,13 +758,29 @@ def grade_lab(course_id: str, group_id: str, lab_id: str, request: GradeRequest) sheet.update_cell(row_idx, lab_col, final_result) logger.info(f"Successfully updated grade for '{username}' in lab {lab_id}") - return { + # If there were forbidden file modifications, mark cell with warning + warning_data = None + if forbidden_warning: + note = format_forbidden_files_note(forbidden_warning.violations) + set_cell_warning(sheet, row_idx, lab_col, note) + logger.info(f"Added warning to cell for forbidden files: {forbidden_warning.violations}") + warning_data = { + "message": forbidden_warning.message, + "files": forbidden_warning.violations, + } + + response = { "status": "updated", "result": final_result, "message": final_message, "passed": ci_evaluation.grade_result.passed, - "checks": ci_evaluation.grade_result.checks + "checks": ci_evaluation.grade_result.checks, } + + if warning_data: + response["warning"] = warning_data + + return response except HTTPException: raise except Exception as e: diff --git a/tests/test_github_client.py b/tests/test_github_client.py index 3a184af..6d4ba9a 100644 --- a/tests/test_github_client.py +++ b/tests/test_github_client.py @@ -14,6 +14,7 @@ GitHubClient, CommitInfo, check_forbidden_modifications, + check_forbidden_files_in_list, get_default_forbidden_patterns, ) @@ -336,5 +337,172 @@ def test_empty_required_files(self): assert patterns == [] +class TestCheckForbiddenFilesInList: + """Tests for check_forbidden_files_in_list function.""" + + def test_no_violations(self): + """No violations when files don't match patterns.""" + files = ["main.py", "utils.py", "lib/helper.py"] + violations = check_forbidden_files_in_list(files, ["test_main.py", "tests/"]) + assert violations == [] + + def test_exact_match(self): + """Detect exact filename match.""" + files = ["main.py", "test_main.py", "utils.py"] + violations = check_forbidden_files_in_list(files, ["test_main.py"]) + assert violations == ["test_main.py"] + + def test_prefix_match(self): + """Detect prefix match for directories.""" + files = ["main.py", "tests/test_example.py", "tests/helper.py"] + violations = check_forbidden_files_in_list(files, ["tests/"]) + assert len(violations) == 2 + assert "tests/test_example.py" in violations + assert "tests/helper.py" in violations + + def test_multiple_patterns(self): + """Multiple patterns detected.""" + files = ["test_main.py", "tests/test.py", "main.py"] + violations = check_forbidden_files_in_list(files, ["test_main.py", "tests/"]) + assert len(violations) == 2 + assert "test_main.py" in violations + assert "tests/test.py" in violations + + def test_empty_files_list(self): + """Empty files list returns no violations.""" + violations = check_forbidden_files_in_list([], ["test_main.py"]) + assert violations == [] + + def test_empty_patterns_list(self): + """Empty patterns list returns no violations.""" + violations = check_forbidden_files_in_list(["test_main.py"], []) + assert violations == [] + + +class TestGitHubClientGetAllModifiedFiles: + """Tests for get_all_modified_files method.""" + + @responses.activate + def test_single_commit(self): + """Get modified files from single commit.""" + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits", + json=[{"sha": "abc123"}], + status=200 + ) + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits/abc123", + json={ + "sha": "abc123", + "files": [ + {"filename": "main.py"}, + {"filename": "test_main.py"} + ] + }, + status=200 + ) + client = GitHubClient("test_token") + files = client.get_all_modified_files("org", "repo") + + assert len(files) == 2 + assert "main.py" in files + assert "test_main.py" in files + + @responses.activate + def test_multiple_commits(self): + """Get modified files from multiple commits.""" + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits", + json=[{"sha": "abc123"}, {"sha": "def456"}], + status=200 + ) + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits/abc123", + json={ + "sha": "abc123", + "files": [{"filename": "main.py"}] + }, + status=200 + ) + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits/def456", + json={ + "sha": "def456", + "files": [{"filename": "test_main.py"}] + }, + status=200 + ) + client = GitHubClient("test_token") + files = client.get_all_modified_files("org", "repo") + + assert len(files) == 2 + assert "main.py" in files + assert "test_main.py" in files + + @responses.activate + def test_duplicate_files_across_commits(self): + """Duplicate files across commits are deduplicated.""" + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits", + json=[{"sha": "abc123"}, {"sha": "def456"}], + status=200 + ) + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits/abc123", + json={ + "sha": "abc123", + "files": [{"filename": "main.py"}] + }, + status=200 + ) + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits/def456", + json={ + "sha": "def456", + "files": [{"filename": "main.py"}] # Same file + }, + status=200 + ) + client = GitHubClient("test_token") + files = client.get_all_modified_files("org", "repo") + + assert len(files) == 1 + assert "main.py" in files + + @responses.activate + def test_no_commits(self): + """No commits returns empty list.""" + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits", + json=[], + status=200 + ) + client = GitHubClient("test_token") + files = client.get_all_modified_files("org", "repo") + assert files == [] + + @responses.activate + def test_api_error(self): + """API error returns empty list.""" + responses.add( + responses.GET, + "https://api.github.com/repos/org/repo/commits", + json={"message": "Not Found"}, + status=404 + ) + client = GitHubClient("test_token") + files = client.get_all_modified_files("org", "repo") + assert files == [] + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/test_grader.py b/tests/test_grader.py index ec36b57..3786f66 100644 --- a/tests/test_grader.py +++ b/tests/test_grader.py @@ -83,43 +83,56 @@ def grader(self, mock_github): def test_no_violations(self, grader, mock_github): """Test success when no forbidden files are modified.""" - config = {"github-prefix": "lab1", "files": ["test_main.py"]} - mock_github.get_latest_commit.return_value = CommitInfo( - sha="abc123", - files=[{"filename": "main.cpp", "status": "modified"}] - ) + config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py"]} + mock_github.get_all_modified_files.return_value = ["main.cpp", "lib.cpp"] result = grader.check_forbidden_files("org", "lab1-user", config) assert result is None + def test_no_forbidden_files_config(self, grader, mock_github): + """Test returns None when forbidden-files not configured.""" + config = {"github-prefix": "lab1"} # No forbidden-files key + + result = grader.check_forbidden_files("org", "lab1-user", config) + + assert result is None + mock_github.get_all_modified_files.assert_not_called() + def test_test_main_modified(self, grader, mock_github): - """Test error when test_main.py is modified.""" - config = {"github-prefix": "lab1", "files": ["test_main.py"]} - mock_github.get_latest_commit.return_value = CommitInfo( - sha="abc123", - files=[{"filename": "test_main.py", "status": "modified"}] - ) + """Test warning when test_main.py is modified.""" + config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py"]} + mock_github.get_all_modified_files.return_value = ["main.cpp", "test_main.py"] result = grader.check_forbidden_files("org", "lab1-user", config) assert result is not None - assert result.status == GradeStatus.ERROR assert "test_main.py" in result.message + assert result.violations == ["test_main.py"] def test_tests_folder_modified(self, grader, mock_github): - """Test error when tests/ folder is modified.""" - config = {"github-prefix": "lab1", "files": ["test_main.py"]} - mock_github.get_latest_commit.return_value = CommitInfo( - sha="abc123", - files=[{"filename": "tests/test_unit.py", "status": "removed"}] - ) + """Test warning when tests/ folder is modified.""" + config = {"github-prefix": "lab1", "forbidden-files": ["tests/"]} + mock_github.get_all_modified_files.return_value = ["main.cpp", "tests/test_unit.py"] result = grader.check_forbidden_files("org", "lab1-user", config) assert result is not None - assert result.status == GradeStatus.ERROR - assert "tests/" in result.message + assert "tests/test_unit.py" in result.violations + + def test_multiple_violations(self, grader, mock_github): + """Test warning with multiple forbidden files.""" + config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py", "tests/"]} + mock_github.get_all_modified_files.return_value = [ + "main.cpp", "test_main.py", "tests/helper.py" + ] + + result = grader.check_forbidden_files("org", "lab1-user", config) + + assert result is not None + assert len(result.violations) == 2 + assert "test_main.py" in result.violations + assert "tests/helper.py" in result.violations class TestLabGraderEvaluateCI: diff --git a/tests/test_sheets_client.py b/tests/test_sheets_client.py index c151d49..135f4bb 100644 --- a/tests/test_sheets_client.py +++ b/tests/test_sheets_client.py @@ -17,6 +17,7 @@ can_overwrite_cell, format_cell_protection_message, prepare_grade_update, + format_forbidden_files_note, StudentLocation, LabColumn, GradeUpdate, @@ -215,5 +216,33 @@ def test_update_v_penalty_blocked(self): assert result.success is False +class TestFormatForbiddenFilesNote: + """Tests for format_forbidden_files_note function.""" + + def test_single_file(self): + """Format note with single file.""" + note = format_forbidden_files_note(["test_main.py"]) + assert "test_main.py" in note + assert "⚠️" in note + assert "запрещённые" in note.lower() + + def test_multiple_files(self): + """Format note with multiple files.""" + note = format_forbidden_files_note(["test_main.py", "tests/test.py"]) + assert "test_main.py" in note + assert "tests/test.py" in note + + def test_empty_list(self): + """Format note with empty list.""" + note = format_forbidden_files_note([]) + assert "⚠️" in note + + def test_contains_bullet_points(self): + """Note contains bullet points for each file.""" + note = format_forbidden_files_note(["file1.py", "file2.py"]) + assert "- file1.py" in note + assert "- file2.py" in note + + if __name__ == "__main__": pytest.main([__file__, "-v"]) From f307b84fc031fe175937f05294081dfd4d147188 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Dec 2025 12:39:39 +0000 Subject: [PATCH 2/2] Implement organization membership-based forbidden file detection This implementation distinguishes between instructor and student commits using GitHub Organization Membership API to prevent false positives when checking forbidden file modifications. Key changes: - Added `is_org_admin()` to check if commit author is org admin - Added `get_commits_with_authors()` to fetch commits with author info - Updated `check_forbidden_files()` to filter by commit author - Instructor commits are excluded from forbidden file checks - Students cannot forge this check (GitHub API protected) This solves the issue where: - Initial template commits would trigger false positives - Instructor bug fixes would trigger false positives - Different fork points for different students - Students could bypass detection by modifying files in early commits Tests: - All 98 unit tests passing - Added tests for instructor commit filtering - Fixed conftest.py to avoid unnecessary main.py imports --- CLAUDE.md | 12 ++++- grading/github_client.py | 103 ++++++++++++++++++++++++++++++++++----- grading/grader.py | 38 ++++++++++++--- tests/conftest.py | 16 ++++-- tests/test_grader.py | 64 +++++++++++++++++++++--- 5 files changed, 201 insertions(+), 32 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index de3b5ca..2b0897d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -86,11 +86,21 @@ labs: ### Forbidden Files Detection -When `forbidden-files` is configured, the system checks ALL commits in the repository: +When `forbidden-files` is configured, the system checks commits from students only: +- Commits by organization admins (instructors) are automatically excluded +- Uses GitHub Organization Membership API to determine instructor status +- Only modifications by non-admin users are checked against forbidden files - If violations found: grade is still written, but cell is highlighted yellow with a comment - Warning is shown to student on frontend - This helps detect students who modify test files to cheat +**How it works:** +1. System fetches all commits from student repository +2. For each commit, checks if author is an org admin via GitHub API +3. Instructor commits are skipped (they can update tests/templates) +4. Student commits are checked for forbidden file modifications +5. Works with GitHub Classroom - students are not org admins by default + ## CI/CD - **Tests**: Run on every push via `.github/workflows/tests.yml` diff --git a/grading/github_client.py b/grading/github_client.py index 32a7932..d8e6d88 100644 --- a/grading/github_client.py +++ b/grading/github_client.py @@ -197,21 +197,69 @@ def get_job_logs(self, org: str, repo: str, job_id: int) -> str | None: return resp.text - def get_all_modified_files(self, org: str, repo: str) -> list[str]: + def is_org_admin(self, org: str, username: str) -> bool: """ - Get all files that were modified across all commits in the repository. + Check if a user is an admin of the organization. - This iterates through all commits and collects unique file paths - that were added, modified, or removed. + This is used to distinguish instructor commits from student commits. + Only commits from org admins are considered "instructor commits" and + are excluded from forbidden file checks. + + Args: + org: Organization name + username: GitHub username to check + + Returns: + True if user is an admin, False otherwise + + Note: + Returns False if user is not a member, or if API call fails. + Students should NOT be org admins in GitHub Classroom setup. + """ + if not username: + return False + + # Check membership and role + url = f"{self.BASE_URL}/orgs/{org}/memberships/{username}" + resp = requests.get(url, headers=self.headers) + + if resp.status_code == 200: + data = resp.json() + role = data.get("role") + # Only admins are considered instructors + return role == "admin" + + # Not a member or API error - assume student + return False + + def get_commits_with_authors(self, org: str, repo: str) -> list[dict[str, Any]]: + """ + Get all commits with author information and modified files. + + This iterates through all commits and returns detailed information + including author GitHub login and modified files. Args: org: Organization or user name repo: Repository name Returns: - List of unique file paths that were modified in any commit + List of dicts with keys: + - sha: commit SHA + - author_login: GitHub username (or None if no GitHub author) + - files: list of modified file paths + + Example: + [ + { + "sha": "abc123", + "author_login": "student123", + "files": ["main.cpp", "test.cpp"] + }, + ... + ] """ - modified_files: set[str] = set() + commits_info = [] # Get all commits (paginated) page = 1 @@ -228,24 +276,57 @@ def get_all_modified_files(self, org: str, repo: str) -> list[str]: if not commits: break - # Get details for each commit to see modified files + # Get details for each commit for commit_summary in commits: commit_sha = commit_summary["sha"] + + # Extract author login (GitHub user) + author_login = None + if commit_summary.get("author"): + author_login = commit_summary["author"].get("login") + + # Get commit details with files commit_url = f"{self.BASE_URL}/repos/{org}/{repo}/commits/{commit_sha}" commit_resp = requests.get(commit_url, headers=self.headers) if commit_resp.status_code == 200: commit_data = commit_resp.json() - for file_info in commit_data.get("files", []): - filename = file_info.get("filename", "") - if filename: - modified_files.add(filename) + files = [f.get("filename") for f in commit_data.get("files", []) if f.get("filename")] + + commits_info.append({ + "sha": commit_sha, + "author_login": author_login, + "files": files, + }) # Check if there are more pages if len(commits) < per_page: break page += 1 + return commits_info + + def get_all_modified_files(self, org: str, repo: str) -> list[str]: + """ + Get all files that were modified across all commits in the repository. + + This iterates through all commits and collects unique file paths + that were added, modified, or removed. + + Args: + org: Organization or user name + repo: Repository name + + Returns: + List of unique file paths that were modified in any commit + """ + commits = self.get_commits_with_authors(org, repo) + modified_files: set[str] = set() + + for commit in commits: + for filename in commit["files"]: + modified_files.add(filename) + return list(modified_files) diff --git a/grading/grader.py b/grading/grader.py index 5b0ceff..51898e4 100644 --- a/grading/grader.py +++ b/grading/grader.py @@ -152,10 +152,11 @@ def check_forbidden_files( lab_config: dict[str, Any] ) -> ForbiddenFilesWarning | None: """ - Check for forbidden file modifications across all commits. + Check for forbidden file modifications by student commits. - This method checks ALL commits in the repository, not just the latest one, - to detect any forbidden file modifications. + This method checks ALL commits in the repository and filters out + commits made by organization admins (instructors). Only modifications + from student commits are considered violations. Args: org: GitHub organization @@ -165,19 +166,40 @@ def check_forbidden_files( Returns: ForbiddenFilesWarning with violations if found, None otherwise """ - # Get forbidden patterns from config (new field: forbidden-files) + # Get forbidden patterns from config forbidden = lab_config.get("forbidden-files", []) if not forbidden: return None - # Get ALL modified files across all commits - all_modified = self.github.get_all_modified_files(org, repo_name) + # Get all commits with author information + commits = self.github.get_commits_with_authors(org, repo_name) - if not all_modified: + if not commits: return None - violations = check_forbidden_files_in_list(all_modified, forbidden) + # Collect files modified by students (non-admin commits) + student_modified_files: set[str] = set() + + for commit in commits: + author_login = commit["author_login"] + + # Check if author is an org admin (instructor) + if author_login and self.github.is_org_admin(org, author_login): + # This is an instructor commit - skip it + logger.info(f"Skipping commit {commit['sha'][:7]} from instructor {author_login}") + continue + + # This is a student commit (or commit without GitHub author) + # Add all modified files from this commit + for filename in commit["files"]: + student_modified_files.add(filename) + + if not student_modified_files: + return None + + # Check if any student-modified files are forbidden + violations = check_forbidden_files_in_list(list(student_modified_files), forbidden) if violations: # Build warning message diff --git a/tests/conftest.py b/tests/conftest.py index 575f73d..3fa3814 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,7 @@ def mock_env_vars(monkeypatch): @pytest.fixture(autouse=True) def disable_rate_limiting(request): """Disable rate limiting in tests by patching the limiter. - + By default, rate limiting is disabled for all tests to avoid interference. To test rate limiting functionality, mark your test with @pytest.mark.rate_limit. """ @@ -31,11 +31,19 @@ def disable_rate_limiting(request): # Don't disable rate limiting for this test yield return - + + # Only patch if main module is actually imported + # This avoids importing main (and its dependencies like gspread) + # for tests that don't need it + if 'main' not in sys.modules: + # main not imported yet - no need to patch + yield + return + # Patch limiter after main module is imported # We need to patch _check_request_limit to do nothing # and also ensure view_rate_limit is set to avoid AttributeError - + def noop_check(*args, **kwargs): """No-op function to disable rate limiting in tests.""" # Extract request from args (it's the second argument: self, request, func, sync) @@ -44,7 +52,7 @@ def noop_check(*args, **kwargs): # Set view_rate_limit to avoid AttributeError in wrapper if hasattr(request, 'state') and not hasattr(request.state, 'view_rate_limit'): request.state.view_rate_limit = None - + # Patch using the full path to the method patcher = patch('main.limiter._check_request_limit', noop_check, create=False) patcher.start() diff --git a/tests/test_grader.py b/tests/test_grader.py index 28307b3..a85eabe 100644 --- a/tests/test_grader.py +++ b/tests/test_grader.py @@ -84,7 +84,11 @@ def grader(self, mock_github): def test_no_violations(self, grader, mock_github): """Test success when no forbidden files are modified.""" config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py"]} - mock_github.get_all_modified_files.return_value = ["main.cpp", "lib.cpp"] + # Mock commits with author info - student modified only allowed files + mock_github.get_commits_with_authors.return_value = [ + {"sha": "abc123", "author_login": "student123", "files": ["main.cpp", "lib.cpp"]} + ] + mock_github.is_org_admin.return_value = False # student is not admin result = grader.check_forbidden_files("org", "lab1-user", config) @@ -97,12 +101,16 @@ def test_no_forbidden_files_config(self, grader, mock_github): result = grader.check_forbidden_files("org", "lab1-user", config) assert result is None - mock_github.get_all_modified_files.assert_not_called() + mock_github.get_commits_with_authors.assert_not_called() def test_test_main_modified(self, grader, mock_github): - """Test warning when test_main.py is modified.""" + """Test warning when test_main.py is modified by student.""" config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py"]} - mock_github.get_all_modified_files.return_value = ["main.cpp", "test_main.py"] + # Student modified forbidden file + mock_github.get_commits_with_authors.return_value = [ + {"sha": "abc123", "author_login": "student123", "files": ["main.cpp", "test_main.py"]} + ] + mock_github.is_org_admin.return_value = False # student is not admin result = grader.check_forbidden_files("org", "lab1-user", config) @@ -111,9 +119,12 @@ def test_test_main_modified(self, grader, mock_github): assert result.violations == ["test_main.py"] def test_tests_folder_modified(self, grader, mock_github): - """Test warning when tests/ folder is modified.""" + """Test warning when tests/ folder is modified by student.""" config = {"github-prefix": "lab1", "forbidden-files": ["tests/"]} - mock_github.get_all_modified_files.return_value = ["main.cpp", "tests/test_unit.py"] + mock_github.get_commits_with_authors.return_value = [ + {"sha": "abc123", "author_login": "student123", "files": ["main.cpp", "tests/test_unit.py"]} + ] + mock_github.is_org_admin.return_value = False result = grader.check_forbidden_files("org", "lab1-user", config) @@ -123,9 +134,10 @@ def test_tests_folder_modified(self, grader, mock_github): def test_multiple_violations(self, grader, mock_github): """Test warning with multiple forbidden files.""" config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py", "tests/"]} - mock_github.get_all_modified_files.return_value = [ - "main.cpp", "test_main.py", "tests/helper.py" + mock_github.get_commits_with_authors.return_value = [ + {"sha": "abc123", "author_login": "student123", "files": ["main.cpp", "test_main.py", "tests/helper.py"]} ] + mock_github.is_org_admin.return_value = False result = grader.check_forbidden_files("org", "lab1-user", config) @@ -134,6 +146,42 @@ def test_multiple_violations(self, grader, mock_github): assert "test_main.py" in result.violations assert "tests/helper.py" in result.violations + def test_instructor_commits_ignored(self, grader, mock_github): + """Test that instructor commits are excluded from checks.""" + config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py"]} + # Two commits: one from instructor, one from student + mock_github.get_commits_with_authors.return_value = [ + {"sha": "abc123", "author_login": "professor", "files": ["test_main.py"]}, + {"sha": "def456", "author_login": "student123", "files": ["main.cpp"]} + ] + # Mock org admin check - professor is admin, student is not + def is_admin_side_effect(org, username): + return username == "professor" + mock_github.is_org_admin.side_effect = is_admin_side_effect + + result = grader.check_forbidden_files("org", "lab1-user", config) + + # No violations because only instructor modified test_main.py + assert result is None + + def test_mixed_commits_only_student_violations(self, grader, mock_github): + """Test that only student violations are reported when instructor also modified files.""" + config = {"github-prefix": "lab1", "forbidden-files": ["test_main.py", "tests/"]} + mock_github.get_commits_with_authors.return_value = [ + {"sha": "abc123", "author_login": "professor", "files": ["test_main.py", "tests/test1.py"]}, + {"sha": "def456", "author_login": "student123", "files": ["main.cpp", "tests/test2.py"]} + ] + def is_admin_side_effect(org, username): + return username == "professor" + mock_github.is_org_admin.side_effect = is_admin_side_effect + + result = grader.check_forbidden_files("org", "lab1-user", config) + + # Only student's modification of tests/ should be reported + assert result is not None + assert result.violations == ["tests/test2.py"] + assert "test_main.py" not in result.violations # This was modified by instructor + class TestLabGraderEvaluateCI: """Tests for LabGrader.evaluate_ci."""