diff --git a/-Headers b/-Headers new file mode 100644 index 0000000..e69de29 diff --git a/-Uri b/-Uri new file mode 100644 index 0000000..e69de29 diff --git a/.dockerignore b/.dockerignore index 9550202..b9e6308 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,5 @@ __pycache__/ node_modules/ .env -.git \ No newline at end of file +.git +venv/ \ No newline at end of file diff --git a/Accept b/Accept new file mode 100644 index 0000000..e69de29 diff --git a/Authorization b/Authorization new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md new file mode 100644 index 0000000..2c88d95 --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ + +```markdown +# Система обнаружения плагиата с помощью compare50 / Plagiarism Detection System + +## Быстрый старт / Quick Start + +### 1. Установка / Installation +```bash +pip install -r requirements.txt +pip install compare50 +``` + +### 2. Настройка / Configuration +```bash +echo "GITHUB_TOKEN=ваш_токен" > .env +cp credentials.example.json credentials.json +``` + +Конфигурация `courses/{course_id}.yaml`: +```yaml +labs: + "1": + plagiarism: + enabled: true # Включить проверку + threshold: 7.5 # Порог сходства (0-100) + reference_files: [data/distribution/lab1.cpp] # Эталонные файлы +``` + +### 3. 
Запуск проверки / Running Checks +```bash +# Запуск API сервера +uvicorn main:app --reload + +# Или прямое выполнение +python -m services.plagiarism.checker --course ваш_идентификатор_курса +``` + +## Основные возможности / Key Features +- **Автоматическое обнаружение плагиата** с использованием Compare50 + *Automated code similarity detection using Compare50* +- **Интеграция с GitHub CI** (проверяет только успешные сборки) + *GitHub CI integration (only checks passing builds)* +- **Экспорт результатов** в Google Таблицы + *Results exported to Google Sheets* +- **Генерация HTML отчетов** + *HTML reports generation* +- **REST API + кнопка в интерфейсе** + *REST API + frontend button* + +## Базовое использование / Basic Usage +1. Настройте YAML-файл курса + *Configure your course YAML file* +2. Запустите проверку через: + *Run the check via:* + - API: `POST /api/plagiarism/run/{course_id}` + - CLI: `python -m services.plagiarism.checker --course ваш_идентификатор_курса` + - Интерфейс: Кнопка "Запустить проверку на плагиат" + *Frontend: Click "Run Plagiarism Check" button* +3. 
Просмотр результатов: + *View results:* + - HTML: `reports/comparisons/{курс}/{лаба}/index.html` + *HTML: reports/comparisons/{course}/{lab}/index.html* + - Google Таблицы: Настроенная колонка статуса + *Google Sheets: Configured status column* + +## Требования / Requirements +- Python 3.10+ +- Compare50 +- Токен GitHub (права repo/workflow) + *GitHub token (repo/workflow permissions)* +- Аккаунт Google Service Account + *Google Service Account* diff --git a/backend.Dockerfile b/backend.Dockerfile index 0bdc087..b6a820a 100644 --- a/backend.Dockerfile +++ b/backend.Dockerfile @@ -2,6 +2,14 @@ FROM python:3.12-slim WORKDIR /app +# Install system dependencies for compare50 (Git + Rust) +RUN apt-get update && apt-get install -y \ + git \ + curl \ + && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \ + && export PATH="$HOME/.cargo/bin:$PATH" \ + && rm -rf /var/lib/apt/lists/* + COPY requirements.txt . RUN pip install --no-cache-dir --upgrade pip \ @@ -11,4 +19,4 @@ COPY . . EXPOSE 8000 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/courses/os-2023.yaml b/courses/os-2023.yaml index e644785..c45566c 100644 --- a/courses/os-2023.yaml +++ b/courses/os-2023.yaml @@ -1,124 +1,129 @@ course: - name: ld - logo: "/assets/machine-learning.png" - alt-names: - - ML - - МД - - Мо - semester: Spring 2025 - email: k43guap@ya.ru - timezone: UTC+3 - github: - organization: suai-diplom-2025 - teachers: - - "Mark Polyak" - - markpolyak - google: - spreadsheet: 10iVAKvJVUyrjf7kEqm1TGOYk0lufyDJDbBItI5tnqAc - info-sheet: График - task-id-column: 0 - student-name-column: 2 - lab-column-offset: 1 - staff: - - name: Поляк Марк Дмитриевич - title: ст. преп. - status: лектор - - name: Поляк Марк Дмитриевич - title: ст. преп. 
- status: лабораторные работы - labs: - "1": - github-prefix: ml-task1 - short-name: ЛР1 - taskid-max: 25 - penalty-max: 6 - ci: - - workflows - files: - - lab1.sh - moss: - language: c - max-matches: 1000 - local-path: lab1 - additional: - - suai-os-2020 - - suai-os-2021 - - suai-os-2022 - - suai-os-2023 - basefiles: - - - repo: k43guap/os-course-task1 - filename: lab1.sh - report: - - Цель работы - - Индивидуальное задание - - Описание входных данных - - Результат выполнения работы - - Исходный код программы с комментариями - - Выводы - "2": - github-prefix: ml-task2 - short-name: ЛР2 - taskid-max: 20 - taskid-shift: 4 - penalty-max: 9 - ci: - - workflows - files: - - lab2.cpp - moss: - language: cc - max-matches: 1000 - local-path: lab2 - additional: - - suai-os-2020 - - suai-os-2021 - - suai-os-2022 - - suai-os-2023 - basefiles: - - - repo: k43guap/os-course-task2 - filename: lab2.cpp - - - repo: k43guap/os-course-task2 - filename: examples/ex3.cpp - report: - - Цель работы - - Задание на лабораторную работу - - Граф запуска потоков - - Результат выполнения работы - - Исходный код программы с комментариями - - Выводы - "3": - github-prefix: ml-task3 - short-name: ЛР3 - taskid-max: 20 - penalty-max: 7 - ci: - - workflows - files: - - lab3.cpp - moss: - language: cc - max-matches: 1000 - local-path: lab3 - additional: - - suai-os-2020 - - suai-os-2021 - - suai-os-2022 - - suai-os-2023 - basefiles: - - - repo: k43guap/os-course-task3 - filename: lab3.cpp - report: - - Цель работы - - Задание на лабораторную работу - - Граф запуска потоков - - Результат выполнения работы - - Исходный код программы с комментариями - - Выводы -misc: + name: ld + logo: "/assets/machine-learning.png" + alt-names: + - ML + - МД + - Мо + semester: Spring 2024 + email: k43guap@ya.ru + timezone: UTC+3 + github: + organization: suai-os-2024f + prefix: ml-task # Global fallback prefix for all labs + teachers: + - "Mark Polyak" + - markpolyak + google: + spreadsheet: 
1cnHY7P9Rqnf7vc0FBzRiyvLCpV2KA6uTOO2SQp7s8SE + info-sheet: График + github-column: "AH" + status-column: "AI" + task-id-column: 0 + student-name-column: 2 + lab-column-offset: 1 + start-row: 3 + misc: requests-timeout: 5 + staff: + - name: Поляк Марк Дмитриевич + title: ст. преп. + status: лектор + - name: Поляк Марк Дмитриевич + title: ст. преп. + status: лабораторные работы + labs: + "1": + github-prefix: ml-task1 + short-name: ЛР1 + taskid-max: 25 + penalty-max: 6 + ci: true + files: + - lab1.sh + plagiarism: + enabled: false # Plagiarism check disabled for this lab + threshold: 7.5 # Example threshold + language: sh + max-matches: 1000 + additional: + - suai-os-2020 + - suai-os-2021 + - suai-os-2022 + - suai-os-2023 + basefiles: + - repo: k43guap/os-course-task1 + filename: lab1.sh + report: + - Цель работы + - Индивидуальное задание + - Описание входных данных + - Результат выполнения работы + - Исходный код программы с комментариями + - Выводы + "2": + github-prefix: os-task2 + short-name: ЛР2 + taskid-max: 20 + taskid-shift: 4 + penalty-max: 9 + ci: true + files: + - lab2.cpp + plagiarism: + enabled: true # Plagiarism check enabled for this lab + threshold: 7.5 + language: cc + reference_files: + - data/distribution/2/lab2.cpp + max-matches: 1000 + additional: + - suai-os-2020 + - suai-os-2021 + - suai-os-2022 + - suai-os-2023 + basefiles: + - repo: k43guap/os-course-task2 + filename: lab2.cpp + - repo: k43guap/os-course-task2 + filename: examples/ex3.cpp + report: + - Цель работы + - Задание на лабораторную работу + - Граф запуска потоков + - Результат выполнения работы + - Исходный код программы с комментариями + - Выводы + "3": + github-prefix: os-task3 + short-name: ЛР3 + taskid-max: 20 + penalty-max: 7 + ci: true + files: + - lab3.cpp + plagiarism: + enabled: false # Plagiarism check disabled for this lab + threshold: 7.5 + language: cc + reference_files: + - data/distribution/ld/3/lab3.cpp + max-matches: 1000 + additional: + - suai-os-2020 + - suai-os-2021 + - 
suai-os-2022 + - suai-os-2023 + basefiles: + - repo: k43guap/os-course-task3 + filename: lab3.cpp + report: + - Цель работы + - Задание на лабораторную работу + - Граф запуска потоков + - Результат выполнения работы + - Исходный код программы с комментариями + - Выводы - +misc: + requests-timeout: 5 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 11516ac..014cc6f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,3 +21,6 @@ services: - .:/app env_file: - .env + +volumes: + cargo-cache: \ No newline at end of file diff --git a/frontend/courses-front/package-lock.json b/frontend/courses-front/package-lock.json index 051900d..3747ea6 100644 --- a/frontend/courses-front/package-lock.json +++ b/frontend/courses-front/package-lock.json @@ -15,8 +15,10 @@ "@uiw/react-codemirror": "^4.23.12", "antd": "^5.24.4", "axios": "^1.9.0", + "i18next": "^23.0.1", "react": "^19.0.0", "react-dom": "^19.0.0", + "react-i18next": "^13.0.1", "react-router-dom": "^6.23.0", "rollup": "^4.34.7", "styled-components": "^6.1.16" @@ -2166,6 +2168,7 @@ "version": "19.1.4", "resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.4.tgz", "integrity": "sha512-EB1yiiYdvySuIITtD5lhW4yPyJ31RkJkkDw794LaQYrxCSaQV/47y5o1FMC4zF9ZyjUjzJMZwbovEnT5yHTW6g==", + "dev": true, "license": "MIT", "dependencies": { "csstype": "^3.0.2" @@ -4266,6 +4269,14 @@ "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", "license": "MIT" }, + "node_modules/html-parse-stringify": { + "version": "3.0.1", + "resolved": "https://registry.npmmirror.com/html-parse-stringify/-/html-parse-stringify-3.0.1.tgz", + "integrity": "sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg==", + "dependencies": { + "void-elements": "3.1.0" + } + }, "node_modules/http-errors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", @@ -4283,6 
+4294,28 @@ "node": ">= 0.8" } }, + "node_modules/i18next": { + "version": "23.16.8", + "resolved": "https://registry.npmmirror.com/i18next/-/i18next-23.16.8.tgz", + "integrity": "sha512-06r/TitrM88Mg5FdUXAKL96dJMzgqLE5dv3ryBAra4KCwD9mJ4ndOTS95ZuymIGoE+2hzfdaMak2X11/es7ZWg==", + "funding": [ + { + "type": "individual", + "url": "https://locize.com" + }, + { + "type": "individual", + "url": "https://locize.com/i18next.html" + }, + { + "type": "individual", + "url": "https://www.i18next.com/how-to/faq#i18next-is-awesome.-how-can-i-support-the-project" + } + ], + "dependencies": { + "@babel/runtime": "^7.23.2" + } + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", @@ -6151,6 +6184,27 @@ "react": "^19.1.0" } }, + "node_modules/react-i18next": { + "version": "13.5.0", + "resolved": "https://registry.npmmirror.com/react-i18next/-/react-i18next-13.5.0.tgz", + "integrity": "sha512-CFJ5NDGJ2MUyBohEHxljOq/39NQ972rh1ajnadG9BjTk+UXbHLq4z5DKEbEQBDoIhUmmbuS/fIMJKo6VOax1HA==", + "dependencies": { + "@babel/runtime": "^7.22.5", + "html-parse-stringify": "^3.0.1" + }, + "peerDependencies": { + "i18next": ">= 23.2.3", + "react": ">= 16.8.0" + }, + "peerDependenciesMeta": { + "react-dom": { + "optional": true + }, + "react-native": { + "optional": true + } + } + }, "node_modules/react-is": { "version": "19.1.0", "resolved": "https://registry.npmjs.org/react-is/-/react-is-19.1.0.tgz", @@ -7233,6 +7287,14 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/void-elements": { + "version": "3.1.0", + "resolved": "https://registry.npmmirror.com/void-elements/-/void-elements-3.1.0.tgz", + "integrity": "sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/w3c-keyname": { "version": "2.2.8", "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz", @@ -7368,21 +7430,6 @@ "dev": true, 
"license": "ISC" }, - "node_modules/yaml": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz", - "integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==", - "dev": true, - "license": "ISC", - "optional": true, - "peer": true, - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - } - }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", diff --git a/frontend/courses-front/src/App.css b/frontend/courses-front/src/App.css index f135615..467cd22 100644 --- a/frontend/courses-front/src/App.css +++ b/frontend/courses-front/src/App.css @@ -44,4 +44,27 @@ .read-the-docs { color: #888; -} \ No newline at end of file +} +/* Plagiarism specific styles +.plagiarism-link { + color: #3182ce; + text-decoration: none; + font-weight: 500; + padding: 0.5rem; + border-radius: 4px; + transition: #ebf8ff 0.2s; +} + +.plagiarism-link:hover { + background: #ebf8ff; + text-decoration: underline; +} + +.error-message { + color: #e53e3e; + background: #fff5f5; + padding: 1rem; + border-radius: 4px; + margin-bottom: 1rem; + border-left: 4px solid #e53e3e; +}*/ \ No newline at end of file diff --git a/frontend/courses-front/src/App.jsx b/frontend/courses-front/src/App.jsx index 54c8539..68ab848 100644 --- a/frontend/courses-front/src/App.jsx +++ b/frontend/courses-front/src/App.jsx @@ -35,4 +35,4 @@ function App() { ); } -export default App; +export default App; \ No newline at end of file diff --git a/frontend/courses-front/src/api/index.js b/frontend/courses-front/src/api/index.js index 3fbd4fc..dea9f9e 100644 --- a/frontend/courses-front/src/api/index.js +++ b/frontend/courses-front/src/api/index.js @@ -55,3 +55,28 @@ export async function gradeLab(courseId, groupId, labId, github) { return response.json(); } +/* +export const checkPlagiarism = async (repoUrl, files, sensitivity) => { + const response = 
await fetch(`${API_BASE_URL}/admin/plagiarism-check`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${localStorage.getItem('token')}` + }, + body: JSON.stringify({ repoUrl, files, sensitivity }), + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.message || 'Plagiarism check failed'); + } + + return response.json(); +}; + +/*export const getPlagiarismConfig = async (courseId, labId) => { + const response = await fetch( + `${API_BASE_URL}/courses/${courseId}/labs/${labId}/plagiarism-config` + ); + return response.json(); +};*/ \ No newline at end of file diff --git a/frontend/courses-front/src/components/admin/AdminLogin.jsx b/frontend/courses-front/src/components/admin/AdminLogin.jsx index 4d6e43e..e078f0a 100644 --- a/frontend/courses-front/src/components/admin/AdminLogin.jsx +++ b/frontend/courses-front/src/components/admin/AdminLogin.jsx @@ -65,4 +65,4 @@ export const AdminLogin = () => { ); -}; +}; \ No newline at end of file diff --git a/frontend/courses-front/src/components/admin/ProtectedRoute.jsx b/frontend/courses-front/src/components/admin/ProtectedRoute.jsx index fd9ff3f..5253263 100644 --- a/frontend/courses-front/src/components/admin/ProtectedRoute.jsx +++ b/frontend/courses-front/src/components/admin/ProtectedRoute.jsx @@ -23,4 +23,4 @@ export const ProtectedRoute = ({ children }) => { if (!isAuth) return ; return children; -}; +}; \ No newline at end of file diff --git a/frontend/courses-front/src/components/admin/styled.js b/frontend/courses-front/src/components/admin/styled.js index 8bdaf85..c095178 100644 --- a/frontend/courses-front/src/components/admin/styled.js +++ b/frontend/courses-front/src/components/admin/styled.js @@ -64,4 +64,4 @@ export const TextError = styled.p` @media (max-width: ${breakpoints.tablet}) { font-size: 12px; } -`; +`; \ No newline at end of file diff --git a/frontend/courses-front/src/components/course-list/index.jsx 
b/frontend/courses-front/src/components/course-list/index.jsx index 61f2d1e..8fdf710 100644 --- a/frontend/courses-front/src/components/course-list/index.jsx +++ b/frontend/courses-front/src/components/course-list/index.jsx @@ -189,7 +189,45 @@ export const CourseList = ({ onSelectCourse, isAdmin = false }) => { } }; +const handleRunPlagiarismCheck = async (courseId) => { + try { + // 1. Run plagiarism check + const runResponse = await fetch(`http://127.0.0.1:8000/api/plagiarism/run/${courseId}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${localStorage.getItem('token')}` + } + }); + + const { status, checked_labs, course_name } = await runResponse.json(); + showSnackbar(status || "Plagiarism check completed", "success"); + + // 2. Use first enabled lab + const activeLabId = checked_labs?.[0]; + if (!activeLabId) { + showSnackbar("No labs with plagiarism checking enabled", "warning"); + return; + } + // 3. Construct URL (no encoding needed for simple paths) + const reportUrl = `http://127.0.0.1:8000/reports/comparisons/${course_name}/${activeLabId}/index.html`; + + // 4. 
Directly open in new tab with forced load + const newWindow = window.open(reportUrl, '_blank', 'noopener,noreferrer'); + + // Fallback if blocked by popup blocker + if (!newWindow || newWindow.closed || typeof newWindow.closed === 'undefined') { + showSnackbar("Please allow popups for this site", "warning"); + // Alternative: redirect current tab + window.location.href = reportUrl; + } + + } catch (error) { + showSnackbar("Failed to generate plagiarism report", "error"); + console.error("Plagiarism check error:", error); + } +}; const languages = [ { code: "ru", label: "Русский" }, { code: "en", label: "English" }, @@ -341,6 +379,7 @@ export const CourseList = ({ onSelectCourse, isAdmin = false }) => { > {t("save")} + + )} @@ -434,4 +479,4 @@ export const CourseList = ({ onSelectCourse, isAdmin = false }) => { ); -}; +}; \ No newline at end of file diff --git a/frontend/courses-front/src/components/lab-list/index.jsx b/frontend/courses-front/src/components/lab-list/index.jsx index 781297d..1c0680e 100644 --- a/frontend/courses-front/src/components/lab-list/index.jsx +++ b/frontend/courses-front/src/components/lab-list/index.jsx @@ -42,4 +42,4 @@ export const LabList = ({ courseId, groupId, onSelectLab, onBack }) => { )} ); -}; +}; \ No newline at end of file diff --git a/frontend/courses-front/src/locales/en/translation.json b/frontend/courses-front/src/locales/en/translation.json index 0b2e4b1..9aef879 100644 --- a/frontend/courses-front/src/locales/en/translation.json +++ b/frontend/courses-front/src/locales/en/translation.json @@ -19,5 +19,6 @@ "confirmDeleteText": "Are you sure you want to delete this course?", "yes": "Yes", "no": "No", - "expand": "Expand" + "expand": "Expand", + "Plagiarism": "Plagiarism" } diff --git a/frontend/courses-front/src/locales/ru/translation.json b/frontend/courses-front/src/locales/ru/translation.json index d30f0b8..cfd4622 100644 --- a/frontend/courses-front/src/locales/ru/translation.json +++ 
b/frontend/courses-front/src/locales/ru/translation.json @@ -19,5 +19,6 @@ "confirmDeleteText": "Вы уверены, что хотите удалить этот курс?", "yes": "Да", "no": "Нет", - "expand": "Развернуть" + "expand": "Развернуть", + "Plagiarism": "Антиплагиат" } diff --git a/frontend/courses-front/src/locales/zh/translation.json b/frontend/courses-front/src/locales/zh/translation.json index 3abb718..b38eeb2 100644 --- a/frontend/courses-front/src/locales/zh/translation.json +++ b/frontend/courses-front/src/locales/zh/translation.json @@ -1,5 +1,4 @@ { - "loadCourse": "加载课程", "courseUploaded": "课程上传成功", "select": "选择", "edit": "编辑", @@ -24,5 +23,6 @@ "confirmDelete": "确认删除", "confirmDeleteMessage": "您确定要删除此课程吗?", "expand": "展开", - "loadCourse": "加载课程" + "loadCourse": "加载课程", + "Plagiarism": "运行抄袭检查" } diff --git a/main.py b/main.py index 5d2c207..cf6a494 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,6 @@ -from fastapi import FastAPI, Request, Response, HTTPException +from fastapi import FastAPI, Request, Response, HTTPException # type: ignore import os +from fastapi.staticfiles import StaticFiles import yaml import gspread import requests @@ -7,11 +8,20 @@ from pydantic import BaseModel, Field from fastapi.responses import FileResponse from fastapi.middleware.cors import CORSMiddleware -from fastapi import UploadFile, File +from fastapi import UploadFile, File, Depends from dotenv import load_dotenv from itsdangerous import TimestampSigner, BadSignature import re +#my edition +from services.plagiarism import ( + ComparisonConfig, + GitHubFileDownloader, + PlagiarismChecker +) +from pathlib import Path +from typing import List + load_dotenv() app = FastAPI() COURSES_DIR = "courses" @@ -27,6 +37,8 @@ allow_methods=["*"], # Разрешить все HTTP-методы allow_headers=["*"], # Разрешить все заголовки ) +REPORTS_DIR = Path("/mnt/e/summer practicals/lab_grader_web/reports").resolve() +app.mount("/reports", StaticFiles(directory=REPORTS_DIR), name="reports") signer = 
TimestampSigner(SECRET_KEY) class AuthRequest(BaseModel): @@ -106,6 +118,7 @@ def get_courses(): "semester": course_info.get("semester", "Unknown"), "logo": course_info.get("logo", "/assets/default.png"), "email": course_info.get("email", ""), + "config_id": os.path.splitext(filename)[0], # <-- ✅ Added this line }) return courses @@ -487,4 +500,56 @@ async def upload_course(file: UploadFile = File(...)): with open(file_location, "wb") as f: f.write(content) - return {"detail": "Курс успешно загружен"} \ No newline at end of file + return {"detail": "Курс успешно загружен"} + +def find_course_config(course_id: str) -> Path: + for ext in [".yaml", ".yml"]: + path = Path(f"courses/{course_id}{ext}") + if path.exists(): + return path + raise HTTPException(status_code=404, detail=f"Course configuration '{course_id}.yaml/.yml' not found") + +@app.post("/api/plagiarism/run/{course_id}") +async def run_plagiarism_check(course_id: str, request: Request): + # ... existing authentication code ... + + config_path = find_course_config(course_id) + with open(config_path) as f: + config = yaml.safe_load(f) + + # Find labs with plagiarism enabled + enabled_labs = [] + for lab_id, lab_config in config["course"]["labs"].items(): + if lab_config.get("plagiarism", {}).get("enabled", False): + enabled_labs.append(lab_id) + + if not enabled_labs: + return {"status": "no labs with plagiarism checking enabled"} + + # Run plagiarism check for each enabled lab + checker = PlagiarismChecker() + for lab_id in enabled_labs: + checker.run_pipeline(config["course"], lab_id) + + return { + "status": "completed", + "checked_labs": enabled_labs, + "course_name": config["course"]["name"] # Add this line + } + +@app.get("/api/plagiarism/report-url/{course_name}/{lab_id}") +async def get_plagiarism_report_url(course_name: str, lab_id: str): + """Returns the full URL to access the plagiarism report""" + report_path = Path(f"reports/comparisons/{course_name}/{lab_id}/index.html") + + if not 
report_path.exists(): + raise HTTPException( + status_code=404, + detail=f"Report not found at {report_path}" + ) + + # Construct full URL (adjust for your deployment) + base_url = "http://127.0.0.1:8000" + return { + "url": f"{base_url}/reports/comparisons/{course_name}/{lab_id}/index.html" + } \ No newline at end of file diff --git a/services/plagiarism/__init__.py b/services/plagiarism/__init__.py new file mode 100644 index 0000000..c3a46ae --- /dev/null +++ b/services/plagiarism/__init__.py @@ -0,0 +1,17 @@ +# This __init__.py makes the directory a Python package and allows +# other modules to import from plagiarism using cleaner syntax. +from .checker import PlagiarismChecker +from .downloader import GitHubFileDownloader +from .models import PlagiarismResult, ComparisonConfig, CodeMatch +from .sheets_manager import SheetsManager +from .parser import extract_matches_from_html + +__all__ = [ + 'PlagiarismChecker', + 'GitHubFileDownloader', + 'SheetsManager', + 'PlagiarismResult', + 'ComparisonConfig', + 'CodeMatch', + 'extract_matches_from_html' # <-- Add this line +] diff --git a/services/plagiarism/checker.py b/services/plagiarism/checker.py new file mode 100644 index 0000000..7b639f0 --- /dev/null +++ b/services/plagiarism/checker.py @@ -0,0 +1,240 @@ +import os +import shutil +import subprocess +from pathlib import Path +from typing import List, Dict, Optional +import glob + +from dotenv import load_dotenv +from .models import PlagiarismResult, ComparisonConfig +from .downloader import GitHubFileDownloader +from .sheets_manager import SheetsManager +from .parser import extract_matches_from_html + +def _resolve_lab_key(course_config: Dict, lab_id: str) -> str: + for key, value in course_config["labs"].items(): + if key == lab_id or value.get("short-name") == lab_id: + return key + raise KeyError(f"Could not resolve lab_id: '{lab_id}'") + + + +class PlagiarismChecker: + def __init__(self): + self.downloader = None + self.sheets = None + + def 
_normalize_path(self, path: str) -> str: + """Normalize paths to consistent format for matching""" + path = str(path).replace('\\', '/').lower() # Convert to unix-style and lowercase + # Remove any trailing filenames to focus on directory paths + if '/' in path: + path = path.rsplit('/', 1)[0] + return path + + def run_pipeline(self, course_config: Dict, lab_id: str) -> List[PlagiarismResult]: + self.sheets = SheetsManager(course_config) + self.downloader = GitHubFileDownloader( + github_token=os.getenv("GITHUB_TOKEN_PLAGIARISM") or os.getenv("GITHUB_TOKEN"), + course_config=course_config + ) + + lab_key = _resolve_lab_key(course_config, lab_id) + #lab_config = course_config["labs"][lab_key] + lab_config = course_config["labs"].get(lab_id) + if not lab_config or not lab_config.get("plagiarism", {}).get("enabled", False): + print(f"Skipping lab {lab_id} - plagiarism checking not enabled") + return [] + + download_dir = Path(f"data/submissions/{course_config['name']}/{lab_key}") + download_dir.mkdir(parents=True, exist_ok=True) + + students = self._get_valid_submissions(lab_config, download_dir) + + config = ComparisonConfig( + lab_id=lab_id, + course_id=course_config["name"], + threshold=lab_config["plagiarism"]["threshold"], + reference_files=[Path(p) for p in lab_config["plagiarism"]["reference_files"]], + compare50_args=lab_config["plagiarism"].get("compare50_args", []), + language=lab_config["plagiarism"]["language"], + max_matches=lab_config["plagiarism"]["max-matches"], + local_path=lab_config.get("github-prefix", f"lab-{lab_id}"), + additional_orgs=lab_config["plagiarism"]["additional"], + basefiles=lab_config["plagiarism"]["basefiles"], + download_dir=download_dir, + output_dir=Path(f"reports/comparisons/{course_config['name']}/{lab_key}") + ) + threshold = config.threshold + print(f"Using threshold: {threshold}") # Debug line + + self.run_comparison(config) + self._mark_reports_in_sheet(students, config) + return [] + + def _get_valid_submissions(self, 
lab_config: Dict, download_dir: Path) -> List[Dict]: + valid = [] + for student in self.sheets.get_student_repos(): + if self.downloader.download_submission(lab_config, student["github"], download_dir): + valid.append(student) + return valid + + + def _mark_reports_in_sheet(self, students: List[Dict], config: ComparisonConfig): + for student in students: + self.sheets.update_status(student['row'], "Not done") + print(f"Set default status for {student['github']} (row {student['row']}): Not done") + + index_html = config.output_dir / "index.html" + if not index_html.exists(): + print(f"❌ Expected HTML report not found at {index_html}") + return + + with open(index_html, "r", encoding="utf-8") as f: + html = f.read() + + matches = extract_matches_from_html(html) + threshold = config.threshold + + print("\n=== DEBUGGING INFORMATION ===") + print(f"Threshold: {threshold}") + print("\nAll students in sheet:") + for student in students: + print(f"- {student['github']} (row {student['row']})") + + print("\nRaw matches from compare50:") + for i, (source, target, score) in enumerate(matches, 1): + print(f"{i}. 
{source} ↔ {target} ({score})") + + # Create student mapping + student_map = {student['github']: student for student in students} + flagged_students = set() + + def extract_username(path: str) -> Optional[str]: + """Flexible username extraction from various path formats""" + path = path.replace('\\', '/').lower() + parts = [p for p in path.split('/') if p] + + # Try multiple extraction strategies + for i, part in enumerate(parts): + # Match known student usernames in any path position + if part in student_map: + return part + + # Fallback: look for username-like patterns + if len(parts) >= 2: + # Try second-to-last component + candidate = parts[-2] + if any(candidate in s['github'].lower() for s in students): + return candidate + + return None + + # Process each match + for source, target, score in matches: + if score >= threshold: + print(f"\nProcessing high-score match: {source} ↔ {target} ({score})") + + source_user = extract_username(source) + target_user = extract_username(target) + + print(f"Extracted usernames: source={source_user}, target={target_user}") + + # Skip if either username is invalid + if not source_user or not target_user: + print("Skipping - could not extract both usernames") + continue + + # Skip distribution matches + if 'distribution' in source.lower() or 'distribution' in target.lower(): + print("Skipping - matches distribution code") + continue + + # Flag both students involved + for username in [source_user, target_user]: + if username in student_map: + flagged_students.add(username) + print(f"Flagging {username} (row {student_map[username]['row']})") + else: + print(f"Username {username} not found in student records") + + # Update Google Sheets + print("\nFinal updates to Google Sheets:") + for student in students: + status = "⚠️ Detected" if student['github'] in flagged_students else "✓ not detected" + print(f"Updating {student['github']} (row {student['row']}): {status}") + self.sheets.update_status(student['row'], status) + + def 
run_comparison(self, config: ComparisonConfig) -> None: + # Remove the output directory to avoid overwrite prompts + if config.output_dir.exists(): + shutil.rmtree(config.output_dir) + + config.output_dir.mkdir(parents=True, exist_ok=True) + + filename = f"lab{config.lab_id}.cpp" + reference_file = Path(f"data/distribution/{config.course_id}/{config.lab_id}/{filename}") + submission_glob = f"data/submissions/{config.course_id}/{config.lab_id}/*/{filename}" + submission_files = glob.glob(submission_glob) + + if not reference_file.exists(): + print(f"❌ Reference file not found: {reference_file}") + return + + if not submission_files: + print(f"❌ No submission files found using glob: {submission_glob}") + return + + cmd = [ + "compare50", + "--distro", str(reference_file), + "--output", str(config.output_dir), + *submission_files + ] + + print("Running Compare50 with auto-confirm...") + print(" ".join(cmd)) + + # Pipe `yes` into compare50 to auto-confirm any prompt + yes_proc = subprocess.Popen(['yes'], stdout=subprocess.PIPE) + result = subprocess.run(cmd, stdin=yes_proc.stdout, capture_output=True, text=True) + yes_proc.stdout.close() # Allow yes to receive a SIGPIPE if compare50 exits + yes_proc.wait() + + if result.returncode != 0: + print("❌ Compare50 failed.") + print("STDOUT:", result.stdout) + print("STDERR:", result.stderr) + raise RuntimeError("compare50 execution failed") + + print(f"✔️ Compare50 completed. Report available at: {config.output_dir}") + + + def check(self, lab_id: str): + # 1. Run Compare50 and get the report directory path + report_path = Path("reports/comparisons/lab2/index.html") + + # 2. Read HTML + with open(report_path, encoding="utf-8") as f: + html = f.read() + + # 3. Extract match tuples (source, target, score) + matches = extract_matches_from_html(html) + + # 4. Filter matches by threshold (e.g., > 0.8) + flagged = [match for match in matches if match[2] >= self.config.threshold] + + # 5. 
class GitHubFileDownloader:
    """Fetch lab files from per-student GitHub repositories.

    A repository is only downloaded when its most recent GitHub Actions run
    has completed successfully.
    """

    # Seconds to wait for a single GitHub API response before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self, github_token: Optional[str], course_config: Dict):
        """Store the course config and build the request headers.

        Args:
            github_token: Token for the GitHub API. When ``None``, the
                ``GITHUB_TOKEN_PLAGIARISM`` and then ``GITHUB_TOKEN``
                environment variables are consulted.
            course_config: Parsed course configuration; its ``"github"``
                section is read by ``download_submission``.
        """
        if github_token is None:
            github_token = os.getenv("GITHUB_TOKEN_PLAGIARISM") or os.getenv("GITHUB_TOKEN")

        self.headers = {"Accept": "application/vnd.github.v3+json"}
        # Only send credentials when there are any: a literal "Bearer None"
        # header makes GitHub reject requests that would work anonymously.
        if github_token:
            self.headers["Authorization"] = f"Bearer {github_token}"
        self.course_config = course_config

    def download_submission(self, lab_config: Dict, github_user: str, save_dir: Path) -> Optional[Path]:
        """Download every file listed in ``lab_config["files"]`` for one student.

        The repository name is ``"{prefix}-{github_user}"``, where the prefix
        comes from the lab's ``"github-prefix"`` or, failing that, from the
        course-level ``github.prefix``.

        Returns:
            The path of the first file that was downloaded, or ``None`` when
            the CI check did not pass or no file could be fetched.
        """
        github_config = self.course_config["github"]
        prefix = lab_config.get("github-prefix", github_config.get("prefix", ""))
        repo = f"{prefix}-{github_user}"
        org = github_config["organization"]

        if not self._check_ci_passed(org, repo):
            print(f"Skipping {repo}: CI check failed.")
            return None

        target_dir = save_dir / github_user
        downloaded_files = []
        for filename in lab_config["files"]:
            file_path = self._download_file(org, repo, filename, target_dir)
            if file_path:
                downloaded_files.append(file_path)

        return downloaded_files[0] if downloaded_files else None

    def _check_ci_passed(self, org: str, repo: str) -> bool:
        """Return True when the latest Actions run completed with success.

        Any HTTP error, network failure, unparsable response or missing run
        is reported on stdout and treated as "not passed".
        """
        url = f"https://api.github.com/repos/{org}/{repo}/actions/runs?per_page=1"

        try:
            # Same timeout as file downloads, so one stalled connection
            # cannot hang the whole run.
            resp = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)

            if resp.status_code != 200:
                print(f"Failed to fetch CI status for {org}/{repo}. Status Code: {resp.status_code}")
                return False

            runs = resp.json().get("workflow_runs", [])
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            print(f"Error fetching CI status for {org}/{repo}: {e}")
            return False

        if not runs:
            print(f"No CI runs found for {org}/{repo}.")
            return False

        # Only the most recent run matters (the query asks for per_page=1).
        latest_run = runs[0]
        print(f"Latest CI run status: {latest_run.get('status')}, conclusion: {latest_run.get('conclusion')}")

        return (
            latest_run.get("status") == "completed"
            and latest_run.get("conclusion") == "success"
        )

    def _download_file(self, org: str, repo: str, filename: str, save_dir: Path) -> Optional[Path]:
        """Download one file's raw content into ``save_dir / filename``.

        Returns:
            The written path, or ``None`` when the file is missing from the
            repository or the request / write failed.
        """
        url = f"https://api.github.com/repos/{org}/{repo}/contents/{filename}"
        raw_headers = {**self.headers, "Accept": "application/vnd.github.v3.raw"}

        try:
            response = requests.get(url, headers=raw_headers, timeout=self.REQUEST_TIMEOUT)

            if response.status_code == 200:
                save_path = save_dir / filename
                # `filename` may contain sub-directories (e.g. "src/main.cpp"),
                # so create the file's own parent, not just save_dir.
                save_path.parent.mkdir(parents=True, exist_ok=True)
                save_path.write_bytes(response.content)
                return save_path

            print(f"❌ {filename} not found in {repo} (status {response.status_code})")
        except (requests.RequestException, OSError) as e:
            print(f"❗ Exception downloading {filename} from {repo}: {e}")
        return None
def extract_matches_from_html(html: str) -> List[Tuple[str, str, float]]:
    """Extract ``(source, target, score)`` tuples from a compare50 HTML report.

    The report is expected to embed its match graph as a JavaScript
    assignment ``var GRAPH = {...}`` whose right-hand side is valid JSON.

    Args:
        html: Full text of the report page.

    Returns:
        One tuple per entry of the graph's ``"links"`` list; an empty list
        when the page has no ``GRAPH`` assignment or the graph has no links.
        ``source``/``target`` may be given either as plain ids or as objects
        with an ``"id"`` key.

    Raises:
        json.JSONDecodeError: if the text after ``var GRAPH =`` is not valid
            JSON.
    """
    marker = re.search(r'var\s+GRAPH\s*=\s*(?=\{)', html)
    if not marker:
        return []

    # Let the JSON parser find the end of the object. A regex such as
    # `{.*?}\s*;` stops at the first "};" it sees, which may sit inside a
    # string value, and it also requires a trailing semicolon.
    graph, _ = json.JSONDecoder().raw_decode(html, marker.end())

    def endpoint_id(endpoint):
        return endpoint['id'] if isinstance(endpoint, dict) else endpoint

    return [
        (endpoint_id(link['source']), endpoint_id(link['target']), float(link['value']))
        for link in graph.get('links', [])
    ]
def excel_col_to_index(col: str) -> int:
    """Convert an Excel-style column label to a 1-based index.

    ``"A"`` -> 1, ``"Z"`` -> 26, ``"AA"`` -> 27. Case and surrounding
    whitespace are ignored.

    Raises:
        ValueError: if the label is empty or contains anything other than
            the letters A-Z.
    """
    label = col.strip().upper()
    if not label or not all("A" <= ch <= "Z" for ch in label):
        raise ValueError(f"Invalid column label: {col!r}")

    index = 0
    for ch in label:
        index = index * 26 + (ord(ch) - ord("A") + 1)
    return index


class SheetsManager:
    """Read student GitHub names from, and write plagiarism status to, a Google Sheet.

    All operations act on the first worksheet of the spreadsheet named by
    ``config["google"]["spreadsheet"]``.
    """

    def __init__(self, config: Dict):
        """Authorize against Google using ``credentials.json`` and keep ``config``."""
        self.scope = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive"
        ]
        self.creds = ServiceAccountCredentials.from_json_keyfile_name(
            "credentials.json", self.scope
        )
        self.client = gspread.authorize(self.creds)
        self.config = config

    def _open_worksheet(self):
        """Open the first worksheet of the configured spreadsheet."""
        sheet = self.client.open_by_key(self.config["google"]["spreadsheet"])
        return sheet.get_worksheet(0)

    def get_student_repos(self) -> List[Dict]:
        """List students that have a GitHub name in the configured column.

        Rows before ``config["google"]["start-row"]``, rows too short to
        reach the GitHub column, and rows with a blank GitHub cell are
        skipped.

        Returns:
            Dicts with ``"github"`` (stripped cell text) and ``"row"``
            (1-based sheet row).
        """
        worksheet = self._open_worksheet()
        github_col = excel_col_to_index(self.config["google"]["github-column"])
        start_row = self.config["google"]["start-row"]

        valid_students = []
        for row_number, row in enumerate(worksheet.get_all_values(), start=1):
            if row_number < start_row or len(row) < github_col:
                continue
            github = row[github_col - 1].strip()
            if github:
                valid_students.append({"github": github, "row": row_number})

        return valid_students

    def update_status(self, row: int, message: str):
        """Write ``message`` into the status column of sheet row ``row`` (1-based)."""
        worksheet = self._open_worksheet()
        status_col = excel_col_to_index(self.config["google"]["status-column"])
        print(f"Updating status at row {row}: {message}")  # Debug line
        worksheet.update_cell(row, status_col, message)

    def flag_plagiarism(self, source: str, target: str, score: float):
        """Mark both sides of a flagged pair in the status column.

        ``source`` and ``target`` are compared verbatim against the GitHub
        column; a side with no matching row is silently left alone.
        """
        worksheet = self._open_worksheet()
        print(f"Flagging plagiarism between {source} and {target} with score {score}")

        # update_cell takes a numeric column, so convert the configured
        # letter exactly as update_status does.
        status_col = excel_col_to_index(self.config["google"]["status-column"])

        for github in (source, target):
            row = self._find_student_row(github, worksheet)
            if row:
                worksheet.update_cell(row, status_col, f"⚠️ Detected: {score}")

    def _find_student_row(self, github: str, worksheet) -> "int | None":
        """Return the 1-based row whose GitHub cell equals ``github``, else ``None``."""
        github_idx = excel_col_to_index(self.config["google"]["github-column"]) - 1  # 0-indexed

        for row_number, row in enumerate(worksheet.get_all_values(), start=1):
            # Rows can be shorter than the GitHub column (or empty); skip
            # them instead of raising IndexError.
            if len(row) > github_idx and row[github_idx].strip() == github:
                return row_number
        return None