diff --git a/-Headers b/-Headers
new file mode 100644
index 0000000..e69de29
diff --git a/-Uri b/-Uri
new file mode 100644
index 0000000..e69de29
diff --git a/.dockerignore b/.dockerignore
index 9550202..b9e6308 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,5 @@
__pycache__/
node_modules/
.env
-.git
\ No newline at end of file
+.git
+venv/
\ No newline at end of file
diff --git a/Accept b/Accept
new file mode 100644
index 0000000..e69de29
diff --git a/Authorization b/Authorization
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2c88d95
--- /dev/null
+++ b/README.md
@@ -0,0 +1,72 @@
+
+
+# Система обнаружения плагиата с помощью compare50 / Plagiarism Detection System
+
+## Быстрый старт / Quick Start
+
+### 1. Установка / Installation
+```bash
+pip install -r requirements.txt
+pip install compare50
+```
+
+### 2. Настройка / Configuration
+```bash
+echo "GITHUB_TOKEN=ваш_токен" > .env
+cp credentials.example.json credentials.json
+```
+
+Конфигурация `courses/{course_id}.yaml`:
+```yaml
+labs:
+ "1":
+ plagiarism:
+ enabled: true # Включить проверку
+ threshold: 7.5 # Порог сходства (0-100)
+ reference_files: [data/distribution/lab1.cpp] # Эталонные файлы
+```
+
+### 3. Запуск проверки / Running Checks
+```bash
+# Запуск API сервера
+uvicorn main:app --reload
+
+# Или прямое выполнение
+python -m services.plagiarism.checker --course ваш_идентификатор_курса
+```
+
+## Основные возможности / Key Features
+- **Автоматическое обнаружение плагиата** с использованием Compare50
+ *Automated code similarity detection using Compare50*
+- **Интеграция с GitHub CI** (проверяет только успешные сборки)
+ *GitHub CI integration (only checks passing builds)*
+- **Экспорт результатов** в Google Таблицы
+ *Results exported to Google Sheets*
+- **Генерация HTML отчетов**
+ *HTML reports generation*
+- **REST API + кнопка в интерфейсе**
+ *REST API + frontend button*
+
+## Базовое использование / Basic Usage
+1. Настройте YAML-файл курса
+ *Configure your course YAML file*
+2. Запустите проверку через:
+ *Run the check via:*
+ - API: `POST /api/plagiarism/run/{course_id}`
+ - CLI: `python -m services.plagiarism.checker --course ваш_идентификатор_курса`
+ - Интерфейс: Кнопка "Запустить проверку на плагиат"
+ *Frontend: Click "Run Plagiarism Check" button*
+3. Просмотр результатов:
+ *View results:*
+ - HTML: `reports/comparisons/{курс}/{лаба}/index.html`
+ *HTML: reports/comparisons/{course}/{lab}/index.html*
+ - Google Таблицы: Настроенная колонка статуса
+ *Google Sheets: Configured status column*
+
+## Требования / Requirements
+- Python 3.10+
+- Compare50
+- Токен GitHub (права repo/workflow)
+ *GitHub token (repo/workflow permissions)*
+- Сервисный аккаунт Google
+ *Google Service Account*
diff --git a/backend.Dockerfile b/backend.Dockerfile
index 0bdc087..b6a820a 100644
--- a/backend.Dockerfile
+++ b/backend.Dockerfile
@@ -2,6 +2,14 @@ FROM python:3.12-slim
WORKDIR /app
+# Install system dependencies for compare50 (Git + Rust)
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
+    && rm -rf /var/lib/apt/lists/*
+# An `export` inside RUN only lives for that one shell; ENV keeps cargo on PATH
+# for the later `pip install` layer and at runtime (rustup installs under /root).
+ENV PATH="/root/.cargo/bin:${PATH}"
+
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
@@ -11,4 +19,4 @@ COPY . .
EXPOSE 8000
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/courses/os-2023.yaml b/courses/os-2023.yaml
index e644785..c45566c 100644
--- a/courses/os-2023.yaml
+++ b/courses/os-2023.yaml
@@ -1,124 +1,129 @@
course:
- name: ld
- logo: "/assets/machine-learning.png"
- alt-names:
- - ML
- - МД
- - Мо
- semester: Spring 2025
- email: k43guap@ya.ru
- timezone: UTC+3
- github:
- organization: suai-diplom-2025
- teachers:
- - "Mark Polyak"
- - markpolyak
- google:
- spreadsheet: 10iVAKvJVUyrjf7kEqm1TGOYk0lufyDJDbBItI5tnqAc
- info-sheet: График
- task-id-column: 0
- student-name-column: 2
- lab-column-offset: 1
- staff:
- - name: Поляк Марк Дмитриевич
- title: ст. преп.
- status: лектор
- - name: Поляк Марк Дмитриевич
- title: ст. преп.
- status: лабораторные работы
- labs:
- "1":
- github-prefix: ml-task1
- short-name: ЛР1
- taskid-max: 25
- penalty-max: 6
- ci:
- - workflows
- files:
- - lab1.sh
- moss:
- language: c
- max-matches: 1000
- local-path: lab1
- additional:
- - suai-os-2020
- - suai-os-2021
- - suai-os-2022
- - suai-os-2023
- basefiles:
- -
- repo: k43guap/os-course-task1
- filename: lab1.sh
- report:
- - Цель работы
- - Индивидуальное задание
- - Описание входных данных
- - Результат выполнения работы
- - Исходный код программы с комментариями
- - Выводы
- "2":
- github-prefix: ml-task2
- short-name: ЛР2
- taskid-max: 20
- taskid-shift: 4
- penalty-max: 9
- ci:
- - workflows
- files:
- - lab2.cpp
- moss:
- language: cc
- max-matches: 1000
- local-path: lab2
- additional:
- - suai-os-2020
- - suai-os-2021
- - suai-os-2022
- - suai-os-2023
- basefiles:
- -
- repo: k43guap/os-course-task2
- filename: lab2.cpp
- -
- repo: k43guap/os-course-task2
- filename: examples/ex3.cpp
- report:
- - Цель работы
- - Задание на лабораторную работу
- - Граф запуска потоков
- - Результат выполнения работы
- - Исходный код программы с комментариями
- - Выводы
- "3":
- github-prefix: ml-task3
- short-name: ЛР3
- taskid-max: 20
- penalty-max: 7
- ci:
- - workflows
- files:
- - lab3.cpp
- moss:
- language: cc
- max-matches: 1000
- local-path: lab3
- additional:
- - suai-os-2020
- - suai-os-2021
- - suai-os-2022
- - suai-os-2023
- basefiles:
- -
- repo: k43guap/os-course-task3
- filename: lab3.cpp
- report:
- - Цель работы
- - Задание на лабораторную работу
- - Граф запуска потоков
- - Результат выполнения работы
- - Исходный код программы с комментариями
- - Выводы
-misc:
+ name: ld
+ logo: "/assets/machine-learning.png"
+ alt-names:
+ - ML
+ - МД
+ - Мо
+ semester: Spring 2024
+ email: k43guap@ya.ru
+ timezone: UTC+3
+ github:
+ organization: suai-os-2024f
+ prefix: ml-task # Global fallback prefix for all labs
+ teachers:
+ - "Mark Polyak"
+ - markpolyak
+ google:
+ spreadsheet: 1cnHY7P9Rqnf7vc0FBzRiyvLCpV2KA6uTOO2SQp7s8SE
+ info-sheet: График
+ github-column: "AH"
+ status-column: "AI"
+ task-id-column: 0
+ student-name-column: 2
+ lab-column-offset: 1
+ start-row: 3
+ misc:
requests-timeout: 5
+ staff:
+ - name: Поляк Марк Дмитриевич
+ title: ст. преп.
+ status: лектор
+ - name: Поляк Марк Дмитриевич
+ title: ст. преп.
+ status: лабораторные работы
+ labs:
+ "1":
+ github-prefix: ml-task1
+ short-name: ЛР1
+ taskid-max: 25
+ penalty-max: 6
+ ci: true
+ files:
+ - lab1.sh
+ plagiarism:
+ enabled: false # Plagiarism check disabled for this lab
+ threshold: 7.5 # Example threshold
+ language: sh
+ max-matches: 1000
+ additional:
+ - suai-os-2020
+ - suai-os-2021
+ - suai-os-2022
+ - suai-os-2023
+ basefiles:
+ - repo: k43guap/os-course-task1
+ filename: lab1.sh
+ report:
+ - Цель работы
+ - Индивидуальное задание
+ - Описание входных данных
+ - Результат выполнения работы
+ - Исходный код программы с комментариями
+ - Выводы
+ "2":
+ github-prefix: os-task2
+ short-name: ЛР2
+ taskid-max: 20
+ taskid-shift: 4
+ penalty-max: 9
+ ci: true
+ files:
+ - lab2.cpp
+ plagiarism:
+ enabled: true # Plagiarism check enabled for this lab
+ threshold: 7.5
+ language: cc
+ reference_files:
+ - data/distribution/2/lab2.cpp
+ max-matches: 1000
+ additional:
+ - suai-os-2020
+ - suai-os-2021
+ - suai-os-2022
+ - suai-os-2023
+ basefiles:
+ - repo: k43guap/os-course-task2
+ filename: lab2.cpp
+ - repo: k43guap/os-course-task2
+ filename: examples/ex3.cpp
+ report:
+ - Цель работы
+ - Задание на лабораторную работу
+ - Граф запуска потоков
+ - Результат выполнения работы
+ - Исходный код программы с комментариями
+ - Выводы
+ "3":
+ github-prefix: os-task3
+ short-name: ЛР3
+ taskid-max: 20
+ penalty-max: 7
+ ci: true
+ files:
+ - lab3.cpp
+ plagiarism:
+ enabled: false # Plagiarism check disabled for this lab
+ threshold: 7.5
+ language: cc
+ reference_files:
+ - data/distribution/ld/3/lab3.cpp
+ max-matches: 1000
+ additional:
+ - suai-os-2020
+ - suai-os-2021
+ - suai-os-2022
+ - suai-os-2023
+ basefiles:
+ - repo: k43guap/os-course-task3
+ filename: lab3.cpp
+ report:
+ - Цель работы
+ - Задание на лабораторную работу
+ - Граф запуска потоков
+ - Результат выполнения работы
+ - Исходный код программы с комментариями
+ - Выводы
-
+misc:
+ requests-timeout: 5
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 11516ac..014cc6f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,3 +21,6 @@ services:
- .:/app
env_file:
- .env
+
+volumes:
+ cargo-cache:
\ No newline at end of file
diff --git a/frontend/courses-front/package-lock.json b/frontend/courses-front/package-lock.json
index 051900d..3747ea6 100644
--- a/frontend/courses-front/package-lock.json
+++ b/frontend/courses-front/package-lock.json
@@ -15,8 +15,10 @@
"@uiw/react-codemirror": "^4.23.12",
"antd": "^5.24.4",
"axios": "^1.9.0",
+ "i18next": "^23.0.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
+ "react-i18next": "^13.0.1",
"react-router-dom": "^6.23.0",
"rollup": "^4.34.7",
"styled-components": "^6.1.16"
@@ -2166,6 +2168,7 @@
"version": "19.1.4",
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.4.tgz",
"integrity": "sha512-EB1yiiYdvySuIITtD5lhW4yPyJ31RkJkkDw794LaQYrxCSaQV/47y5o1FMC4zF9ZyjUjzJMZwbovEnT5yHTW6g==",
+ "dev": true,
"license": "MIT",
"dependencies": {
"csstype": "^3.0.2"
@@ -4266,6 +4269,14 @@
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
"license": "MIT"
},
+ "node_modules/html-parse-stringify": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmmirror.com/html-parse-stringify/-/html-parse-stringify-3.0.1.tgz",
+ "integrity": "sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg==",
+ "dependencies": {
+ "void-elements": "3.1.0"
+ }
+ },
"node_modules/http-errors": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz",
@@ -4283,6 +4294,28 @@
"node": ">= 0.8"
}
},
+ "node_modules/i18next": {
+ "version": "23.16.8",
+ "resolved": "https://registry.npmmirror.com/i18next/-/i18next-23.16.8.tgz",
+ "integrity": "sha512-06r/TitrM88Mg5FdUXAKL96dJMzgqLE5dv3ryBAra4KCwD9mJ4ndOTS95ZuymIGoE+2hzfdaMak2X11/es7ZWg==",
+ "funding": [
+ {
+ "type": "individual",
+ "url": "https://locize.com"
+ },
+ {
+ "type": "individual",
+ "url": "https://locize.com/i18next.html"
+ },
+ {
+ "type": "individual",
+ "url": "https://www.i18next.com/how-to/faq#i18next-is-awesome.-how-can-i-support-the-project"
+ }
+ ],
+ "dependencies": {
+ "@babel/runtime": "^7.23.2"
+ }
+ },
"node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
@@ -6151,6 +6184,27 @@
"react": "^19.1.0"
}
},
+ "node_modules/react-i18next": {
+ "version": "13.5.0",
+ "resolved": "https://registry.npmmirror.com/react-i18next/-/react-i18next-13.5.0.tgz",
+ "integrity": "sha512-CFJ5NDGJ2MUyBohEHxljOq/39NQ972rh1ajnadG9BjTk+UXbHLq4z5DKEbEQBDoIhUmmbuS/fIMJKo6VOax1HA==",
+ "dependencies": {
+ "@babel/runtime": "^7.22.5",
+ "html-parse-stringify": "^3.0.1"
+ },
+ "peerDependencies": {
+ "i18next": ">= 23.2.3",
+ "react": ">= 16.8.0"
+ },
+ "peerDependenciesMeta": {
+ "react-dom": {
+ "optional": true
+ },
+ "react-native": {
+ "optional": true
+ }
+ }
+ },
"node_modules/react-is": {
"version": "19.1.0",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-19.1.0.tgz",
@@ -7233,6 +7287,14 @@
"node": "^10 || ^12 || >=14"
}
},
+ "node_modules/void-elements": {
+ "version": "3.1.0",
+ "resolved": "https://registry.npmmirror.com/void-elements/-/void-elements-3.1.0.tgz",
+ "integrity": "sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==",
+ "engines": {
+ "node": ">=0.10.0"
+ }
+ },
"node_modules/w3c-keyname": {
"version": "2.2.8",
"resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz",
@@ -7368,21 +7430,6 @@
"dev": true,
"license": "ISC"
},
- "node_modules/yaml": {
- "version": "2.8.0",
- "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz",
- "integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==",
- "dev": true,
- "license": "ISC",
- "optional": true,
- "peer": true,
- "bin": {
- "yaml": "bin.mjs"
- },
- "engines": {
- "node": ">= 14.6"
- }
- },
"node_modules/yocto-queue": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz",
diff --git a/frontend/courses-front/src/App.css b/frontend/courses-front/src/App.css
index f135615..467cd22 100644
--- a/frontend/courses-front/src/App.css
+++ b/frontend/courses-front/src/App.css
@@ -44,4 +44,27 @@
.read-the-docs {
color: #888;
-}
\ No newline at end of file
+}
+/* Plagiarism specific styles
+.plagiarism-link {
+ color: #3182ce;
+ text-decoration: none;
+ font-weight: 500;
+ padding: 0.5rem;
+ border-radius: 4px;
+ transition: #ebf8ff 0.2s;
+}
+
+.plagiarism-link:hover {
+ background: #ebf8ff;
+ text-decoration: underline;
+}
+
+.error-message {
+ color: #e53e3e;
+ background: #fff5f5;
+ padding: 1rem;
+ border-radius: 4px;
+ margin-bottom: 1rem;
+ border-left: 4px solid #e53e3e;
+}*/
\ No newline at end of file
diff --git a/frontend/courses-front/src/App.jsx b/frontend/courses-front/src/App.jsx
index 54c8539..68ab848 100644
--- a/frontend/courses-front/src/App.jsx
+++ b/frontend/courses-front/src/App.jsx
@@ -35,4 +35,4 @@ function App() {
);
}
-export default App;
+export default App;
\ No newline at end of file
diff --git a/frontend/courses-front/src/api/index.js b/frontend/courses-front/src/api/index.js
index 3fbd4fc..dea9f9e 100644
--- a/frontend/courses-front/src/api/index.js
+++ b/frontend/courses-front/src/api/index.js
@@ -55,3 +55,28 @@ export async function gradeLab(courseId, groupId, labId, github) {
return response.json();
}
+/*
+export const checkPlagiarism = async (repoUrl, files, sensitivity) => {
+ const response = await fetch(`${API_BASE_URL}/admin/plagiarism-check`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "Authorization": `Bearer ${localStorage.getItem('token')}`
+ },
+ body: JSON.stringify({ repoUrl, files, sensitivity }),
+ });
+
+ if (!response.ok) {
+ const errorData = await response.json();
+ throw new Error(errorData.message || 'Plagiarism check failed');
+ }
+
+ return response.json();
+};
+
+/*export const getPlagiarismConfig = async (courseId, labId) => {
+ const response = await fetch(
+ `${API_BASE_URL}/courses/${courseId}/labs/${labId}/plagiarism-config`
+ );
+ return response.json();
+};*/
\ No newline at end of file
diff --git a/frontend/courses-front/src/components/admin/AdminLogin.jsx b/frontend/courses-front/src/components/admin/AdminLogin.jsx
index 4d6e43e..e078f0a 100644
--- a/frontend/courses-front/src/components/admin/AdminLogin.jsx
+++ b/frontend/courses-front/src/components/admin/AdminLogin.jsx
@@ -65,4 +65,4 @@ export const AdminLogin = () => {
);
-};
+};
\ No newline at end of file
diff --git a/frontend/courses-front/src/components/admin/ProtectedRoute.jsx b/frontend/courses-front/src/components/admin/ProtectedRoute.jsx
index fd9ff3f..5253263 100644
--- a/frontend/courses-front/src/components/admin/ProtectedRoute.jsx
+++ b/frontend/courses-front/src/components/admin/ProtectedRoute.jsx
@@ -23,4 +23,4 @@ export const ProtectedRoute = ({ children }) => {
if (!isAuth) return ;
return children;
-};
+};
\ No newline at end of file
diff --git a/frontend/courses-front/src/components/admin/styled.js b/frontend/courses-front/src/components/admin/styled.js
index 8bdaf85..c095178 100644
--- a/frontend/courses-front/src/components/admin/styled.js
+++ b/frontend/courses-front/src/components/admin/styled.js
@@ -64,4 +64,4 @@ export const TextError = styled.p`
@media (max-width: ${breakpoints.tablet}) {
font-size: 12px;
}
-`;
+`;
\ No newline at end of file
diff --git a/frontend/courses-front/src/components/course-list/index.jsx b/frontend/courses-front/src/components/course-list/index.jsx
index 61f2d1e..8fdf710 100644
--- a/frontend/courses-front/src/components/course-list/index.jsx
+++ b/frontend/courses-front/src/components/course-list/index.jsx
@@ -189,7 +189,45 @@ export const CourseList = ({ onSelectCourse, isAdmin = false }) => {
}
};
+/**
+ * Runs the plagiarism check for a course and opens the report of the first
+ * checked lab in a new tab.
+ *
+ * @param {string} courseId - Course config id sent to POST /api/plagiarism/run/{id}.
+ * @returns {Promise<void>} Resolves after the report was opened or an error was shown.
+ */
+const handleRunPlagiarismCheck = async (courseId) => {
+  // Backend origin; same address this handler has always talked to.
+  const apiOrigin = "http://127.0.0.1:8000";
+
+  try {
+    // 1. Run plagiarism check
+    const runResponse = await fetch(
+      `${apiOrigin}/api/plagiarism/run/${encodeURIComponent(courseId)}`,
+      {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'Authorization': `Bearer ${localStorage.getItem('token')}`
+        }
+      }
+    );
+
+    // fetch() resolves on 4xx/5xx as well, so HTTP errors must be rejected by hand;
+    // otherwise an error payload would be reported as a successful check.
+    if (!runResponse.ok) {
+      throw new Error(`Plagiarism check failed with HTTP ${runResponse.status}`);
+    }
+
+    const { status, checked_labs, course_name } = await runResponse.json();
+
+    // 2. Use first enabled lab
+    const activeLabId = checked_labs?.[0];
+    if (!activeLabId) {
+      showSnackbar("No labs with plagiarism checking enabled", "warning");
+      return;
+    }
+    // Only announce success once we know at least one lab was really checked.
+    showSnackbar(status || "Plagiarism check completed", "success");
+
+    // 3. Construct URL; segments are encoded because they come from course config.
+    const reportUrl =
+      `${apiOrigin}/reports/comparisons/${encodeURIComponent(course_name)}` +
+      `/${encodeURIComponent(activeLabId)}/index.html`;
+
+    // 4. Open in a new tab. With the 'noopener' feature window.open() returns
+    // null even when the tab opens, which made the popup-blocked fallback fire
+    // on every click. Open normally and detach the opener manually instead.
+    const newWindow = window.open(reportUrl, '_blank');
+    if (newWindow) {
+      newWindow.opener = null;
+    } else {
+      // Fallback if blocked by popup blocker: reuse the current tab.
+      showSnackbar("Please allow popups for this site", "warning");
+      window.location.href = reportUrl;
+    }
+  } catch (error) {
+    showSnackbar("Failed to generate plagiarism report", "error");
+    console.error("Plagiarism check error:", error);
+  }
+};
const languages = [
{ code: "ru", label: "Русский" },
{ code: "en", label: "English" },
@@ -341,6 +379,7 @@ export const CourseList = ({ onSelectCourse, isAdmin = false }) => {
>
{t("save")}
+
+
>
)}
@@ -434,4 +479,4 @@ export const CourseList = ({ onSelectCourse, isAdmin = false }) => {
);
-};
+};
\ No newline at end of file
diff --git a/frontend/courses-front/src/components/lab-list/index.jsx b/frontend/courses-front/src/components/lab-list/index.jsx
index 781297d..1c0680e 100644
--- a/frontend/courses-front/src/components/lab-list/index.jsx
+++ b/frontend/courses-front/src/components/lab-list/index.jsx
@@ -42,4 +42,4 @@ export const LabList = ({ courseId, groupId, onSelectLab, onBack }) => {
)}
);
-};
+};
\ No newline at end of file
diff --git a/frontend/courses-front/src/locales/en/translation.json b/frontend/courses-front/src/locales/en/translation.json
index 0b2e4b1..9aef879 100644
--- a/frontend/courses-front/src/locales/en/translation.json
+++ b/frontend/courses-front/src/locales/en/translation.json
@@ -19,5 +19,6 @@
"confirmDeleteText": "Are you sure you want to delete this course?",
"yes": "Yes",
"no": "No",
- "expand": "Expand"
+ "expand": "Expand",
+ "Plagiarism": "Plagiarism"
}
diff --git a/frontend/courses-front/src/locales/ru/translation.json b/frontend/courses-front/src/locales/ru/translation.json
index d30f0b8..cfd4622 100644
--- a/frontend/courses-front/src/locales/ru/translation.json
+++ b/frontend/courses-front/src/locales/ru/translation.json
@@ -19,5 +19,6 @@
"confirmDeleteText": "Вы уверены, что хотите удалить этот курс?",
"yes": "Да",
"no": "Нет",
- "expand": "Развернуть"
+ "expand": "Развернуть",
+ "Plagiarism": "Антиплагиат"
}
diff --git a/frontend/courses-front/src/locales/zh/translation.json b/frontend/courses-front/src/locales/zh/translation.json
index 3abb718..b38eeb2 100644
--- a/frontend/courses-front/src/locales/zh/translation.json
+++ b/frontend/courses-front/src/locales/zh/translation.json
@@ -1,5 +1,4 @@
{
- "loadCourse": "加载课程",
"courseUploaded": "课程上传成功",
"select": "选择",
"edit": "编辑",
@@ -24,5 +23,6 @@
"confirmDelete": "确认删除",
"confirmDeleteMessage": "您确定要删除此课程吗?",
"expand": "展开",
- "loadCourse": "加载课程"
+ "loadCourse": "加载课程",
+ "Plagiarism": "运行抄袭检查"
}
diff --git a/main.py b/main.py
index 5d2c207..cf6a494 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,6 @@
-from fastapi import FastAPI, Request, Response, HTTPException
+from fastapi import FastAPI, Request, Response, HTTPException # type: ignore
import os
+from fastapi.staticfiles import StaticFiles
import yaml
import gspread
import requests
@@ -7,11 +8,20 @@
from pydantic import BaseModel, Field
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
-from fastapi import UploadFile, File
+from fastapi import UploadFile, File, Depends
from dotenv import load_dotenv
from itsdangerous import TimestampSigner, BadSignature
import re
+#my edition
+from services.plagiarism import (
+ ComparisonConfig,
+ GitHubFileDownloader,
+ PlagiarismChecker
+)
+from pathlib import Path
+from typing import List
+
load_dotenv()
app = FastAPI()
COURSES_DIR = "courses"
@@ -27,6 +37,8 @@
allow_methods=["*"], # Разрешить все HTTP-методы
allow_headers=["*"], # Разрешить все заголовки
)
+# Directory with generated compare50 reports, served read-only under /reports.
+# Configurable through the REPORTS_DIR environment variable; the default is the
+# relative "reports" folder that the plagiarism checker writes into.
+REPORTS_DIR = Path(os.getenv("REPORTS_DIR", "reports")).resolve()
+# StaticFiles refuses to mount a directory that does not exist yet.
+REPORTS_DIR.mkdir(parents=True, exist_ok=True)
+app.mount("/reports", StaticFiles(directory=REPORTS_DIR), name="reports")
signer = TimestampSigner(SECRET_KEY)
class AuthRequest(BaseModel):
@@ -106,6 +118,7 @@ def get_courses():
"semester": course_info.get("semester", "Unknown"),
"logo": course_info.get("logo", "/assets/default.png"),
"email": course_info.get("email", ""),
+ "config_id": os.path.splitext(filename)[0], # <-- ✅ Added this line
})
return courses
@@ -487,4 +500,56 @@ async def upload_course(file: UploadFile = File(...)):
with open(file_location, "wb") as f:
f.write(content)
- return {"detail": "Курс успешно загружен"}
\ No newline at end of file
+ return {"detail": "Курс успешно загружен"}
+
+def find_course_config(course_id: str) -> Path:
+    """Locate the YAML configuration file of a course.
+
+    Args:
+        course_id: Config file name without extension (taken from the URL).
+
+    Returns:
+        Path to ``<COURSES_DIR>/<course_id>.yaml`` or ``.yml``, whichever exists
+        first.
+
+    Raises:
+        HTTPException: 404 when the id is not a bare file name or no matching
+            file exists.
+    """
+    not_found = HTTPException(
+        status_code=404,
+        detail=f"Course configuration '{course_id}.yaml/.yml' not found",
+    )
+    # The id is user input: accept only a plain file name, never a path.
+    if course_id in ("", ".", "..") or Path(course_id).name != course_id:
+        raise not_found
+
+    for ext in (".yaml", ".yml"):
+        # Use the shared COURSES_DIR constant instead of repeating the literal.
+        path = Path(COURSES_DIR) / f"{course_id}{ext}"
+        if path.is_file():
+            return path
+    raise not_found
+
+@app.post("/api/plagiarism/run/{course_id}")
+async def run_plagiarism_check(course_id: str, request: Request):
+    """Run the plagiarism pipeline for every lab of a course that enables it.
+
+    Args:
+        course_id: Course config file name without extension.
+        request: Incoming request (not read by this handler).
+
+    Returns:
+        A dict with ``status`` and ``checked_labs``; ``course_name`` is included
+        when at least one lab was checked.
+
+    Raises:
+        HTTPException: 404 when the course config is missing, 500 when the
+            comparison tool fails.
+    """
+    # NOTE(review): no authentication happens here although the frontend sends
+    # a bearer token with this call - confirm whether the route must be protected.
+
+    # Imported locally so this handler does not depend on the file's import block.
+    from fastapi.concurrency import run_in_threadpool
+
+    config_path = find_course_config(course_id)
+    # Course files contain non-ASCII text; do not rely on the platform default.
+    with open(config_path, encoding="utf-8") as f:
+        config = yaml.safe_load(f) or {}
+
+    course = config.get("course") or {}
+    labs = course.get("labs") or {}
+
+    # Find labs with plagiarism enabled
+    enabled_labs = [
+        lab_id
+        for lab_id, lab_config in labs.items()
+        if ((lab_config or {}).get("plagiarism") or {}).get("enabled", False)
+    ]
+
+    if not enabled_labs:
+        return {
+            "status": "no labs with plagiarism checking enabled",
+            "checked_labs": [],
+        }
+
+    # The pipeline does blocking network and subprocess work, so it is pushed
+    # to a worker thread to keep the event loop responsive.
+    checker = PlagiarismChecker()
+    for lab_id in enabled_labs:
+        try:
+            await run_in_threadpool(checker.run_pipeline, course, lab_id)
+        except RuntimeError as exc:
+            raise HTTPException(
+                status_code=500,
+                detail=f"Plagiarism check failed for lab '{lab_id}': {exc}",
+            ) from exc
+
+    return {
+        "status": "completed",
+        "checked_labs": enabled_labs,
+        "course_name": course["name"],
+    }
+
+@app.get("/api/plagiarism/report-url/{course_name}/{lab_id}")
+async def get_plagiarism_report_url(course_name: str, lab_id: str):
+    """Return the full URL of a generated plagiarism report.
+
+    Args:
+        course_name: Course name used as the report sub-directory.
+        lab_id: Lab key used as the report sub-directory.
+
+    Returns:
+        ``{"url": ...}`` pointing at the report below the ``/reports`` mount.
+
+    Raises:
+        HTTPException: 404 when the report file does not exist inside the
+            served reports directory.
+    """
+    from urllib.parse import quote
+
+    # Check the directory that is actually served, so a returned URL is reachable.
+    report_path = (
+        REPORTS_DIR / "comparisons" / course_name / lab_id / "index.html"
+    ).resolve()
+
+    # Both segments are user input: the resolved path must stay inside REPORTS_DIR.
+    if not report_path.is_relative_to(REPORTS_DIR) or not report_path.is_file():
+        raise HTTPException(
+            status_code=404,
+            detail=f"Report not found for course '{course_name}', lab '{lab_id}'"
+        )
+
+    # Public origin of this API; override with PUBLIC_BASE_URL when deployed.
+    base_url = os.getenv("PUBLIC_BASE_URL", "http://127.0.0.1:8000").rstrip("/")
+    return {
+        "url": f"{base_url}/reports/comparisons/{quote(course_name)}/{quote(lab_id)}/index.html"
+    }
\ No newline at end of file
diff --git a/services/plagiarism/__init__.py b/services/plagiarism/__init__.py
new file mode 100644
index 0000000..c3a46ae
--- /dev/null
+++ b/services/plagiarism/__init__.py
@@ -0,0 +1,17 @@
+# This __init__.py makes the directory a Python package and allows
+# other modules to import from plagiarism using cleaner syntax.
+from .checker import PlagiarismChecker
+from .downloader import GitHubFileDownloader
+from .models import PlagiarismResult, ComparisonConfig, CodeMatch
+from .sheets_manager import SheetsManager
+from .parser import extract_matches_from_html
+
+# Names re-exported as the public API of the plagiarism package.
+__all__ = [
+ 'PlagiarismChecker',
+ 'GitHubFileDownloader',
+ 'SheetsManager',
+ 'PlagiarismResult',
+ 'ComparisonConfig',
+ 'CodeMatch',
+ 'extract_matches_from_html'
+]
diff --git a/services/plagiarism/checker.py b/services/plagiarism/checker.py
new file mode 100644
index 0000000..7b639f0
--- /dev/null
+++ b/services/plagiarism/checker.py
@@ -0,0 +1,240 @@
+import os
+import shutil
+import subprocess
+from pathlib import Path
+from typing import List, Dict, Optional
+import glob
+
+from dotenv import load_dotenv
+from .models import PlagiarismResult, ComparisonConfig
+from .downloader import GitHubFileDownloader
+from .sheets_manager import SheetsManager
+from .parser import extract_matches_from_html
+
+def _resolve_lab_key(course_config: Dict, lab_id: str) -> str:
+    """Map a lab identifier onto its key in ``course_config["labs"]``.
+
+    A lab matches when ``lab_id`` equals either the key itself or the lab's
+    ``short-name`` value; the first match in mapping order wins.
+
+    Raises:
+        KeyError: If no lab matches ``lab_id``.
+    """
+    candidates = (
+        name
+        for name, settings in course_config["labs"].items()
+        if name == lab_id or settings.get("short-name") == lab_id
+    )
+    try:
+        return next(candidates)
+    except StopIteration:
+        raise KeyError(f"Could not resolve lab_id: '{lab_id}'") from None
+
+
+
+class PlagiarismChecker:
+ def __init__(self):
+     """Create a checker with no collaborators yet.
+
+     ``downloader`` and ``sheets`` start as ``None``; ``run_pipeline`` assigns
+     them for the course it is given.
+     """
+     self.downloader = self.sheets = None
+
+ def _normalize_path(self, path: str) -> str:
+     """Return the lower-cased, forward-slash directory part of ``path``.
+
+     Backslashes become ``/`` and the text is lower-cased; everything after
+     the last ``/`` is dropped. A value without any ``/`` is returned whole.
+     """
+     unified = str(path).replace('\\', '/').lower()
+     directory, slash, _ = unified.rpartition('/')
+     # rpartition yields an empty separator when there is no '/' at all.
+     return directory if slash else unified
+
+ def run_pipeline(self, course_config: Dict, lab_id: str) -> List[PlagiarismResult]:
+     """Download submissions for one lab, compare them and update the sheet.
+
+     Args:
+         course_config: The ``course`` section of a course YAML file.
+         lab_id: Lab key or lab ``short-name``.
+
+     Returns:
+         Always an empty list; results are written to the report directory
+         and to the sheet instead of being returned.
+
+     Raises:
+         KeyError: If ``lab_id`` matches no lab, or a required setting
+             (``name``, ``threshold``) is missing.
+         RuntimeError: If compare50 exits with a non-zero status.
+     """
+     # Look the lab up by its resolved key so a short-name id works too.
+     lab_key = _resolve_lab_key(course_config, lab_id)
+     lab_config = course_config["labs"][lab_key] or {}
+     plagiarism = lab_config.get("plagiarism") or {}
+     if not plagiarism.get("enabled", False):
+         print(f"Skipping lab {lab_id} - plagiarism checking not enabled")
+         return []
+
+     # Collaborators are created only once we know there is work to do.
+     self.sheets = SheetsManager(course_config)
+     self.downloader = GitHubFileDownloader(
+         github_token=os.getenv("GITHUB_TOKEN_PLAGIARISM") or os.getenv("GITHUB_TOKEN"),
+         course_config=course_config
+     )
+
+     course_name = course_config["name"]
+     download_dir = Path(f"data/submissions/{course_name}/{lab_key}")
+     download_dir.mkdir(parents=True, exist_ok=True)
+
+     students = self._get_valid_submissions(lab_config, download_dir)
+
+     # Only `threshold` is mandatory; the other keys are optional in the YAML.
+     # NOTE(review): assumes ComparisonConfig accepts None for language and
+     # max_matches - confirm against models.py.
+     config = ComparisonConfig(
+         # The resolved key keeps report/submission paths equal to download_dir.
+         lab_id=lab_key,
+         course_id=course_name,
+         threshold=plagiarism["threshold"],
+         reference_files=[Path(p) for p in plagiarism.get("reference_files") or []],
+         compare50_args=plagiarism.get("compare50_args", []),
+         language=plagiarism.get("language"),
+         max_matches=plagiarism.get("max-matches"),
+         local_path=lab_config.get("github-prefix", f"lab-{lab_key}"),
+         additional_orgs=plagiarism.get("additional") or [],
+         basefiles=plagiarism.get("basefiles") or [],
+         download_dir=download_dir,
+         output_dir=Path(f"reports/comparisons/{course_name}/{lab_key}")
+     )
+     print(f"Using threshold: {config.threshold}")
+
+     self.run_comparison(config)
+     self._mark_reports_in_sheet(students, config)
+     return []
+
+ def _get_valid_submissions(self, lab_config: Dict, download_dir: Path) -> List[Dict]:
+     """Return the students whose submission was downloaded.
+
+     Each record from ``self.sheets.get_student_repos()`` is kept when
+     ``self.downloader.download_submission`` returns a truthy value for the
+     student's ``github`` login.
+     """
+     return [
+         record
+         for record in self.sheets.get_student_repos()
+         if self.downloader.download_submission(lab_config, record["github"], download_dir)
+     ]
+
+
+ def _mark_reports_in_sheet(self, students: List[Dict], config: ComparisonConfig):
+     """Write a plagiarism status for each student based on the compare50 report.
+
+     Every student is first set to "Not done". When the report exists, students
+     taking part in a match that scores at or above ``config.threshold`` are
+     set to "⚠️ Detected" and all others to "✓ not detected".
+
+     Args:
+         students: Student records; the ``github`` and ``row`` keys are read.
+         config: Comparison settings; ``output_dir`` and ``threshold`` are read.
+     """
+     for student in students:
+         self.sheets.update_status(student['row'], "Not done")
+         print(f"Set default status for {student['github']} (row {student['row']}): Not done")
+
+     index_html = config.output_dir / "index.html"
+     if not index_html.exists():
+         print(f"❌ Expected HTML report not found at {index_html}")
+         return
+
+     html = index_html.read_text(encoding="utf-8")
+
+     # Materialised because the matches are walked twice below.
+     matches = list(extract_matches_from_html(html))
+     threshold = config.threshold
+
+     print("\n=== DEBUGGING INFORMATION ===")
+     print(f"Threshold: {threshold}")
+     print("\nAll students in sheet:")
+     for student in students:
+         print(f"- {student['github']} (row {student['row']})")
+
+     print("\nRaw matches from compare50:")
+     for i, (source, target, score) in enumerate(matches, 1):
+         print(f"{i}. {source} ↔ {target} ({score})")
+
+     # Report paths are lower-cased before lookup, so the map is keyed by the
+     # lower-cased login; otherwise logins with capitals could never match.
+     student_map = {student['github'].lower(): student for student in students}
+     flagged_students = set()
+
+     def extract_username(path: str) -> Optional[str]:
+         """Return the lower-cased login found in ``path``, or None."""
+         parts = [p for p in path.replace('\\', '/').lower().split('/') if p]
+
+         # A known login may appear at any position of the path.
+         for part in parts:
+             if part in student_map:
+                 return part
+
+         # Fallback: the directory holding the file, if it resembles a login.
+         if len(parts) >= 2:
+             candidate = parts[-2]
+             if any(candidate in login for login in student_map):
+                 return candidate
+
+         return None
+
+     # Process each match
+     for source, target, score in matches:
+         if score < threshold:
+             continue
+         print(f"\nProcessing high-score match: {source} ↔ {target} ({score})")
+
+         source_user = extract_username(source)
+         target_user = extract_username(target)
+
+         print(f"Extracted usernames: source={source_user}, target={target_user}")
+
+         # Skip if either username is invalid
+         if not source_user or not target_user:
+             print("Skipping - could not extract both usernames")
+             continue
+
+         # Similarity to the handed-out distribution code is not plagiarism.
+         if 'distribution' in source.lower() or 'distribution' in target.lower():
+             print("Skipping - matches distribution code")
+             continue
+
+         # Flag both students involved
+         for username in (source_user, target_user):
+             if username in student_map:
+                 flagged_students.add(username)
+                 print(f"Flagging {username} (row {student_map[username]['row']})")
+             else:
+                 print(f"Username {username} not found in student records")
+
+     # Update Google Sheets
+     print("\nFinal updates to Google Sheets:")
+     for student in students:
+         is_flagged = student['github'].lower() in flagged_students
+         status = "⚠️ Detected" if is_flagged else "✓ not detected"
+         print(f"Updating {student['github']} (row {student['row']}): {status}")
+         self.sheets.update_status(student['row'], status)
+
+ def run_comparison(self, config: ComparisonConfig) -> None:
+     """Run compare50 over the downloaded submissions of one lab.
+
+     Reference (distribution) files come from ``config.reference_files`` when
+     at least one of them exists; otherwise the legacy location
+     ``data/distribution/<course>/<lab>/lab<lab>.cpp`` is used. Submissions are
+     the files with the reference file's name one level below
+     ``config.download_dir``.
+
+     Args:
+         config: Comparison settings for the lab.
+
+     Raises:
+         RuntimeError: If compare50 exits with a non-zero status.
+     """
+     # Start from an empty output directory so stale reports never survive.
+     if config.output_dir.exists():
+         shutil.rmtree(config.output_dir)
+     config.output_dir.mkdir(parents=True, exist_ok=True)
+
+     legacy_name = f"lab{config.lab_id}.cpp"
+     legacy_reference = Path(
+         f"data/distribution/{config.course_id}/{config.lab_id}/{legacy_name}"
+     )
+     configured = [Path(p) for p in (getattr(config, "reference_files", None) or [])]
+     reference_files = [p for p in configured if p.exists()] or [legacy_reference]
+
+     if not reference_files[0].exists():
+         print(f"❌ Reference file not found: {reference_files[0]}")
+         return
+
+     # Submissions carry the same file name as the distribution file.
+     filename = reference_files[0].name
+     submissions_root = getattr(config, "download_dir", None) or Path(
+         f"data/submissions/{config.course_id}/{config.lab_id}"
+     )
+     submission_glob = str(Path(submissions_root) / "*" / filename)
+     # Sorted so the compare50 command line is reproducible between runs.
+     submission_files = sorted(glob.glob(submission_glob))
+
+     if not submission_files:
+         print(f"❌ No submission files found using glob: {submission_glob}")
+         return
+
+     extra_args = [str(arg) for arg in (getattr(config, "compare50_args", None) or [])]
+     cmd = [
+         "compare50",
+         # Extra arguments go first so --distro/--output end any option list.
+         *extra_args,
+         "--distro", *(str(p) for p in reference_files),
+         "--output", str(config.output_dir),
+         *submission_files
+     ]
+
+     print("Running Compare50 with auto-confirm...")
+     print(" ".join(cmd))
+
+     # Pipe `yes` into compare50 to auto-confirm any prompt
+     yes_proc = subprocess.Popen(['yes'], stdout=subprocess.PIPE)
+     try:
+         result = subprocess.run(cmd, stdin=yes_proc.stdout, capture_output=True, text=True)
+     finally:
+         # Always reap the helper, even when compare50 could not be started;
+         # otherwise `yes` would keep running in the background.
+         yes_proc.stdout.close()
+         yes_proc.terminate()
+         yes_proc.wait()
+
+     if result.returncode != 0:
+         print("❌ Compare50 failed.")
+         print("STDOUT:", result.stdout)
+         print("STDERR:", result.stderr)
+         raise RuntimeError("compare50 execution failed")
+
+     print(f"✔️ Compare50 completed. Report available at: {config.output_dir}")
+
+
+ # Reads one already-generated report and forwards matches at or above the
+ # threshold to the sheets helper.
+ # NOTE(review): `lab_id` is never used; the report path below is hard-coded.
+ # NOTE(review): `self.config` is not assigned in __init__ (only `downloader`
+ # and `sheets` are), so step 4 raises AttributeError unless a caller sets it.
+ # NOTE(review): `self.sheets` stays None until run_pipeline() has run, and
+ # whether SheetsManager offers flag_plagiarism() is not visible here - confirm.
+ def check(self, lab_id: str):
+ # 1. Path of an existing report; compare50 itself is not run here
+ report_path = Path("reports/comparisons/lab2/index.html")
+
+ # 2. Read HTML
+ with open(report_path, encoding="utf-8") as f:
+ html = f.read()
+
+ # 3. Extract match tuples (source, target, score)
+ matches = extract_matches_from_html(html)
+
+ # 4. Keep matches whose score is at or above the configured threshold
+ flagged = [match for match in matches if match[2] >= self.config.threshold]
+
+ # 5. Flag plagiarism results to Google Sheets
+ for source, target, score in flagged:
+ self.sheets.flag_plagiarism(source, target, score)
+
+if __name__ == "__main__":
+    # Debug entry point: print the matches parsed from one existing report.
+    # The report location is fixed; no command-line arguments are read.
+    report_path = Path("reports/comparisons/ld/2/index.html")
+    html = report_path.read_text(encoding="utf-8")
+
+    matches = extract_matches_from_html(html)
+    print("Extracted matches:")
+    for match in matches:
+        source, target, score = match
+        print(f"{source} ↔ {target}: {score}")
+
diff --git a/services/plagiarism/downloader.py b/services/plagiarism/downloader.py
new file mode 100644
index 0000000..7ece7f1
--- /dev/null
+++ b/services/plagiarism/downloader.py
@@ -0,0 +1,83 @@
+import os
+import requests
+from pathlib import Path
+from typing import Optional, Dict
+
+class GitHubFileDownloader:
+    """Download lab submission files from students' GitHub repositories.
+
+    A submission is downloaded only when the most recent GitHub Actions run
+    of the student's repository completed with conclusion ``success``.
+    """
+
+    # Seconds to wait for any single GitHub API response.
+    _REQUEST_TIMEOUT = 10
+
+    def __init__(self, github_token: Optional[str], course_config: Dict):
+        """Store the course config and build the default request headers.
+
+        Args:
+            github_token: API token. When ``None``, the token is read from the
+                ``GITHUB_TOKEN_PLAGIARISM`` or ``GITHUB_TOKEN`` environment
+                variable, in that order.
+            course_config: Parsed course configuration; its ``"github"``
+                section is read for ``"organization"`` and the optional
+                ``"prefix"``.
+        """
+        if github_token is None:
+            github_token = os.getenv("GITHUB_TOKEN_PLAGIARISM") or os.getenv("GITHUB_TOKEN")
+
+        self.headers = {"Accept": "application/vnd.github.v3+json"}
+        # With no token available, send anonymous requests rather than the
+        # literal header "Bearer None".
+        if github_token:
+            self.headers["Authorization"] = f"Bearer {github_token}"
+        self.course_config = course_config
+
+    def download_submission(self, lab_config: Dict, github_user: str, save_dir: Path) -> Optional[Path]:
+        """Download one student's lab files if the repository's CI passed.
+
+        The repository name is ``<prefix>-<github_user>``, where the prefix is
+        the lab's ``"github-prefix"`` if present, else the course-level
+        ``"prefix"``, else an empty string.
+
+        Args:
+            lab_config: Lab configuration; ``"files"`` lists the paths to fetch.
+            github_user: Student's GitHub username.
+            save_dir: Base directory; files are saved under
+                ``save_dir / github_user``.
+
+        Returns:
+            Path of the first successfully downloaded file, or ``None`` when
+            the CI check failed or no file could be downloaded.
+        """
+        prefix = self.course_config["github"].get("prefix", "")  # fallback to empty if missing
+        prefix = lab_config.get("github-prefix", prefix)
+        repo = f"{prefix}-{github_user}"
+        org = self.course_config["github"]["organization"]
+
+        if not self._check_ci_passed(org, repo):
+            print(f"Skipping {repo}: CI check failed.")
+            return None
+
+        downloaded_files = []
+        for filename in lab_config["files"]:
+            file_path = self._download_file(org, repo, filename, save_dir / github_user)
+            if file_path:
+                downloaded_files.append(file_path)
+
+        return downloaded_files[0] if downloaded_files else None
+
+    def _check_ci_passed(self, org: str, repo: str) -> bool:
+        """Return True only if the latest workflow run completed successfully.
+
+        Any HTTP error, missing run, or exception is reported and treated as
+        "not passed".
+        """
+        url = f"https://api.github.com/repos/{org}/{repo}/actions/runs?per_page=1"
+
+        try:
+            # Same timeout as file downloads, so a stalled connection cannot
+            # block the whole check indefinitely.
+            resp = requests.get(url, headers=self.headers, timeout=self._REQUEST_TIMEOUT)
+
+            if resp.status_code != 200:
+                print(f"Failed to fetch CI status for {org}/{repo}. Status Code: {resp.status_code}")
+                return False
+
+            runs = resp.json().get("workflow_runs", [])
+
+            if not runs:
+                print(f"No CI runs found for {org}/{repo}.")
+                return False
+
+            # Only the most recent run is considered (per_page=1).
+            latest_run = runs[0]
+
+            print(f"Latest CI run status: {latest_run.get('status')}, conclusion: {latest_run.get('conclusion')}")
+
+            return (
+                latest_run.get("status") == "completed" and
+                latest_run.get("conclusion") == "success"
+            )
+
+        except Exception as e:
+            # Deliberately best-effort: one unreachable repository must not
+            # abort the run for the remaining students.
+            print(f"Error fetching CI status for {org}/{repo}: {e}")
+            return False
+
+    def _download_file(self, org: str, repo: str, filename: str, save_dir: Path) -> Optional[Path]:
+        """Fetch one file's raw content from a repository and save it locally.
+
+        Returns:
+            The path written to, or ``None`` if the file was not found or the
+            download failed.
+        """
+        try:
+            url = f"https://api.github.com/repos/{org}/{repo}/contents/{filename}"
+            response = requests.get(
+                url,
+                headers={**self.headers, "Accept": "application/vnd.github.v3.raw"},
+                timeout=self._REQUEST_TIMEOUT,
+            )
+
+            if response.status_code == 200:
+                save_path = save_dir / filename
+                # filename may contain sub-directories (e.g. "src/main.cpp"),
+                # so create the file's own parent, not just save_dir.
+                save_path.parent.mkdir(parents=True, exist_ok=True)
+                save_path.write_bytes(response.content)
+                return save_path
+            else:
+                print(f"❌ {filename} not found in {repo} (status {response.status_code})")
+        except Exception as e:
+            # Best-effort, same as the CI check: report and continue.
+            print(f"❗ Exception downloading {filename} from {repo}: {e}")
+        return None
+
diff --git a/services/plagiarism/models.py b/services/plagiarism/models.py
new file mode 100644
index 0000000..3414255
--- /dev/null
+++ b/services/plagiarism/models.py
@@ -0,0 +1,41 @@
+from pydantic import BaseModel
+from typing import List, Optional
+from pathlib import Path
+
+# Individual file comparison info inside a plagiarism match
+class CodeMatch(BaseModel):
+    """Similarity details for one pair of files within a plagiarism match."""
+
+    file1: Path
+    file2: Path
+    similarity: float
+    # Pairs of ints; presumably matching line numbers or ranges - TODO confirm
+    # whether a pair is (line in file1, line in file2) or (start, end).
+    matching_lines: List[tuple[int, int]]
+
+# The full result between two submissions
+class PlagiarismResult(BaseModel):
+    """Outcome of comparing two submissions, with per-file match details."""
+
+    # Identifiers of the two compared submissions (format not fixed here).
+    submission1: str
+    submission2: str
+    overall_similarity: float
+    # Per-file comparison details for this pair.
+    matches: List[CodeMatch]
+    report_path: Path
+    # NOTE(review): presumably True when overall_similarity reached the
+    # configured threshold - set by the caller, not computed by this model.
+    exceeds_threshold: bool
+
+# Top-level config class - driven by parsed YAML
+class ComparisonConfig(BaseModel):
+    """Settings for one plagiarism comparison run of a single lab.
+
+    Every field is required: none declares a default value.
+    """
+
+    lab_id: str
+    course_id: str
+
+    # Detection tuning
+    threshold: float  # Minimum similarity to flag
+    language: str  # Programming language, e.g., cc or py
+    max_matches: int  # Limit number of comparisons
+    local_path: str  # Local name to show in report
+
+    # Files affecting detection
+    reference_files: List[Path]  # Full path to extra distribution files
+    additional_orgs: List[str]  # Reference submission orgs (see: --add)
+    basefiles: List[dict]  # Repo-based basefiles (repo + filename)
+
+    compare50_args: List[str]  # Any extra compare50 CLI args
+
+    # I/O paths
+    download_dir: Path  # Where to fetch student submissions
+    output_dir: Path  # Where to store reports
diff --git a/services/plagiarism/parser.py b/services/plagiarism/parser.py
new file mode 100644
index 0000000..138ed92
--- /dev/null
+++ b/services/plagiarism/parser.py
@@ -0,0 +1,19 @@
+# parser.py
+import re
+import json
+from typing import List, Tuple
+
+def extract_matches_from_html(html: str) -> List[Tuple[str, str, float]]:
+    """Extract (source, target, score) tuples from a Compare50 HTML report.
+
+    The report embeds its match graph as a JavaScript assignment of the form
+    ``var GRAPH = {...}``. Each entry of the graph's ``links`` list becomes
+    one tuple; a link endpoint may be a node object (its ``id`` is used) or
+    a bare id.
+
+    Args:
+        html: Full text of the report page.
+
+    Returns:
+        One tuple per link, or an empty list when the page has no ``GRAPH``
+        object or the graph has no links.
+
+    Raises:
+        json.JSONDecodeError: If the text after ``var GRAPH =`` is not valid JSON.
+    """
+    marker = re.search(r'var\s+GRAPH\s*=\s*(?=\{)', html)
+    if not marker:
+        return []
+
+    # Decode exactly one JSON value starting at the opening brace. Unlike a
+    # non-greedy regex up to "};", this cannot be cut short by a "};" inside
+    # a string and does not need a trailing semicolon.
+    graph, _ = json.JSONDecoder().raw_decode(html[marker.end():])
+
+    return [
+        (
+            _link_endpoint_id(link['source']),
+            _link_endpoint_id(link['target']),
+            float(link['value']),
+        )
+        # "links" may be absent or null; both mean there are no matches.
+        for link in graph.get('links') or []
+    ]
+
+
+def _link_endpoint_id(endpoint):
+    """Return the node id of a link endpoint given as a node object or a bare id."""
+    return endpoint['id'] if isinstance(endpoint, dict) else endpoint
diff --git a/services/plagiarism/sheets_manager.py b/services/plagiarism/sheets_manager.py
new file mode 100644
index 0000000..8abfd66
--- /dev/null
+++ b/services/plagiarism/sheets_manager.py
@@ -0,0 +1,90 @@
+from typing import Dict, List, Optional
+
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+def excel_col_to_index(col: str) -> int:
+    """Return the 1-based column number for a spreadsheet column label.
+
+    The label is read case-insensitively as a base-26 number whose digits
+    are A=1 .. Z=26, so "A" -> 1, "Z" -> 26, "AA" -> 27.
+    """
+    base = ord('A') - 1
+    return sum(
+        (ord(letter) - base) * 26 ** place
+        for place, letter in enumerate(reversed(col.upper()))
+    )
+
+class SheetsManager:
+    """Read student rows from, and write statuses to, the course spreadsheet.
+
+    Every operation works on the first worksheet of the spreadsheet whose
+    key is ``config["google"]["spreadsheet"]``.
+    """
+
+    def __init__(self, config: Dict):
+        """Authorize a gspread client from the ``credentials.json`` key file.
+
+        Args:
+            config: Course configuration; its ``"google"`` section is read for
+                ``"spreadsheet"``, ``"github-column"``, ``"status-column"``
+                and ``"start-row"``.
+        """
+        self.scope = [
+            "https://www.googleapis.com/auth/spreadsheets",
+            "https://www.googleapis.com/auth/drive"
+        ]
+        self.creds = ServiceAccountCredentials.from_json_keyfile_name(
+            "credentials.json", self.scope
+        )
+        self.client = gspread.authorize(self.creds)
+        self.config = config
+
+    def _worksheet(self):
+        """Open the configured spreadsheet and return its first worksheet."""
+        sheet = self.client.open_by_key(self.config["google"]["spreadsheet"])
+        return sheet.get_worksheet(0)
+
+    def get_student_repos(self) -> List[Dict]:
+        """List students that have a non-empty GitHub cell.
+
+        Rows above ``start-row`` and rows too short to contain the GitHub
+        column are skipped.
+
+        Returns:
+            One dict per student with ``"github"`` (the stripped cell value)
+            and ``"row"`` (1-based sheet row).
+        """
+        worksheet = self._worksheet()
+        github_col = excel_col_to_index(self.config["google"]["github-column"])
+        start_row = self.config["google"]["start-row"]
+
+        valid_students = []
+        for row_number, row in enumerate(worksheet.get_all_values(), start=1):
+            if row_number < start_row:
+                continue
+            if len(row) < github_col:
+                continue  # Row too short
+            github = row[github_col - 1].strip()
+            if not github:
+                continue  # Empty GitHub cell
+
+            valid_students.append({"github": github, "row": row_number})
+
+        return valid_students
+
+    def update_status(self, row: int, message: str):
+        """Write ``message`` into the status column of the given 1-based row."""
+        worksheet = self._worksheet()
+        status_col = excel_col_to_index(self.config["google"]["status-column"])
+        print(f"Updating status at row {row}: {message}")  # Debug line
+        worksheet.update_cell(row, status_col, message)
+
+    def flag_plagiarism(self, source: str, target: str, score: float):
+        """Write a plagiarism warning into the status cell of both students.
+
+        Args:
+            source: GitHub username of the first student of the pair.
+            target: GitHub username of the second student of the pair.
+            score: Similarity score shown in the warning text.
+
+        A student that cannot be found in the sheet is silently skipped.
+        """
+        worksheet = self._worksheet()
+        print(f"Flagging plagiarism between {source} and {target} with score {score}")
+
+        # update_cell addresses cells by numeric row/column, so the configured
+        # column letter is converted first, exactly as update_status does.
+        status_col = excel_col_to_index(self.config["google"]["status-column"])
+
+        for github in (source, target):
+            student_row = self._find_student_row(github, worksheet)
+            if student_row is not None:
+                worksheet.update_cell(student_row, status_col, f"⚠️ Detected: {score}")
+
+    def _find_student_row(self, github: str, worksheet) -> Optional[int]:
+        """Return the 1-based row whose GitHub cell equals ``github``.
+
+        Returns:
+            The first matching row number, or ``None`` when no row matches.
+        """
+        github_col = excel_col_to_index(self.config["google"]["github-column"]) - 1  # 0-indexed
+
+        for row_number, row in enumerate(worksheet.get_all_values(), start=1):
+            # Skip rows too short to contain the GitHub column.
+            if len(row) > github_col and row[github_col].strip() == github:
+                return row_number
+        return None
+
diff --git a/} b/}
new file mode 100644
index 0000000..e69de29