From a20003cefb867f245fe3a95d53b0a7b7b1426a14 Mon Sep 17 00:00:00 2001 From: Yurken <2380850316@qq.com> Date: Fri, 22 May 2026 13:00:05 +0800 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20B=E7=AB=99=E5=90=88=E9=9B=86?= =?UTF-8?q?=E5=88=86P=20+=20HashRouter=20+=20=E5=85=AC=E5=BC=8F=E6=B8=B2?= =?UTF-8?q?=E6=9F=93=20+=20prompt=20LaTeX=20=E8=AF=B4=E6=98=8E=20+=20ctran?= =?UTF-8?q?slate2=20=E5=85=BC=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - B站下载器支持合集分 P 参数识别,避免总是下载第一集 - URL 解析器提取分 P 参数防止不同 P 覆盖同一文件 - 前端改用 HashRouter 适配桌面/扩展场景 - MarkdownViewer 修复 remarkMath/rehypeKatex 配置(singleDollarTextMath、strict false) - prompt 新增 LaTeX 公式格式要求并修复 {aligned} KeyError - ctranslate2 版本兼容(>=4.6.0)+ mlx-whisper 导入保护 + .env 加载路径修复 Co-Authored-By: Claude Opus 4.7 --- BillNote_frontend/src/App.tsx | 6 ++-- .../HomePage/components/MarkdownViewer.tsx | 4 +-- .../app/downloaders/bilibili_downloader.py | 26 +++++++++++++++-- backend/app/gpt/prompt.py | 16 ++++++++++- backend/app/routers/config.py | 28 +++++++++++-------- backend/app/utils/url_parser.py | 9 +++++- backend/main.py | 4 ++- backend/requirements.txt | 2 +- 8 files changed, 72 insertions(+), 23 deletions(-) diff --git a/BillNote_frontend/src/App.tsx b/BillNote_frontend/src/App.tsx index d871506e..ddbd4df9 100644 --- a/BillNote_frontend/src/App.tsx +++ b/BillNote_frontend/src/App.tsx @@ -1,6 +1,6 @@ import './App.css' import { lazy, Suspense, useEffect } from 'react' -import { BrowserRouter, Navigate, Routes, Route } from 'react-router-dom' +import { HashRouter, Navigate, Routes, Route } from 'react-router-dom' import { useTaskPolling } from '@/hooks/useTaskPolling.ts' import { useCheckBackend } from '@/hooks/useCheckBackend.ts' import { systemCheck } from '@/services/system.ts' @@ -62,7 +62,7 @@ function App() { <> - + 加载中…}> } /> @@ -86,7 +86,7 @@ function App() { - + ) } diff --git a/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx b/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx index ac95cd3e..9265adc5 100644 --- a/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx +++ b/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx @@ -46,8 +46,8 @@ const steps = [ { label: '保存完成', key: 'SUCCESS' }, ] -const remarkPlugins = [gfm, remarkMath] -const rehypePlugins = [rehypeKatex] +const remarkPlugins = [gfm, [remarkMath, { singleDollarTextMath: true }]] +const rehypePlugins = [[rehypeKatex, { throwOnError: false, strict: false }]] /** * 构建 ReactMarkdown components 对象,baseURL 用于修正图片路径。 diff --git a/backend/app/downloaders/bilibili_downloader.py b/backend/app/downloaders/bilibili_downloader.py index 0a94849f..df070635 100644 --- a/backend/app/downloaders/bilibili_downloader.py +++ b/backend/app/downloaders/bilibili_downloader.py @@ -2,6 +2,7 @@ import json import logging import tempfile +import re from abc import ABC from typing import Union, Optional, List @@ -67,9 +68,16 @@ def download( 'preferredquality': '64', } ], - 'noplaylist': True, 'quiet': False, } + + # 合集分 P 处理:识别 URL 中的 p 参数,避免总是下载第一集 + p_match = re.search(r'[?&]p=(\d+)', video_url) + if p_match: + ydl_opts['playlist_items'] = p_match.group(1) + else: + ydl_opts['noplaylist'] = True + if self._cookiefile: ydl_opts['cookiefile'] = self._cookiefile @@ -119,10 +127,17 @@ def download_video( 'format': 'bv*[ext=mp4]/bestvideo+bestaudio/best', 'outtmpl': output_path, 'http_headers': {'Referer': 'https://www.bilibili.com'}, - 'noplaylist': True, 'quiet': False, 'merge_output_format': 'mp4', # 确保合并成 mp4 } + + # 合集分 P 处理 + p_match = re.search(r'[?&]p=(\d+)', video_url) + if p_match: + ydl_opts['playlist_items'] = p_match.group(1) + else: + ydl_opts['noplaylist'] = True + if self._cookiefile: ydl_opts['cookiefile'] = self._cookiefile @@ -186,6 +201,13 @@ def download_subtitles(self, video_url: str, output_dir: str = None, 'quiet': True, } + # 合集分 P 处理 + p_match = re.search(r'[?&]p=(\d+)', video_url) + if p_match: + ydl_opts['playlist_items'] = p_match.group(1) + else: + ydl_opts['noplaylist'] = True + # 通过 CookieConfigManager 注入 B站 Cookie(Netscape cookiefile) if self._cookiefile: ydl_opts['cookiefile'] = self._cookiefile diff --git a/backend/app/gpt/prompt.py b/backend/app/gpt/prompt.py index f09d25df..eda45cf5 100644 --- a/backend/app/gpt/prompt.py +++ b/backend/app/gpt/prompt.py @@ -38,7 +38,21 @@ 2. **去除无关内容**:省略广告、填充词、问候语和不相关的言论。 3. **保留关键细节**:保留重要事实、示例、结论和建议。(如果额外重要的任务有格式需求可以不遵守) 4. **可读布局**:必要时使用项目符号,并保持段落简短,增强可读性。(如果额外重要的任务有格式需求可以不遵守) -5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现,适合 Markdown 渲染。 +5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现: + - 行内公式用 `$...$`,$ 与公式内容之间**不能有空格** + - 块级公式用 `$$...$$`,**必须独占一行**,前后各空一行,$$ 与公式内容之间**不能有空格** + - 例如: + ``` + 原问题为: + + $$\begin{{aligned}} + \min \quad & f = 3x_1 - 5x_2 + \end{{aligned}}$$ + + 转换为:$z = -f$ + ``` + - **禁止**用代码块(如 ` ```math ` 或 ` ``` `)包裹公式。 +6. **禁止元叙述**:笔记中不要出现"视频讲了……"、"视频中提到……"、"在本视频中……"等指向视频本身的表述。直接总结内容即可,把信息当作客观事实陈述。 请始终遵循此规则。 diff --git a/backend/app/routers/config.py b/backend/app/routers/config.py index c816fc5b..e138d459 100644 --- a/backend/app/routers/config.py +++ b/backend/app/routers/config.py @@ -172,18 +172,22 @@ def get_transcriber_models_status(): mlx_available = platform.system() == "Darwin" mlx_statuses = [] if mlx_available: - from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP - for size in WHISPER_MODEL_SIZES: - mlx_key = f"mlx-{size}" - repo_id = MLX_MODEL_MAP.get(size) - # 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致 - downloaded = _check_mlx_whisper_model_exists(size) - mlx_statuses.append({ - "model_size": size, - "downloaded": downloaded, - "downloading": _downloading.get(mlx_key) == "downloading", - "available": repo_id is not None, - }) + try: + from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP + for size in WHISPER_MODEL_SIZES: + mlx_key = f"mlx-{size}" + repo_id = MLX_MODEL_MAP.get(size) + # 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致 + downloaded = _check_mlx_whisper_model_exists(size) + mlx_statuses.append({ + "model_size": size, + "downloaded": downloaded, + "downloading": _downloading.get(mlx_key) == "downloading", + "available": repo_id is not None, + }) + except Exception: + # mlx-whisper 未安装或导入失败,跳过该分支 + pass return R.success(data={ "whisper": statuses, diff --git a/backend/app/utils/url_parser.py b/backend/app/utils/url_parser.py index 8f76a169..012594cb 100644 --- a/backend/app/utils/url_parser.py +++ b/backend/app/utils/url_parser.py @@ -20,7 +20,14 @@ def extract_video_id(url: str, platform: str) -> Optional[str]: # 匹配 BV号(如 BV1vc411b7Wa) match = re.search(r"BV([0-9A-Za-z]+)", url) - return f"BV{match.group(1)}" if match else None + if not match: + return None + bv = f"BV{match.group(1)}" + # 提取分 P 参数(合集),避免不同 P 覆盖同一文件 + p_match = re.search(r'[?&]p=(\d+)', url) + if p_match: + return f"{bv}_p{p_match.group(1)}" + return bv elif platform == "youtube": # 匹配 v=xxxxx 或 youtu.be/xxxxx,ID 长度通常为 11 diff --git a/backend/main.py b/backend/main.py index 9f21a64f..57f0c2f0 100644 --- a/backend/main.py +++ b/backend/main.py @@ -20,7 +20,9 @@ from ffmpeg_helper import ensure_ffmpeg_or_raise logger = get_logger(__name__) -load_dotenv() +# 支持在 backend/ 子目录中运行时也能加载项目根目录的 .env +dotenv_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), '.env') +load_dotenv(dotenv_path) # 读取 .env 中的路径 static_path = os.getenv('STATIC', '/static') diff --git a/backend/requirements.txt b/backend/requirements.txt index b0d23268..a0b98a99 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -24,7 +24,7 @@ click-repl==0.3.0 colorama==0.4.6 coloredlogs==15.0.1 cssselect2==0.8.0 -ctranslate2==4.6.0 +ctranslate2>=4.6.0 distro==1.9.0 dnspython==2.7.0 email_validator==2.2.0 From ddfdbcbb3c8c8969530a0eeaa755d1d10295e07b Mon Sep 17 00:00:00 2001 From: Yurken <2380850316@qq.com> Date: Fri, 22 May 2026 13:16:51 +0800 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20=E4=BB=A3=E7=A0=81=E5=AE=A1=E6=9F=A5?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20=E2=80=94=20url=5Fparser=E3=80=81bilibili?= =?UTF-8?q?=5Fdownloader=E3=80=81prompt=E3=80=81config=E3=80=81requirement?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - url_parser: extract_video_id 恢复返回纯 BV 号,新增 extract_bilibili_task_id 用于带分 P 的任务签名,避免破坏 B 站 API 调用 - bilibili_downloader: outtmpl 与返回路径加入 p_suffix,防止不同 P 覆盖同名文件 - prompt: 公式示例去掉三引号代码块,避免与"禁止代码块包裹公式"矛盾 - config: mlx-whisper 导入捕获由 except Exception 收紧为 except ImportError - requirements: ctranslate2 版本约束加 <5 上界 Co-Authored-By: Claude Opus 4.7 --- .../app/downloaders/bilibili_downloader.py | 28 ++++++++++--------- backend/app/gpt/prompt.py | 3 +- backend/app/routers/config.py | 3 +- backend/app/utils/url_parser.py | 23 +++++++++++---- backend/requirements.txt | 2 +- 5 files changed, 36 insertions(+), 23 deletions(-) diff --git a/backend/app/downloaders/bilibili_downloader.py b/backend/app/downloaders/bilibili_downloader.py index df070635..631e59e6 100644 --- a/backend/app/downloaders/bilibili_downloader.py +++ b/backend/app/downloaders/bilibili_downloader.py @@ -13,7 +13,7 @@ from app.models.notes_model import AudioDownloadResult from app.models.transcriber_model import TranscriptResult, TranscriptSegment from app.utils.path_helper import get_data_dir -from app.utils.url_parser import extract_video_id +from app.utils.url_parser import extract_video_id, extract_bilibili_task_id from app.services.cookie_manager import CookieConfigManager logger = logging.getLogger(__name__) @@ -55,7 +55,11 @@ def download( output_dir=self.cache_data os.makedirs(output_dir, exist_ok=True) - output_path = os.path.join(output_dir, "%(id)s.%(ext)s") + # 合集分 P 处理:识别 URL 中的 p 参数,避免总是下载第一集 + p_match = re.search(r'[?&]p=(\d+)', video_url) + p_suffix = f"_p{p_match.group(1)}" if p_match else "" + + output_path = os.path.join(output_dir, f"%(id)s{p_suffix}.%(ext)s") ydl_opts = { 'format': 'bestaudio[ext=m4a]/bestaudio/best', @@ -71,8 +75,6 @@ def download( 'quiet': False, } - # 合集分 P 处理:识别 URL 中的 p 参数,避免总是下载第一集 - p_match = re.search(r'[?&]p=(\d+)', video_url) if p_match: ydl_opts['playlist_items'] = p_match.group(1) else: @@ -87,7 +89,7 @@ def download( title = info.get("title") duration = info.get("duration", 0) cover_url = info.get("thumbnail") - audio_path = os.path.join(output_dir, f"{video_id}.mp3") + audio_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp3") return AudioDownloadResult( file_path=audio_path, @@ -114,14 +116,16 @@ def download_video( os.makedirs(output_dir, exist_ok=True) print("video_url",video_url) video_id=extract_video_id(video_url, "bilibili") - video_path = os.path.join(output_dir, f"{video_id}.mp4") + p_match = re.search(r'[?&]p=(\d+)', video_url) + p_suffix = f"_p{p_match.group(1)}" if p_match else "" + video_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp4") if os.path.exists(video_path): return video_path # 检查是否已经存在 - output_path = os.path.join(output_dir, "%(id)s.%(ext)s") + output_path = os.path.join(output_dir, f"%(id)s{p_suffix}.%(ext)s") ydl_opts = { 'format': 'bv*[ext=mp4]/bestvideo+bestaudio/best', @@ -131,8 +135,6 @@ def download_video( 'merge_output_format': 'mp4', # 确保合并成 mp4 } - # 合集分 P 处理 - p_match = re.search(r'[?&]p=(\d+)', video_url) if p_match: ydl_opts['playlist_items'] = p_match.group(1) else: @@ -144,7 +146,7 @@ def download_video( with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(video_url, download=True) video_id = info.get("id") - video_path = os.path.join(output_dir, f"{video_id}.mp4") + video_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp4") if not os.path.exists(video_path): raise FileNotFoundError(f"视频文件未找到: {video_path}") @@ -190,6 +192,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None, langs = ['zh-Hans', 'zh', 'zh-CN', 'ai-zh', 'en', 'en-US'] video_id = extract_video_id(video_url, "bilibili") + task_id = extract_bilibili_task_id(video_url) ydl_opts = { 'writesubtitles': True, @@ -197,11 +200,10 @@ def download_subtitles(self, video_url: str, output_dir: str = None, 'subtitleslangs': langs, 'subtitlesformat': 'srt/json3/best', # 支持多种格式 'skip_download': True, - 'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'), + 'outtmpl': os.path.join(output_dir, f'{task_id}.%(ext)s'), 'quiet': True, } - # 合集分 P 处理 p_match = re.search(r'[?&]p=(\d+)', video_url) if p_match: ydl_opts['playlist_items'] = p_match.group(1) @@ -251,7 +253,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None, # 查找字幕文件 ext = sub_info.get('ext', 'srt') - subtitle_file = os.path.join(output_dir, f"{video_id}.{detected_lang}.{ext}") + subtitle_file = os.path.join(output_dir, f"{task_id}.{detected_lang}.{ext}") if not os.path.exists(subtitle_file): logger.info(f"字幕文件不存在: {subtitle_file}") diff --git a/backend/app/gpt/prompt.py b/backend/app/gpt/prompt.py index eda45cf5..0ebf0004 100644 --- a/backend/app/gpt/prompt.py +++ b/backend/app/gpt/prompt.py @@ -42,7 +42,7 @@ - 行内公式用 `$...$`,$ 与公式内容之间**不能有空格** - 块级公式用 `$$...$$`,**必须独占一行**,前后各空一行,$$ 与公式内容之间**不能有空格** - 例如: - ``` + 原问题为: $$\begin{{aligned}} @@ -50,7 +50,6 @@ \end{{aligned}}$$ 转换为:$z = -f$ - ``` - **禁止**用代码块(如 ` ```math ` 或 ` ``` `)包裹公式。 6. **禁止元叙述**:笔记中不要出现"视频讲了……"、"视频中提到……"、"在本视频中……"等指向视频本身的表述。直接总结内容即可,把信息当作客观事实陈述。 diff --git a/backend/app/routers/config.py b/backend/app/routers/config.py index e138d459..17182fde 100644 --- a/backend/app/routers/config.py +++ b/backend/app/routers/config.py @@ -185,8 +185,9 @@ def get_transcriber_models_status(): "downloading": _downloading.get(mlx_key) == "downloading", "available": repo_id is not None, }) - except Exception: + except ImportError: # mlx-whisper 未安装或导入失败,跳过该分支 + logger.warning("MLX Whisper 未安装,跳过模型状态检查") pass return R.success(data={ diff --git a/backend/app/utils/url_parser.py b/backend/app/utils/url_parser.py index 012594cb..515ceca9 100644 --- a/backend/app/utils/url_parser.py +++ b/backend/app/utils/url_parser.py @@ -22,12 +22,7 @@ def extract_video_id(url: str, platform: str) -> Optional[str]: match = re.search(r"BV([0-9A-Za-z]+)", url) if not match: return None - bv = f"BV{match.group(1)}" - # 提取分 P 参数(合集),避免不同 P 覆盖同一文件 - p_match = re.search(r'[?&]p=(\d+)', url) - if p_match: - return f"{bv}_p{p_match.group(1)}" - return bv + return f"BV{match.group(1)}" elif platform == "youtube": # 匹配 v=xxxxx 或 youtu.be/xxxxx,ID 长度通常为 11 @@ -42,6 +37,22 @@ def extract_video_id(url: str, platform: str) -> Optional[str]: return None +def extract_bilibili_task_id(url: str) -> str: + """ + 从 B 站链接中提取带分 P 信息的任务签名,用于缓存文件名区分不同 P。 + + :param url: Bilibili 视频链接 + :return: 纯 BV 号,若存在 p 参数则追加为 BVxxxx_p2 + """ + bvid = extract_video_id(url, "bilibili") + if not bvid: + return "" + p_match = re.search(r'[?&]p=(\d+)', url) + if p_match: + return f"{bvid}_p{p_match.group(1)}" + return bvid + + def resolve_bilibili_short_url(short_url: str) -> Optional[str]: """ 解析哔哩哔哩短链接以获取真实视频链接 diff --git a/backend/requirements.txt b/backend/requirements.txt index a0b98a99..06048148 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -24,7 +24,7 @@ click-repl==0.3.0 colorama==0.4.6 coloredlogs==15.0.1 cssselect2==0.8.0 -ctranslate2>=4.6.0 +ctranslate2>=4.6.0,<5 distro==1.9.0 dnspython==2.7.0 email_validator==2.2.0 From 615ea13049800dce220d474032fdef02f7e07306 Mon Sep 17 00:00:00 2001 From: forest <45279453+Yurken@users.noreply.github.com> Date: Fri, 22 May 2026 13:23:53 +0800 Subject: [PATCH 3/3] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- backend/app/routers/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/app/routers/config.py b/backend/app/routers/config.py index 17182fde..600b0ae7 100644 --- a/backend/app/routers/config.py +++ b/backend/app/routers/config.py @@ -186,9 +186,9 @@ def get_transcriber_models_status(): "available": repo_id is not None, }) except ImportError: - # mlx-whisper 未安装或导入失败,跳过该分支 - logger.warning("MLX Whisper 未安装,跳过模型状态检查") - pass + # mlx-whisper 未安装或导入失败,跳过该分支,并反映实际不可用状态 + mlx_available = False + logger.debug("MLX Whisper 未安装,跳过模型状态检查") return R.success(data={ "whisper": statuses,