diff --git a/BillNote_frontend/src/App.tsx b/BillNote_frontend/src/App.tsx index d871506e..ddbd4df9 100644 --- a/BillNote_frontend/src/App.tsx +++ b/BillNote_frontend/src/App.tsx @@ -1,6 +1,6 @@ import './App.css' import { lazy, Suspense, useEffect } from 'react' -import { BrowserRouter, Navigate, Routes, Route } from 'react-router-dom' +import { HashRouter, Navigate, Routes, Route } from 'react-router-dom' import { useTaskPolling } from '@/hooks/useTaskPolling.ts' import { useCheckBackend } from '@/hooks/useCheckBackend.ts' import { systemCheck } from '@/services/system.ts' @@ -62,7 +62,7 @@ function App() { <> - + 加载中…}> } /> @@ -86,7 +86,7 @@ function App() { - + ) } diff --git a/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx b/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx index ac95cd3e..9265adc5 100644 --- a/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx +++ b/BillNote_frontend/src/pages/HomePage/components/MarkdownViewer.tsx @@ -46,8 +46,8 @@ const steps = [ { label: '保存完成', key: 'SUCCESS' }, ] -const remarkPlugins = [gfm, remarkMath] -const rehypePlugins = [rehypeKatex] +const remarkPlugins = [gfm, [remarkMath, { singleDollarTextMath: true }]] +const rehypePlugins = [[rehypeKatex, { throwOnError: false, strict: false }]] /** * 构建 ReactMarkdown components 对象,baseURL 用于修正图片路径。 diff --git a/backend/app/downloaders/bilibili_downloader.py b/backend/app/downloaders/bilibili_downloader.py index 0a94849f..631e59e6 100644 --- a/backend/app/downloaders/bilibili_downloader.py +++ b/backend/app/downloaders/bilibili_downloader.py @@ -2,6 +2,7 @@ import json import logging import tempfile +import re from abc import ABC from typing import Union, Optional, List @@ -12,7 +13,7 @@ from app.models.notes_model import AudioDownloadResult from app.models.transcriber_model import TranscriptResult, TranscriptSegment from app.utils.path_helper import get_data_dir -from app.utils.url_parser import extract_video_id +from app.utils.url_parser import extract_video_id, extract_bilibili_task_id from app.services.cookie_manager import CookieConfigManager logger = logging.getLogger(__name__) @@ -54,7 +55,11 @@ def download( output_dir=self.cache_data os.makedirs(output_dir, exist_ok=True) - output_path = os.path.join(output_dir, "%(id)s.%(ext)s") + # 合集分 P 处理:识别 URL 中的 p 参数,避免总是下载第一集 + p_match = re.search(r'[?&]p=(\d+)', video_url) + p_suffix = f"_p{p_match.group(1)}" if p_match else "" + + output_path = os.path.join(output_dir, f"%(id)s{p_suffix}.%(ext)s") ydl_opts = { 'format': 'bestaudio[ext=m4a]/bestaudio/best', @@ -67,9 +72,14 @@ def download( 'preferredquality': '64', } ], - 'noplaylist': True, 'quiet': False, } + + if p_match: + ydl_opts['playlist_items'] = p_match.group(1) + else: + ydl_opts['noplaylist'] = True + if self._cookiefile: ydl_opts['cookiefile'] = self._cookiefile @@ -79,7 +89,7 @@ def download( title = info.get("title") duration = info.get("duration", 0) cover_url = info.get("thumbnail") - audio_path = os.path.join(output_dir, f"{video_id}.mp3") + audio_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp3") return AudioDownloadResult( file_path=audio_path, @@ -106,30 +116,37 @@ def download_video( os.makedirs(output_dir, exist_ok=True) print("video_url",video_url) video_id=extract_video_id(video_url, "bilibili") - video_path = os.path.join(output_dir, f"{video_id}.mp4") + p_match = re.search(r'[?&]p=(\d+)', video_url) + p_suffix = f"_p{p_match.group(1)}" if p_match else "" + video_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp4") if os.path.exists(video_path): return video_path # 检查是否已经存在 - output_path = os.path.join(output_dir, "%(id)s.%(ext)s") + output_path = os.path.join(output_dir, f"%(id)s{p_suffix}.%(ext)s") ydl_opts = { 'format': 'bv*[ext=mp4]/bestvideo+bestaudio/best', 'outtmpl': output_path, 'http_headers': {'Referer': 'https://www.bilibili.com'}, - 'noplaylist': True, 'quiet': False, 'merge_output_format': 'mp4', # 确保合并成 mp4 } + + if p_match: + ydl_opts['playlist_items'] = p_match.group(1) + else: + ydl_opts['noplaylist'] = True + if self._cookiefile: ydl_opts['cookiefile'] = self._cookiefile with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(video_url, download=True) video_id = info.get("id") - video_path = os.path.join(output_dir, f"{video_id}.mp4") + video_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp4") if not os.path.exists(video_path): raise FileNotFoundError(f"视频文件未找到: {video_path}") @@ -175,6 +192,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None, langs = ['zh-Hans', 'zh', 'zh-CN', 'ai-zh', 'en', 'en-US'] video_id = extract_video_id(video_url, "bilibili") + task_id = extract_bilibili_task_id(video_url) ydl_opts = { 'writesubtitles': True, @@ -182,10 +200,16 @@ def download_subtitles(self, video_url: str, output_dir: str = None, 'subtitleslangs': langs, 'subtitlesformat': 'srt/json3/best', # 支持多种格式 'skip_download': True, - 'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'), + 'outtmpl': os.path.join(output_dir, f'{task_id}.%(ext)s'), 'quiet': True, } + p_match = re.search(r'[?&]p=(\d+)', video_url) + if p_match: + ydl_opts['playlist_items'] = p_match.group(1) + else: + ydl_opts['noplaylist'] = True + # 通过 CookieConfigManager 注入 B站 Cookie(Netscape cookiefile) if self._cookiefile: ydl_opts['cookiefile'] = self._cookiefile @@ -229,7 +253,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None, # 查找字幕文件 ext = sub_info.get('ext', 'srt') - subtitle_file = os.path.join(output_dir, f"{video_id}.{detected_lang}.{ext}") + subtitle_file = os.path.join(output_dir, f"{task_id}.{detected_lang}.{ext}") if not os.path.exists(subtitle_file): logger.info(f"字幕文件不存在: {subtitle_file}") diff --git a/backend/app/gpt/prompt.py b/backend/app/gpt/prompt.py index f09d25df..0ebf0004 100644 --- a/backend/app/gpt/prompt.py +++ b/backend/app/gpt/prompt.py @@ -38,7 +38,20 @@ 2. **去除无关内容**:省略广告、填充词、问候语和不相关的言论。 3. **保留关键细节**:保留重要事实、示例、结论和建议。(如果额外重要的任务有格式需求可以不遵守) 4. **可读布局**:必要时使用项目符号,并保持段落简短,增强可读性。(如果额外重要的任务有格式需求可以不遵守) -5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现,适合 Markdown 渲染。 +5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现: + - 行内公式用 `$...$`,$ 与公式内容之间**不能有空格** + - 块级公式用 `$$...$$`,**必须独占一行**,前后各空一行,$$ 与公式内容之间**不能有空格** + - 例如: + + 原问题为: + + $$\begin{{aligned}} + \min \quad & f = 3x_1 - 5x_2 + \end{{aligned}}$$ + + 转换为:$z = -f$ + - **禁止**用代码块(如 ` ```math ` 或 ` ``` `)包裹公式。 +6. **禁止元叙述**:笔记中不要出现"视频讲了……"、"视频中提到……"、"在本视频中……"等指向视频本身的表述。直接总结内容即可,把信息当作客观事实陈述。 请始终遵循此规则。 diff --git a/backend/app/routers/config.py b/backend/app/routers/config.py index c816fc5b..600b0ae7 100644 --- a/backend/app/routers/config.py +++ b/backend/app/routers/config.py @@ -172,18 +172,23 @@ def get_transcriber_models_status(): mlx_available = platform.system() == "Darwin" mlx_statuses = [] if mlx_available: - from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP - for size in WHISPER_MODEL_SIZES: - mlx_key = f"mlx-{size}" - repo_id = MLX_MODEL_MAP.get(size) - # 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致 - downloaded = _check_mlx_whisper_model_exists(size) - mlx_statuses.append({ - "model_size": size, - "downloaded": downloaded, - "downloading": _downloading.get(mlx_key) == "downloading", - "available": repo_id is not None, - }) + try: + from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP + for size in WHISPER_MODEL_SIZES: + mlx_key = f"mlx-{size}" + repo_id = MLX_MODEL_MAP.get(size) + # 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致 + downloaded = _check_mlx_whisper_model_exists(size) + mlx_statuses.append({ + "model_size": size, + "downloaded": downloaded, + "downloading": _downloading.get(mlx_key) == "downloading", + "available": repo_id is not None, + }) + except ImportError: + # mlx-whisper 未安装或导入失败,跳过该分支,并反映实际不可用状态 + mlx_available = False + logger.debug("MLX Whisper 未安装,跳过模型状态检查") return R.success(data={ "whisper": statuses, diff --git a/backend/app/utils/url_parser.py b/backend/app/utils/url_parser.py index 8f76a169..515ceca9 100644 --- a/backend/app/utils/url_parser.py +++ b/backend/app/utils/url_parser.py @@ -20,7 +20,9 @@ def extract_video_id(url: str, platform: str) -> Optional[str]: # 匹配 BV号(如 BV1vc411b7Wa) match = re.search(r"BV([0-9A-Za-z]+)", url) - return f"BV{match.group(1)}" if match else None + if not match: + return None + return f"BV{match.group(1)}" elif platform == "youtube": # 匹配 v=xxxxx 或 youtu.be/xxxxx,ID 长度通常为 11 @@ -35,6 +37,22 @@ def extract_video_id(url: str, platform: str) -> Optional[str]: return None +def extract_bilibili_task_id(url: str) -> str: + """ + 从 B 站链接中提取带分 P 信息的任务签名,用于缓存文件名区分不同 P。 + + :param url: Bilibili 视频链接 + :return: 纯 BV 号,若存在 p 参数则追加为 BVxxxx_p2 + """ + bvid = extract_video_id(url, "bilibili") + if not bvid: + return "" + p_match = re.search(r'[?&]p=(\d+)', url) + if p_match: + return f"{bvid}_p{p_match.group(1)}" + return bvid + + def resolve_bilibili_short_url(short_url: str) -> Optional[str]: """ 解析哔哩哔哩短链接以获取真实视频链接 diff --git a/backend/main.py b/backend/main.py index 9f21a64f..57f0c2f0 100644 --- a/backend/main.py +++ b/backend/main.py @@ -20,7 +20,9 @@ from ffmpeg_helper import ensure_ffmpeg_or_raise logger = get_logger(__name__) -load_dotenv() +# 支持在 backend/ 子目录中运行时也能加载项目根目录的 .env +dotenv_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), '.env') +load_dotenv(dotenv_path) # 读取 .env 中的路径 static_path = os.getenv('STATIC', '/static') diff --git a/backend/requirements.txt b/backend/requirements.txt index b0d23268..06048148 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -24,7 +24,7 @@ click-repl==0.3.0 colorama==0.4.6 coloredlogs==15.0.1 cssselect2==0.8.0 -ctranslate2==4.6.0 +ctranslate2>=4.6.0,<5 distro==1.9.0 dnspython==2.7.0 email_validator==2.2.0