Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions BillNote_frontend/src/App.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import './App.css'
import { lazy, Suspense, useEffect } from 'react'
import { BrowserRouter, Navigate, Routes, Route } from 'react-router-dom'
import { HashRouter, Navigate, Routes, Route } from 'react-router-dom'
import { useTaskPolling } from '@/hooks/useTaskPolling.ts'
import { useCheckBackend } from '@/hooks/useCheckBackend.ts'
import { systemCheck } from '@/services/system.ts'
Expand Down Expand Up @@ -62,7 +62,7 @@ function App() {
<>
<StartupBanner />
<BackendHealthIndicator />
<BrowserRouter>
<HashRouter>
<Suspense fallback={<div className="flex h-screen items-center justify-center">加载中…</div>}>
<Routes>
<Route path="/onboarding" element={<Onboarding />} />
Expand All @@ -86,7 +86,7 @@ function App() {
</Route>
</Routes>
</Suspense>
</BrowserRouter>
</HashRouter>
</>
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ const steps = [
{ label: '保存完成', key: 'SUCCESS' },
]

const remarkPlugins = [gfm, remarkMath]
const rehypePlugins = [rehypeKatex]
const remarkPlugins = [gfm, [remarkMath, { singleDollarTextMath: true }]]
const rehypePlugins = [[rehypeKatex, { throwOnError: false, strict: false }]]

/**
* 构建 ReactMarkdown components 对象,baseURL 用于修正图片路径。
Expand Down
44 changes: 34 additions & 10 deletions backend/app/downloaders/bilibili_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import tempfile
import re
from abc import ABC
from typing import Union, Optional, List

Expand All @@ -12,7 +13,7 @@
from app.models.notes_model import AudioDownloadResult
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
from app.utils.url_parser import extract_video_id, extract_bilibili_task_id
from app.services.cookie_manager import CookieConfigManager

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -54,7 +55,11 @@ def download(
output_dir=self.cache_data
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
# 合集分 P 处理:识别 URL 中的 p 参数,避免总是下载第一集
p_match = re.search(r'[?&]p=(\d+)', video_url)
p_suffix = f"_p{p_match.group(1)}" if p_match else ""

output_path = os.path.join(output_dir, f"%(id)s{p_suffix}.%(ext)s")
Comment on lines +58 to +62

ydl_opts = {
'format': 'bestaudio[ext=m4a]/bestaudio/best',
Expand All @@ -67,9 +72,14 @@ def download(
'preferredquality': '64',
}
],
'noplaylist': True,
'quiet': False,
}

if p_match:
ydl_opts['playlist_items'] = p_match.group(1)
else:
ydl_opts['noplaylist'] = True

if self._cookiefile:
ydl_opts['cookiefile'] = self._cookiefile

Expand All @@ -79,7 +89,7 @@ def download(
title = info.get("title")
duration = info.get("duration", 0)
cover_url = info.get("thumbnail")
audio_path = os.path.join(output_dir, f"{video_id}.mp3")
audio_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp3")

return AudioDownloadResult(
file_path=audio_path,
Expand All @@ -106,30 +116,37 @@ def download_video(
os.makedirs(output_dir, exist_ok=True)
print("video_url",video_url)
video_id=extract_video_id(video_url, "bilibili")
video_path = os.path.join(output_dir, f"{video_id}.mp4")
p_match = re.search(r'[?&]p=(\d+)', video_url)
p_suffix = f"_p{p_match.group(1)}" if p_match else ""
video_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp4")
if os.path.exists(video_path):
return video_path

# 检查是否已经存在


output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
output_path = os.path.join(output_dir, f"%(id)s{p_suffix}.%(ext)s")

ydl_opts = {
'format': 'bv*[ext=mp4]/bestvideo+bestaudio/best',
'outtmpl': output_path,
'http_headers': {'Referer': 'https://www.bilibili.com'},
'noplaylist': True,
'quiet': False,
'merge_output_format': 'mp4', # 确保合并成 mp4
}

if p_match:
ydl_opts['playlist_items'] = p_match.group(1)
else:
ydl_opts['noplaylist'] = True

if self._cookiefile:
ydl_opts['cookiefile'] = self._cookiefile

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=True)
video_id = info.get("id")
video_path = os.path.join(output_dir, f"{video_id}.mp4")
video_path = os.path.join(output_dir, f"{video_id}{p_suffix}.mp4")

if not os.path.exists(video_path):
raise FileNotFoundError(f"视频文件未找到: {video_path}")
Expand Down Expand Up @@ -175,17 +192,24 @@ def download_subtitles(self, video_url: str, output_dir: str = None,
langs = ['zh-Hans', 'zh', 'zh-CN', 'ai-zh', 'en', 'en-US']

video_id = extract_video_id(video_url, "bilibili")
task_id = extract_bilibili_task_id(video_url)

ydl_opts = {
'writesubtitles': True,
'writeautomaticsub': True,
'subtitleslangs': langs,
'subtitlesformat': 'srt/json3/best', # 支持多种格式
'skip_download': True,
'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'),
'outtmpl': os.path.join(output_dir, f'{task_id}.%(ext)s'),
'quiet': True,
}

p_match = re.search(r'[?&]p=(\d+)', video_url)
if p_match:
ydl_opts['playlist_items'] = p_match.group(1)
else:
ydl_opts['noplaylist'] = True

# 通过 CookieConfigManager 注入 B站 Cookie(Netscape cookiefile)
if self._cookiefile:
ydl_opts['cookiefile'] = self._cookiefile
Expand Down Expand Up @@ -229,7 +253,7 @@ def download_subtitles(self, video_url: str, output_dir: str = None,

# 查找字幕文件
ext = sub_info.get('ext', 'srt')
subtitle_file = os.path.join(output_dir, f"{video_id}.{detected_lang}.{ext}")
subtitle_file = os.path.join(output_dir, f"{task_id}.{detected_lang}.{ext}")

if not os.path.exists(subtitle_file):
logger.info(f"字幕文件不存在: {subtitle_file}")
Expand Down
15 changes: 14 additions & 1 deletion backend/app/gpt/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,20 @@
2. **去除无关内容**:省略广告、填充词、问候语和不相关的言论。
3. **保留关键细节**:保留重要事实、示例、结论和建议。(如果额外重要的任务有格式需求可以不遵守)
4. **可读布局**:必要时使用项目符号,并保持段落简短,增强可读性。(如果额外重要的任务有格式需求可以不遵守)
5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现,适合 Markdown 渲染。
5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现:
- 行内公式用 `$...$`,$ 与公式内容之间**不能有空格**
- 块级公式用 `$$...$$`,**必须独占一行**,前后各空一行,$$ 与公式内容之间**不能有空格**
- 例如:

原问题为:

$$\begin{{aligned}}
\min \quad & f = 3x_1 - 5x_2
\end{{aligned}}$$

转换为:$z = -f$
- **禁止**用代码块(如 ` ```math ` 或 ` ``` `)包裹公式。
6. **禁止元叙述**:笔记中不要出现"视频讲了……"、"视频中提到……"、"在本视频中……"等指向视频本身的表述。直接总结内容即可,把信息当作客观事实陈述。
Comment on lines +41 to +54


请始终遵循此规则。
Expand Down
29 changes: 17 additions & 12 deletions backend/app/routers/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,23 @@ def get_transcriber_models_status():
mlx_available = platform.system() == "Darwin"
mlx_statuses = []
if mlx_available:
from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP
for size in WHISPER_MODEL_SIZES:
mlx_key = f"mlx-{size}"
repo_id = MLX_MODEL_MAP.get(size)
# 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致
downloaded = _check_mlx_whisper_model_exists(size)
mlx_statuses.append({
"model_size": size,
"downloaded": downloaded,
"downloading": _downloading.get(mlx_key) == "downloading",
"available": repo_id is not None,
})
try:
from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP
for size in WHISPER_MODEL_SIZES:
mlx_key = f"mlx-{size}"
repo_id = MLX_MODEL_MAP.get(size)
# 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致
downloaded = _check_mlx_whisper_model_exists(size)
mlx_statuses.append({
"model_size": size,
"downloaded": downloaded,
"downloading": _downloading.get(mlx_key) == "downloading",
"available": repo_id is not None,
})
except ImportError:
# mlx-whisper 未安装或导入失败,跳过该分支,并反映实际不可用状态
mlx_available = False
logger.debug("MLX Whisper 未安装,跳过模型状态检查")

return R.success(data={
"whisper": statuses,
Expand Down
20 changes: 19 additions & 1 deletion backend/app/utils/url_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ def extract_video_id(url: str, platform: str) -> Optional[str]:

# 匹配 BV号(如 BV1vc411b7Wa)
match = re.search(r"BV([0-9A-Za-z]+)", url)
return f"BV{match.group(1)}" if match else None
if not match:
return None
return f"BV{match.group(1)}"

elif platform == "youtube":
# 匹配 v=xxxxx 或 youtu.be/xxxxx,ID 长度通常为 11
Expand All @@ -35,6 +37,22 @@ def extract_video_id(url: str, platform: str) -> Optional[str]:
return None


def extract_bilibili_task_id(url: str) -> str:
    """
    Build a task signature for a Bilibili link that includes the part number.

    The signature is meant for cache file names, so that different parts
    ("P") reached through the same BV id do not collide.

    :param url: Bilibili video link.
    :return: The bare BV id, or ``BVxxxx_p<N>`` when the URL carries a
        ``p=<N>`` query parameter. An empty string is returned when
        ``extract_video_id`` finds no BV id in the URL.
    """
    bvid = extract_video_id(url, "bilibili")
    if not bvid:
        return ""
    # Only a numeric ``p`` introduced by ``?`` or ``&`` counts as a part index.
    p_match = re.search(r'[?&]p=(\d+)', url)
    if p_match:
        return f"{bvid}_p{p_match.group(1)}"
    return bvid


def resolve_bilibili_short_url(short_url: str) -> Optional[str]:
"""
解析哔哩哔哩短链接以获取真实视频链接
Expand Down
4 changes: 3 additions & 1 deletion backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from ffmpeg_helper import ensure_ffmpeg_or_raise

logger = get_logger(__name__)
load_dotenv()
# 支持在 backend/ 子目录中运行时也能加载项目根目录的 .env
dotenv_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), '.env')
load_dotenv(dotenv_path)

# 读取 .env 中的路径
static_path = os.getenv('STATIC', '/static')
Expand Down
2 changes: 1 addition & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ click-repl==0.3.0
colorama==0.4.6
coloredlogs==15.0.1
cssselect2==0.8.0
ctranslate2==4.6.0
ctranslate2>=4.6.0,<5
distro==1.9.0
dnspython==2.7.0
email_validator==2.2.0
Expand Down
Loading