Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Dockerfile.complete
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ RUN set -ex && \
apt-get install -y --no-install-recommends ffmpeg nginx supervisor procps && \
rm -rf /var/lib/apt/lists/*

# Install Node.js 22 (needed by the yt-dlp EJS challenge solver) and upgrade yt-dlp.
# Package lists and the pip cache are dropped in the same layer to keep the image small.
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
    apt-get install -y --no-install-recommends nodejs && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --no-cache-dir --upgrade yt-dlp

ENV PATH="/usr/bin:${PATH}"
ENV HF_ENDPOINT=https://hf-mirror.com
ENV PYTHONUNBUFFERED=1
Expand Down
80 changes: 50 additions & 30 deletions backend/app/downloaders/youtube_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,49 @@
from app.services.proxy_config_manager import ProxyConfigManager
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
from app.services.cookie_manager import CookieConfigManager
import tempfile

logger = logging.getLogger(__name__)


def _apply_proxy(ydl_opts: dict) -> dict:
    """Inject the globally configured proxy into a yt-dlp options dict.

    YouTube is not directly reachable from mainland China, so when a global
    proxy is configured it is copied into ``ydl_opts["proxy"]``.

    :param ydl_opts: yt-dlp options; mutated in place.
    :return: the same ``ydl_opts`` object, for call chaining.
    """
    proxy = ProxyConfigManager().get_proxy_url()
    if not proxy:
        return ydl_opts

    ydl_opts["proxy"] = proxy

    # Proxy URLs may embed "user:password@"; keep that secret out of the logs.
    userinfo, at, host = proxy.rpartition("@")
    if at:
        scheme_end = userinfo.find("://")
        prefix = userinfo[: scheme_end + 3] if scheme_end != -1 else ""
        shown = f"{prefix}***@{host}"
    else:
        shown = proxy
    logger.info("yt-dlp proxy: %s", shown)
    return ydl_opts


class YoutubeDownloader(Downloader, ABC):
def __init__(self):
    """Initialise the base downloader and prepare the YouTube cookie file.

    Reads the "youtube" entry from the cookie configuration and asks
    ``_write_netscape_cookie_file`` to turn it into a file path that is
    stored on ``self._cookiefile``.
    """
    super().__init__()
    manager = CookieConfigManager()
    self._cookie_mgr = manager
    self._cookie = manager.get("youtube")
    cookie_path = self._write_netscape_cookie_file()
    self._cookiefile = cookie_path

def _write_netscape_cookie_file(self):
    """Write ``self._cookie`` to a temporary Netscape-format cookie file.

    Three input shapes are recognised:

    * a complete Netscape file (starts with ``# Netscape`` or
      ``# HTTP Cookie``) - written as is;
    * tab-separated Netscape rows without the header - the header is added;
    * a ``key=value; key=value`` cookie header string - every pair becomes
      a ``.youtube.com`` row.

    :return: path of the written file, or ``None`` when no cookie is
        configured (``None``, empty or whitespace-only).
    """
    cookie = (self._cookie or "").strip()
    if not cookie:
        return None

    header = "# Netscape HTTP Cookie File\n"
    if cookie.startswith(("# Netscape", "# HTTP Cookie")):
        # Already a full cookie file, e.g. exported by a browser extension.
        content = cookie + "\n"
    elif "\t" in cookie:
        # Netscape rows without the leading header line.
        content = header + cookie + "\n"
    else:
        # Cookie header string; tolerate ";" with or without a following
        # space and skip fragments that have no name or no "=".
        rows = []
        for fragment in cookie.split(";"):
            key, sep, value = fragment.strip().partition("=")
            key = key.strip()
            if not sep or not key:
                continue
            rows.append(f".youtube.com\tTRUE\t/\tFALSE\t0\t{key}\t{value}\n")
        content = header + "".join(rows)

    # delete=False: the file is only referenced by path after this method
    # returns. The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as tmp:
        tmp.write(content)
        path = tmp.name

    logger.info("YouTube Netscape cookie file: %s", path)
    return path

def download(
self,
Expand All @@ -47,15 +73,17 @@ def download(
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")

ydl_opts = {
'format': 'bestaudio[ext=m4a]/bestaudio/best',
'outtmpl': output_path,
'noplaylist': True,
'quiet': False,
"format": "bestaudio[ext=m4a]/bestaudio/best",
"outtmpl": output_path,
"noplaylist": True,
"quiet": False,
}

if skip_download:
ydl_opts['skip_download'] = True
ydl_opts["skip_download"] = True

if self._cookiefile:
ydl_opts["cookiefile"] = self._cookiefile
_apply_proxy(ydl_opts)
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=not skip_download)
Expand All @@ -73,18 +101,16 @@ def download(
cover_url=cover_url,
platform="youtube",
video_id=video_id,
raw_info={'tags': info.get('tags')},
raw_info={"tags": info.get("tags")},
video_path=None,
)

def download_video(
self,
video_url: str,
output_dir: Union[str, None] = None,
max_height: int = 720,
) -> str:
"""
下载视频,返回视频文件路径
"""
if output_dir is None:
output_dir = get_data_dir()
video_id = extract_video_id(video_url, "youtube")
Expand All @@ -94,42 +120,36 @@ def download_video(
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")

fmt = f"bestvideo[height<={max_height}][ext=mp4]+bestaudio[ext=m4a]/best[height<={max_height}][ext=mp4]"
ydl_opts = {
'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
'outtmpl': output_path,
'noplaylist': True,
'quiet': False,
'merge_output_format': 'mp4', # 确保合并成 mp4
"format": fmt,
"outtmpl": output_path,
"noplaylist": True,
"quiet": False,
"merge_output_format": "mp4",
}

if self._cookiefile:
ydl_opts["cookiefile"] = self._cookiefile
_apply_proxy(ydl_opts)
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=True)
video_id = info.get("id")
video_path = os.path.join(output_dir, f"{video_id}.mp4")

if not os.path.exists(video_path):
raise FileNotFoundError(f"视频文件未找到: {video_path}")
raise FileNotFoundError(f"video not found: {video_path}")

return video_path

def download_subtitles(self, video_url: str, output_dir: Optional[str] = None,
                       langs: Optional[List[str]] = None) -> Optional[TranscriptResult]:
    """
    Fetch subtitles for a YouTube video through ``YouTubeSubtitleFetcher``.

    Per the original note on this method, the fetcher talks to the YouTube
    InnerTube API directly (manual subtitles preferred, auto-generated as a
    fallback) and is lighter than the yt_dlp route because no temporary
    files are written to disk.

    :param video_url: video link
    :param output_dir: unused (kept for interface compatibility)
    :param langs: preferred languages, in priority order; defaults to
        Chinese variants, then English, then Japanese
    :return: TranscriptResult or None
    """
    if langs is None:
        langs = ["zh-Hans", "zh", "zh-CN", "zh-TW", "en", "en-US", "ja"]

    video_id = extract_video_id(video_url, "youtube")
    # Use the module logger instead of print so the message follows the
    # application's logging configuration.
    logger.info("fetch subtitles, video_id=%s, langs=%s", video_id, langs)
    return YouTubeSubtitleFetcher().fetch_subtitles(video_id, langs)