From 72ff86564a7a28e6c3a1de28f6cab707be5bd6ae Mon Sep 17 00:00:00 2001
From: Tonyhuang <129367165+tuofangzhe@users.noreply.github.com>
Date: Sun, 10 May 2026 14:27:00 +0800
Subject: [PATCH] =?UTF-8?q?feat(seo):=20/r/{job=5Fid}=20=E7=BD=91=E9=A1=B5?=
 =?UTF-8?q?=20title=20=E4=B8=8E=E9=9D=A2=E5=8C=85=E5=B1=91=E5=8A=A0?=
 =?UTF-8?q?=E5=85=A5=E6=A3=80=E6=B5=8B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

之前同一中转站下所有报告共享同一个 title:
「www.fucheers.top OpenAI 中转站检测:91/100 存在风险 | Veridrop」
不同模型的报告(gpt-5.5 vs gpt-5.4-mini vs gpt-4o)title 一字不差,
Google 直接当成重复页面去重,长尾流量(「XX 站测 gpt-5.5 怎么样」)
全部 sink 到一个 canonical 上,索引面缩成一份。

改动:
- web/server.py _seo_meta_for_report:title 模板插入 target_model
  「{domain} OpenAI 中转站 gpt-5.5 检测:91/100 存在风险 | Veridrop」
  无 model 时退回原文,避免出现 "中转站  检测" 双空格痕迹
- web/templates/result.html 面包屑末尾段:
  从「报告 #{job_id}」改为「{target_model}」
  没有 target_model 时仍回退到「报告 #{job_id}」

效果:
- 每份报告 title 唯一,Google 视为独立 indexable page
- 面包屑文字在跨报告导航时也能立刻看出测的是哪个模型
- 报告分享卡片(og_title 同 seo_title)也直接带模型名

测试:tests/test_seo_meta.py 5 个用例覆盖:
- 标题含 model
- 空 model 回退不留空格痕迹
- 同域名不同 model 标题相互区分(SEO 反 dedupe 核心断言)
- 长 dated snapshot 不超 155 字符
- 描述仍提及 model(防回归)
---
 tests/test_seo_meta.py    | 83 +++++++++++++++++++++++++++++++++++++++
 web/server.py             | 15 +++++--
 web/templates/result.html |  9 +++--
 3 files changed, 101 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_seo_meta.py

diff --git a/tests/test_seo_meta.py b/tests/test_seo_meta.py
new file mode 100644
index 0000000..9e36664
--- /dev/null
+++ b/tests/test_seo_meta.py
@@ -0,0 +1,83 @@
+"""Tests for /r/{job_id} SEO meta — title diversity drives long-tail SEO,
+so model name must appear in the page title, not just domain + verdict.
+Without per-model variation, two reports on the same relay collide on the
+same title text and Google deduplicates them out of the index.
+"""
+
+from __future__ import annotations
+
+from web.server import _seo_meta_for_report
+
+
+def _base_report(**overrides):
+    """Minimal finished-report shape that _seo_meta_for_report consumes."""
+    report = {
+        "base_url": "https://www.fucheers.top/v1",
+        "protocol": "openai",
+        "target_model": "gpt-5.5",
+        "total_score": 91.0,
+        "verdict": "marginal",
+        "results": [
+            {"status": "pass"} for _ in range(7)
+        ] + [
+            {"status": "fail"} for _ in range(2)
+        ],
+    }
+    report.update(overrides)
+    return report
+
+
+def test_seo_title_includes_target_model():
+    """Each report's title must embed the detected model so reports across
+    the same relay become distinct indexable pages."""
+    meta = _seo_meta_for_report(_base_report())
+    title = meta["seo_title"]
+
+    assert "www.fucheers.top" in title
+    assert "OpenAI" in title
+    assert "gpt-5.5" in title, f"model missing from title: {title!r}"
+    assert "91/100" in title
+    assert "存在风险" in title
+
+
+def test_seo_title_falls_back_when_target_model_missing():
+    """Legacy reports / probe failures may not have target_model. Title
+    must still render coherently — must NOT print empty 'OpenAI 中转站  检测:'."""
+    meta = _seo_meta_for_report(_base_report(target_model=""))
+    title = meta["seo_title"]
+
+    # No leading/trailing extra space, no "  " (double space) artifact
+    assert "  " not in title, f"double-space artifact in fallback title: {title!r}"
+    assert "www.fucheers.top" in title
+    assert "OpenAI 中转站检测" in title
+    assert "91/100" in title
+
+
+def test_seo_title_distinct_per_model_same_relay():
+    """Regression for the long-tail SEO intent: same domain + same score +
+    different model should yield different titles (otherwise Google
+    de-duplicates them and we lose the indexable surface)."""
+    a = _seo_meta_for_report(_base_report(target_model="gpt-5.5"))["seo_title"]
+    b = _seo_meta_for_report(_base_report(target_model="gpt-5.4-mini"))["seo_title"]
+    c = _seo_meta_for_report(_base_report(target_model="gpt-4o"))["seo_title"]
+
+    assert len({a, b, c}) == 3  # pairwise distinct; `a != b != c` never compares a with c
+    assert "gpt-5.5" in a
+    assert "gpt-5.4-mini" in b
+    assert "gpt-4o" in c
+
+
+def test_seo_title_respects_155_char_cap():
+    """The 155-char cap is a hard SEO ceiling — long model snapshot IDs
+    must not push the title over."""
+    meta = _seo_meta_for_report(_base_report(target_model="gpt-5.5-2026-04-23"))
+    assert len(meta["seo_title"]) <= 155
+    assert "gpt-5.5-2026-04-23" in meta["seo_title"]
+
+
+def test_seo_description_still_mentions_model():
+    """The description block already mentioned model before this change —
+    guarding it here so a future title refactor doesn't accidentally drop
+    it from the meta description."""
+    meta = _seo_meta_for_report(_base_report(target_model="gpt-5.5"))
+    assert "gpt-5.5" in meta["seo_description"]
diff --git a/web/server.py b/web/server.py
index 938c646..c188896 100644
--- a/web/server.py
+++ b/web/server.py
@@ -591,9 +591,18 @@ def _seo_meta_for_report(report: dict) -> dict[str, str]:
     fail_count = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "fail")
     total = len(results)
 
-    title = (
-        f"{domain} {proto_label} 中转站检测:{score:.0f}/100 {verdict_zh} | Veridrop"
-    )
+    # Title diversity drives long-tail SEO: each report becomes its own
+    # indexable page with a distinct query target ("X 站测 gpt-5.5 怎么样").
+    # Without the model, every report on the same domain shared one title.
+    if model:
+        title = (
+            f"{domain} {proto_label} 中转站 {model} 检测:"
+            f"{score:.0f}/100 {verdict_zh} | Veridrop"
+        )
+    else:
+        title = (
+            f"{domain} {proto_label} 中转站检测:{score:.0f}/100 {verdict_zh} | Veridrop"
+        )
     description = (
         f"对 {domain} 进行 {proto_label} 中转站检测的完整报告:"
         f"模型 {model},总分 {score:.0f}/100,判定为「{verdict_zh}」。"
diff --git a/web/templates/result.html b/web/templates/result.html
index f871475..9cfa78b 100644
--- a/web/templates/result.html
+++ b/web/templates/result.html
@@ -22,9 +22,12 @@
 {% endblock %}
 
 {% block content %}
-{# Breadcrumb: 首页 › 红黑榜 › {domain} › 报告 #{job_id}.
+{# Breadcrumb: 首页 › 红黑榜 › {domain} › {target_model}.
    When the report's base_url didn't yield a valid domain (legacy reports,
-   garbled URLs), drop the {domain} crumb and link straight to /leaderboard. #}
+   garbled URLs), drop the {domain} crumb and link straight to /leaderboard.
+   The trailing crumb shows the detected model so each report is uniquely
+   identifiable in nav and crawler-readable for long-tail SEO; falls back
+   to "报告 #{job_id}" when target_model is missing. #}
 {% set perf = report.performance or {} %}