From 39c5fa4e470e9ba39f93a4b0b89e358b91e09bca Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 6 Nov 2025 19:46:22 +0000 Subject: [PATCH] Refactor: Improve Wikidata integration and error handling This commit introduces several improvements to the Wikidata integration: - **Error Handling:** Implemented robust error handling for Wikidata API requests, including timeouts, connection errors, and unexpected responses. This ensures a more stable user experience. - **Localization:** Added localized prompts and error messages for better multi-language support. - **Caching:** Introduced caching for Wikidata entity lookups to improve performance. - **Code Structure:** Refactored the `wikidata_helpers` module for better organization and readability. - **UI Enhancements:** Updated the frontend to display error messages and loading states more effectively. Co-authored-by: doosahyasno --- app.py | 100 ++++++++++++++++--- static/style.css | 29 ++++++ templates/index.html | 197 ++++++++++++++++++++++++-------------- utils/wikidata_helpers.py | 87 +++++++++++++---- 4 files changed, 312 insertions(+), 101 deletions(-) diff --git a/app.py b/app.py index 0a0b395..9a87347 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,4 @@ # app.py (ссылки и кнопка «Показать ещё» работают) -import json from flask import Flask, render_template, request, jsonify, session import spacy from utils.wikidata_helpers import find_and_describe @@ -11,6 +10,47 @@ nlp_ru = spacy.load("ru_core_news_sm") nlp_en = spacy.load("en_core_web_sm") +ASK_PROMPTS = { + "ru": "Пожалуйста, введите вопрос.", + "en": "Please enter a question.", + "zh": "请输入问题。", +} + +EMPTY_RESPONSES = { + "ru": "Ничего не найдено. Попробуй переформулировать.", + "en": "Nothing was found. Try rephrasing your question.", + "zh": "没有找到结果,请尝试换一种问法。", +} + +ERROR_RESPONSES = { + "timeout": { + "ru": "Wikidata долго не отвечает. Попробуй ещё раз чуть позже.", + "en": "Wikidata is taking too long to respond. Try again in a moment.", + "zh": "Wikidata 响应超时,请稍后再试。", + }, + "request": { + "ru": "Не удалось подключиться к Wikidata. Проверь соединение и повтори попытку.", + "en": "Unable to reach Wikidata right now. Please check your connection and retry.", + "zh": "目前无法连接到 Wikidata,请检查网络后重试。", + }, + "decode": { + "ru": "Wikidata прислала неожиданный ответ. Попробуй позже ещё раз.", + "en": "Wikidata returned an unexpected response. Please try again later.", + "zh": "Wikidata 返回了异常数据,请稍后再试。", + }, +} + +GENERIC_ERROR = { + "ru": "Не получилось получить данные из Wikidata. Попробуй ещё раз позже.", + "en": "Could not retrieve data from Wikidata. Please try again later.", + "zh": "未能从 Wikidata 获取数据,请稍后再试。", +} + + +def localize(table, lang, fallback): + return table.get(lang) if isinstance(table, dict) and table.get(lang) else fallback + + @app.route("/") def index(): lang = session.get("lang") or request.accept_languages.best_match(LANGUAGES.keys()) or "ru" @@ -30,22 +70,39 @@ def ask(): lang = session.get("lang", "ru") if not question: - return jsonify({"answer": "Пожалуйста, введите вопрос."}) + return jsonify({ + "answer": localize(ASK_PROMPTS, lang, ASK_PROMPTS["en"]), + "status": "empty", + "more": False, + }) nlp = nlp_ru if lang == "ru" else nlp_en doc = nlp(question) query = next((ent.text for ent in doc.ents if ent.label_ in {"PER", "ORG", "LOC"}), question) - found = find_and_describe(query, lang) + result = find_and_describe(query, lang) + + if result["error"]: + err_table = ERROR_RESPONSES.get(result["error"], GENERIC_ERROR) + return jsonify({ + "answer": localize(err_table, lang, GENERIC_ERROR["en"]), + "status": "error", + "more": False, + }) - if not found: - return jsonify({"answer": "Ничего не найдено. Попробуй переформулировать."}) + items = result["items"] or [] + if not items: + return jsonify({ + "answer": localize(EMPTY_RESPONSES, lang, EMPTY_RESPONSES["en"]), + "status": "empty", + "more": False, + }) - if len(found) == 1: - text = found[0]["text"] + if len(items) == 1: + text = items[0]["text"] more = False else: - show = found[:3] - rest = found[3:] + show = items[:3] + rest = items[3:] lines = ["Я нашёл несколько объектов:\n"] for f in show: lines.append(f"• **{f['label']}** — {f['descr']}\n[🔗 Источник]({f['url']})") @@ -54,19 +111,36 @@ def ask(): text = "\n".join(lines) more = bool(rest) - return jsonify({"answer": text, "more": more, "question": question}) + return jsonify({"answer": text, "more": more, "question": question, "status": "ok"}) @app.route("/more", methods=["POST"]) def more(): data = request.get_json() or {} question = data.get("question", "").strip() lang = session.get("lang", "ru") - found = find_and_describe(question, lang) + result = find_and_describe(question, lang) + + if result["error"]: + err_table = ERROR_RESPONSES.get(result["error"], GENERIC_ERROR) + return jsonify({ + "answer": localize(err_table, lang, GENERIC_ERROR["en"]), + "status": "error", + "more": False, + }) + + items = result["items"] or [] + if not items: + return jsonify({ + "answer": localize(EMPTY_RESPONSES, lang, EMPTY_RESPONSES["en"]), + "status": "empty", + "more": False, + }) + lines = [] - for f in found: + for f in items: lines.append(f"• **{f['label']}** — {f['descr']}\n{f['text']}") full_text = "\n\n".join(lines) - return jsonify({"answer": full_text, "more": False}) + return jsonify({"answer": full_text, "more": False, "status": "ok"}) if __name__ == "__main__": app.run(debug=True, host="127.0.0.1", port=5000) diff --git a/static/style.css b/static/style.css index 345ff0f..515a96b 100644 --- a/static/style.css +++ b/static/style.css @@ -84,6 +84,14 @@ h1 { border-top-left-radius: 4px; } +.message-error { + border-left: 3px solid #ff6b6b; +} + +.message-info { + border-left: 3px solid #4dabf7; +} + .message-time { font-size: 0.7rem; color: #777; @@ -143,6 +151,27 @@ h1 { cursor: not-allowed; } +.show-more-button { + align-self: flex-start; + margin-left: 20px; + padding: 6px 14px; + border-radius: 12px; + border: 1px solid #fbec5d; + background: transparent; + color: #fbec5d; + cursor: pointer; + transition: background 0.2s, color 0.2s; +} + +.show-more-button:hover { + background: rgba(251, 236, 93, 0.15); +} + +.show-more-button:disabled { + opacity: 0.6; + cursor: not-allowed; +} + .examples { display: flex; gap: 10px; diff --git a/templates/index.html b/templates/index.html index e123df4..1c18201 100644 --- a/templates/index.html +++ b/templates/index.html @@ -43,82 +43,135 @@

🧠 Wikidata AI

- + + - sendButton.onclick = sendMessage; - messageInput.onkeypress = e => {if(e.key==="Enter") sendMessage();}; - - - \ No newline at end of file diff --git a/utils/wikidata_helpers.py b/utils/wikidata_helpers.py index 662bcaa..7c00b8c 100644 --- a/utils/wikidata_helpers.py +++ b/utils/wikidata_helpers.py @@ -1,15 +1,38 @@ # utils/wikidata_helpers.py (полностью новый код) -import requests, itertools, re +import logging +import requests from datetime import datetime -from typing import List, Dict, Optional +from functools import lru_cache +from typing import Dict, List, Optional HEADERS = {"User-Agent": "FreeTextWikidataBot/1.0 (https://example.com; admin@example.com)"} API = "https://www.wikidata.org/w/api.php" WIKIDATA_ITEM = "https://www.wikidata.org/entity/{qid}" +logger = logging.getLogger(__name__) + +class WikidataAPIError(RuntimeError): + """Ошибка при обращении к Wikidata API.""" + + def __init__(self, reason: str): + super().__init__(reason) + self.reason = reason + # ---------- базовые вспомогательные ---------- -def wd_get(url: str, params: dict, timeout=8): - return requests.get(url, params, headers=HEADERS, timeout=timeout).json() +def wd_get(url: str, params: dict, timeout: int = 8) -> dict: + try: + response = requests.get(url, params=params, headers=HEADERS, timeout=timeout) + response.raise_for_status() + return response.json() + except requests.exceptions.Timeout: + logger.warning("Wikidata request timeout for params=%s", params) + return {"_error": "timeout"} + except requests.exceptions.RequestException as exc: + logger.warning("Wikidata request failed for params=%s: %s", params, exc) + return {"_error": "request", "details": str(exc)} + except ValueError as exc: # JSON decode error + logger.warning("Wikidata response is not JSON for params=%s: %s", params, exc) + return {"_error": "decode"} def label_for(qid: str, lang: str = "ru") -> str: """Кешируем лейблы, чтобы не дублировать запросы.""" @@ -19,6 +42,8 @@ def label_for(qid: str, lang: str = "ru") -> str: return label_for._cache[qid] data = wd_get(API, {"action": "wbgetentities", "ids": qid, "props": "labels", "languages": f"{lang}|en", "format": "json"}) + if data.get("_error"): + return qid lbl = (data["entities"].get(qid, {}).get("labels", {}).get(lang) or data["entities"].get(qid, {}).get("labels", {}).get("en") or {}).get("value", qid) @@ -26,11 +51,16 @@ def label_for(qid: str, lang: str = "ru") -> str: return lbl # ---------- поиск сущностей ---------- +@lru_cache(maxsize=256) def search_entities(query: str, lang: str = "ru", limit: int = 5) -> List[Dict]: """Возвращает список словарей {'qid': '..', 'label': '..', 'descr': '..'}""" params = {"action": "wbsearchentities", "search": query, "language": lang, "uselang": lang, "format": "json", "limit": limit} data = wd_get(API, params) + if not data: + return [] + if data.get("_error"): + raise WikidataAPIError(data["_error"]) results = [] for item in data.get("search", []): results.append({"qid": item["id"], @@ -44,9 +74,13 @@ def search_entities(query: str, lang: str = "ru", limit: int = 5) -> List[Dict]: # ---------- универсальный «человекочитаемый» сбор утверждений ---------- def free_text_description(qid: str, lang: str = "ru") -> str: """Собирает из объекта максимально развернутое, но читаемое описание.""" - ent = wd_get(API, {"action": "wbgetentities", "ids": qid, - "languages": f"{lang}|en", "props": "labels|descriptions|claims", - "format": "json"})["entities"].get(qid, {}) + data = wd_get(API, {"action": "wbgetentities", "ids": qid, + "languages": f"{lang}|en", "props": "labels|descriptions|claims", + "format": "json"}) + if data.get("_error"): + raise WikidataAPIError(data["_error"]) + entities = data.get("entities") if isinstance(data, dict) else {} + ent = (entities or {}).get(qid, {}) if not ent: return "Объект не найден." @@ -114,15 +148,36 @@ def human_date(ts: str) -> str: return ts[1:11] # ---------- высокоуровневые вызовы из Flask ---------- -def find_and_describe(query: str, lang: str = "ru"): - """Главная точка входа: ищем, возвращаем список {'label','descr','text','url'}""" +def find_and_describe(query: str, lang: str = "ru") -> Dict[str, Optional[List[Dict[str, str]]]]: + """Главная точка входа: ищем, возвращаем {'items': [...], 'error': }""" + if not query: + return {"items": [], "error": None} + try: + cached = _cached_find_and_describe(query.strip(), lang) + except WikidataAPIError as exc: + return {"items": [], "error": exc.reason} + # конвертируем tuple -> list[dict] + return {"items": [{"label": item["label"], + "descr": item["descr"], + "text": item["text"], + "url": item["url"]} for item in cached], + "error": None} + + +@lru_cache(maxsize=128) +def _cached_find_and_describe(query: str, lang: str): hits = search_entities(query, lang) if not hits: - return [] - results = [] + return tuple() + results: List[Dict[str, str]] = [] for h in hits: - results.append({"label": h["label"], - "descr": h["descr"], - "text": free_text_description(h["qid"], lang), - "url": WIKIDATA_ITEM.format(qid=h["qid"])}) - return results \ No newline at end of file + description = free_text_description(h["qid"], lang) + if not description or description == "Объект не найден.": + logger.debug("Empty description for %s, lang=%s", h["qid"], lang) + results.append({ + "label": h.get("label", ""), + "descr": h.get("descr", ""), + "text": description, + "url": WIKIDATA_ITEM.format(qid=h["qid"]), + }) + return tuple(results)