Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
OPENAI_API_KEY=
OPENAI_ENABLED=false
OPENAI_API_KEY=
OPENAI_MODEL=gpt-4o-mini
OPENAI_BASE_URL=https://api.openai.com/v1
OPENAI_TIMEOUT_SECONDS=60
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ GACHI 프로젝트의 AI 서버입니다. BE와 분리된 FastAPI 애플리케

- 가정통신문 원문과 날짜 후보를 기반으로 제목, 요약, 일정/마감/체크리스트 항목을 분석합니다.
- AI 서버는 분석 결과 JSON만 반환하고, DB 저장은 BE가 담당합니다.
- OpenAI API 연결 전에도 검증할 수 있도록 비용 없는 rule-based baseline과 prompt-preview API를 제공합니다.
- `OPENAI_ENABLED=true`일 때 OpenAI API를 호출하고, 기본값에서는 비용 없는 rule-based baseline으로 응답합니다.
- 기존 baseline API와 prompt-preview API를 유지합니다.

## 로컬 실행

Expand Down Expand Up @@ -42,5 +43,5 @@ Windows PowerShell에서는 다음처럼 가상환경을 활성화합니다.

- `docs/env.md`: 환경변수
- `docs/deploy.md`: Docker image와 EC2 배포 방식
- `docs/newsletter-extraction.md`: 가정통신문 분석 API 스펙과 프롬프트 흐름
- `docs/newsletter-extraction.md`: 가정통신문 분석 구현 경계와 유지 결정
- `docs/newsletter-labeling-guide.md`: 정답 데이터 라벨링 기준
54 changes: 54 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import math
import os
from dataclasses import dataclass


def _read_bool(name: str, default: bool = False) -> bool:
    """Read a boolean flag from the environment variable *name*.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or blank.

    Returns:
        ``True`` when the trimmed, lower-cased value is one of ``1``,
        ``true``, ``yes`` or ``on``; ``False`` for any other non-blank
        value; ``default`` when the variable is unset or blank.
    """
    value = os.getenv(name)
    if value is None:
        return default
    normalized = value.strip().lower()
    if not normalized:
        # A blank entry such as ``FLAG=`` in a .env file counts as "not
        # configured", matching how _read_str and _read_float treat it.
        return default
    return normalized in {"1", "true", "yes", "on"}


def _read_str(name: str, default: str | None = None) -> str | None:
value = os.getenv(name)
if value is None:
return default
normalized = value.strip()
return normalized or default


def _read_float(name: str, default: float, *, min_value: float | None = None) -> float:
value = os.getenv(name)
if value is None or value.strip() == "":
return default
try:
parsed = float(value)
if min_value is not None and parsed < min_value:
return default
return parsed
except ValueError:
return default


@dataclass(frozen=True)
class OpenAISettings:
enabled: bool
api_key: str | None
model: str
base_url: str
timeout_seconds: float

@classmethod
def from_env(cls) -> "OpenAISettings":
return cls(
enabled=_read_bool("OPENAI_ENABLED", default=False),
api_key=_read_str("OPENAI_API_KEY"),
model=_read_str("OPENAI_MODEL", "gpt-4o-mini") or "gpt-4o-mini",
base_url=_read_str("OPENAI_BASE_URL", "https://api.openai.com/v1")
or "https://api.openai.com/v1",
timeout_seconds=_read_float("OPENAI_TIMEOUT_SECONDS", 60.0, min_value=0.001),
)


def get_openai_settings() -> OpenAISettings:
    """Return OpenAI settings read from the environment at call time."""
    settings = OpenAISettings.from_env()
    return settings
16 changes: 14 additions & 2 deletions app/routers/newsletters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException, status

from app.schemas import (
NewsletterAnalysisRequest,
Expand All @@ -9,13 +9,25 @@
)
from app.services.newsletter_extractor import analyze_newsletter, extract_newsletter_items
from app.services.newsletter_prompt import ANALYSIS_RESPONSE_SCHEMA, build_prompt_messages
from app.services.openai_adapter import OpenAIAdapterError, OpenAIConfigurationError

router = APIRouter(prefix="/ai/newsletters", tags=["newsletters"])


@router.post("/analyze", response_model=NewsletterAnalysisResponse)
def analyze(req: NewsletterAnalysisRequest) -> NewsletterAnalysisResponse:
    """Analyze a newsletter and translate OpenAI adapter failures to HTTP errors.

    Raises:
        HTTPException: 503 when ``analyze_newsletter`` raises
            ``OpenAIConfigurationError``; 502 for any other
            ``OpenAIAdapterError``.
    """
    try:
        return analyze_newsletter(req)
    except OpenAIConfigurationError as exc:
        # Must be matched before OpenAIAdapterError: the configuration error
        # is a subclass of it and would otherwise be reported as 502.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=str(exc),
        ) from exc
    except OpenAIAdapterError as exc:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=str(exc),
        ) from exc


@router.post("/extract-items", response_model=NewsletterExtractionResponse)
Expand Down
20 changes: 20 additions & 0 deletions app/services/newsletter_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
import re
from collections.abc import Iterable

from app.config import get_openai_settings
from app.schemas import (
DateCandidate,
DateStatus,
Expand All @@ -12,6 +14,9 @@
NewsletterExtractionResponse,
SelectedDateCandidate,
)
from app.services.openai_adapter import OpenAINewsletterAdapter

logger = logging.getLogger(__name__)

DEADLINE_KEYWORDS = (
"마감",
Expand Down Expand Up @@ -62,6 +67,21 @@ def extract_newsletter_items(
def analyze_newsletter(
request: NewsletterAnalysisRequest,
) -> NewsletterAnalysisResponse:
settings = get_openai_settings()
if settings.enabled:
logger.info("[NewsletterAnalysis] OpenAI 분석 모드로 실행합니다. model=%s", settings.model)
response = OpenAINewsletterAdapter(settings).analyze(request)
meta = dict(response.meta)
meta.update(
{
"mode": "openai",
"model": settings.model,
"dateCandidateCount": len(request.date_candidates),
"requiresLLMReview": False,
}
)
return response.model_copy(update={"meta": meta})

items = _extract_items(request)
return NewsletterAnalysisResponse(
title=_extract_document_title(request),
Expand Down
126 changes: 126 additions & 0 deletions app/services/openai_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import json
import logging
import urllib.error
import urllib.request
from typing import Any

from pydantic import ValidationError

from app.config import OpenAISettings
from app.schemas import NewsletterAnalysisRequest, NewsletterAnalysisResponse
from app.services.newsletter_prompt import ANALYSIS_RESPONSE_SCHEMA, build_prompt_messages

logger = logging.getLogger(__name__)


class OpenAIAdapterError(RuntimeError):
    """Base error for failures while calling OpenAI or reading its response."""


class OpenAIConfigurationError(OpenAIAdapterError):
    """Raised when the adapter cannot run because required settings are missing."""


class OpenAINewsletterAdapter:
    """Calls the OpenAI ``/responses`` endpoint to analyze a newsletter.

    Failures handled by this class are raised as ``OpenAIAdapterError`` or
    its subclass ``OpenAIConfigurationError``.
    """

    def __init__(self, settings: OpenAISettings) -> None:
        """Store the settings used for every request made by this adapter."""
        self.settings = settings

    def analyze(self, request: NewsletterAnalysisRequest) -> NewsletterAnalysisResponse:
        """Send *request* to OpenAI and return the validated analysis.

        Raises:
            OpenAIConfigurationError: If no API key is configured.
            OpenAIAdapterError: If the call fails or the model output does
                not validate as ``NewsletterAnalysisResponse``.
        """
        if not self.settings.api_key:
            raise OpenAIConfigurationError("OPENAI_API_KEY가 설정되어 있지 않습니다.")

        payload = {
            "model": self.settings.model,
            "input": build_prompt_messages(request),
            "text": {
                "format": {
                    "type": "json_schema",
                    "name": "newsletter_analysis",
                    "schema": ANALYSIS_RESPONSE_SCHEMA,
                    "strict": False,
                }
            },
        }

        response_body = self._post_json("/responses", payload)
        parsed = self._extract_output_json(response_body)
        try:
            return NewsletterAnalysisResponse.model_validate(parsed)
        except ValidationError as exc:
            logger.warning("[OpenAIAdapter] 응답 스키마 검증 실패. error=%s", exc)
            raise OpenAIAdapterError("OpenAI 응답이 분석 스키마와 일치하지 않습니다.") from exc

    def _post_json(self, path: str, payload: dict[str, Any]) -> dict[str, Any]:
        """POST *payload* as JSON to ``base_url + path`` and decode the reply.

        Returns:
            The response body parsed as a JSON object.

        Raises:
            OpenAIAdapterError: On an HTTP error status, a transport failure,
                a timeout, or a body that is not a UTF-8 JSON object.
        """
        url = self.settings.base_url.rstrip("/") + path
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        req = urllib.request.Request(
            url,
            data=body,
            method="POST",
            headers={
                "Authorization": f"Bearer {self.settings.api_key}",
                "Content-Type": "application/json",
            },
        )

        try:
            with urllib.request.urlopen(req, timeout=self.settings.timeout_seconds) as response:
                decoded = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:
            # Only the length of the error body is logged, not its content.
            error_body = exc.read().decode("utf-8", errors="replace")
            logger.warning(
                "[OpenAIAdapter] OpenAI 호출 실패. status=%s, body_length=%s",
                exc.code,
                len(error_body),
            )
            raise OpenAIAdapterError(f"OpenAI 호출 실패. status={exc.code}") from exc
        except urllib.error.URLError as exc:
            # A timeout while connecting arrives wrapped in URLError; report
            # it the same way as a timeout raised while reading.
            if isinstance(exc.reason, TimeoutError):
                raise self._timeout_error() from exc
            logger.warning("[OpenAIAdapter] OpenAI 통신 오류. reason=%s", exc.reason)
            raise OpenAIAdapterError("OpenAI 통신 오류가 발생했습니다.") from exc
        except TimeoutError as exc:
            raise self._timeout_error() from exc
        except OSError as exc:
            # Socket-level failures raised while reading the body (for
            # example a connection reset) are not wrapped in URLError.
            logger.warning("[OpenAIAdapter] OpenAI 통신 오류. reason=%s", exc)
            raise OpenAIAdapterError("OpenAI 통신 오류가 발생했습니다.") from exc
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            logger.warning("[OpenAIAdapter] OpenAI 응답 JSON 파싱 실패. error=%s", exc)
            raise OpenAIAdapterError("OpenAI 응답을 JSON으로 해석할 수 없습니다.") from exc

        # Callers use ``.get`` on the result, so anything but an object
        # (list, string, null, ...) must be rejected here.
        if not isinstance(decoded, dict):
            raise OpenAIAdapterError("OpenAI 응답이 JSON object가 아닙니다.")
        return decoded

    def _timeout_error(self) -> OpenAIAdapterError:
        """Log a timeout and return the adapter error describing it."""
        logger.warning(
            "[OpenAIAdapter] OpenAI 호출 timeout. timeout=%s",
            self.settings.timeout_seconds,
        )
        return OpenAIAdapterError("OpenAI 호출 시간이 초과되었습니다.")

    def _extract_output_json(self, response_body: dict[str, Any]) -> dict[str, Any]:
        """Locate the model's text output in *response_body* and parse it.

        A non-blank top-level ``output_text`` string is preferred; otherwise
        the first non-blank ``text`` found under ``output[*].content[*]`` is
        used. Entries of an unexpected type are skipped.

        Raises:
            OpenAIAdapterError: If ``output`` is not a list, no text is
                found, or the text is not a JSON object.
        """
        output_text = response_body.get("output_text")
        if isinstance(output_text, str) and output_text.strip():
            return self._loads_model_json(output_text)

        outputs = response_body.get("output", [])
        if not isinstance(outputs, list):
            raise OpenAIAdapterError("OpenAI 응답의 output 형식이 올바르지 않습니다.")

        for output in outputs:
            if not isinstance(output, dict):
                continue
            contents = output.get("content", [])
            if not isinstance(contents, list):
                continue
            for content in contents:
                if not isinstance(content, dict):
                    continue
                text = content.get("text")
                if isinstance(text, str) and text.strip():
                    return self._loads_model_json(text)

        raise OpenAIAdapterError("OpenAI 응답에서 출력 텍스트를 찾을 수 없습니다.")

    def _loads_model_json(self, value: str) -> dict[str, Any]:
        """Parse the model's text output *value* as a JSON object.

        Raises:
            OpenAIAdapterError: If *value* is not valid JSON or the parsed
                value is not a JSON object.
        """
        try:
            parsed = json.loads(value)
        except json.JSONDecodeError as exc:
            # Only the length of the model output is logged, not its content.
            logger.warning(
                "[OpenAIAdapter] 모델 출력 JSON 파싱 실패. output_length=%s",
                len(value),
            )
            raise OpenAIAdapterError("OpenAI 모델 출력이 JSON 형식이 아닙니다.") from exc

        if not isinstance(parsed, dict):
            raise OpenAIAdapterError("OpenAI 모델 출력이 JSON object가 아닙니다.")
        return parsed
18 changes: 12 additions & 6 deletions docs/env.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
# AI 서버 환경변수

## 필수
## OpenAI 호출

- `OPENAI_API_KEY`: 추후 LLM API 호출을 붙일 때 사용할 OpenAI API key
- `OPENAI_ENABLED`: OpenAI 실제 호출 활성화 여부. 값이 `1`, `true`, `yes`, `on`(대소문자 무시)일 때만 활성화되며, 기본값은 `false`
- `OPENAI_API_KEY`: OpenAI API key. `OPENAI_ENABLED=true`일 때 필요
- `OPENAI_MODEL`: 사용할 모델. 기본값은 `gpt-4o-mini`
- `OPENAI_BASE_URL`: OpenAI API base URL. 기본값은 `https://api.openai.com/v1`
- `OPENAI_TIMEOUT_SECONDS`: OpenAI 호출 timeout(초). 기본값은 `60`이며, 숫자로 해석할 수 없거나 `0.001`보다 작은 값은 기본값으로 대체된다

## 선택
비용 방지를 위해 로컬과 배포 기본값은 `OPENAI_ENABLED=false`로 둔다.
이 상태에서 `/ai/newsletters/analyze`는 기존 rule-based baseline으로 응답한다.

- `LOG_LEVEL`: 로그 레벨. 기본값은 `INFO`
`OPENAI_ENABLED=true`인데 `OPENAI_API_KEY`가 없으면 `/ai/newsletters/analyze`는 `503`을 반환한다.
OpenAI 호출 또는 응답 파싱이 실패하면 `502`를 반환하고, 로그에는 상태 코드나 예외 사유를 남긴다.

## 현재 상태
## 기타

현재 구현은 OpenAI API를 직접 호출하지 않습니다. `OPENAI_API_KEY`는 기존 EC2 compose 환경과 향후 LLM client 연결을 고려해 유지합니다.
- `LOG_LEVEL`: 로그 레벨. 기본값은 `INFO`
Loading