From 4889721606effdef30b0cf889d244351f521a9dc Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 14:20:38 +0100 Subject: [PATCH 01/10] fix: removed most of schema encode/decode overhead --- backend/app/core/providers.py | 13 +-- .../admin/admin_events_repository.py | 6 +- .../app/db/repositories/event_repository.py | 20 ++-- backend/app/domain/events/__init__.py | 4 +- backend/app/domain/events/typed.py | 2 +- backend/app/events/broker.py | 26 ++--- backend/app/events/event_store.py | 21 ++-- backend/app/events/schema/schema_registry.py | 103 ++---------------- .../services/event_replay/replay_service.py | 7 +- backend/app/services/kafka_event_service.py | 4 +- backend/tests/e2e/dlq/test_dlq_manager.py | 7 +- .../e2e/events/test_schema_registry_real.py | 9 +- .../events/test_schema_registry_roundtrip.py | 24 ++-- .../events/test_event_schema_coverage.py | 15 ++- .../events/test_schema_registry_manager.py | 22 ++-- 15 files changed, 78 insertions(+), 205 deletions(-) diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py index a1824dda..3e4ed38d 100644 --- a/backend/app/core/providers.py +++ b/backend/app/core/providers.py @@ -276,20 +276,17 @@ class EventProvider(Provider): scope = Scope.APP @provide - async def get_schema_registry(self, settings: Settings, logger: logging.Logger) -> SchemaRegistryManager: - registry = SchemaRegistryManager(settings, logger) - await registry.initialize_schemas() - return registry + def get_schema_registry(self, settings: Settings, logger: logging.Logger) -> SchemaRegistryManager: + return SchemaRegistryManager(settings, logger) @provide def get_event_store( self, - schema_registry: SchemaRegistryManager, logger: logging.Logger, event_metrics: EventMetrics, ) -> EventStore: return create_event_store( - schema_registry=schema_registry, logger=logger, event_metrics=event_metrics, ttl_days=90 + logger=logger, event_metrics=event_metrics, ttl_days=90 ) @@ -874,14 +871,12 @@ def get_event_replay_service( self, replay_repository: ReplayRepository, kafka_producer: UnifiedProducer, - event_store: EventStore, replay_metrics: ReplayMetrics, logger: logging.Logger, ) -> EventReplayService: return EventReplayService( repository=replay_repository, producer=kafka_producer, - event_store=event_store, replay_metrics=replay_metrics, logger=logger, ) @@ -901,7 +896,6 @@ async def get_event_replay_service( self, replay_repository: ReplayRepository, kafka_producer: UnifiedProducer, - event_store: EventStore, replay_metrics: ReplayMetrics, logger: logging.Logger, database: Database, @@ -910,7 +904,6 @@ async def get_event_replay_service( service = EventReplayService( repository=replay_repository, producer=kafka_producer, - event_store=event_store, replay_metrics=replay_metrics, logger=logger, ) diff --git a/backend/app/db/repositories/admin/admin_events_repository.py b/backend/app/db/repositories/admin/admin_events_repository.py index 800974fb..380ebcea 100644 --- a/backend/app/db/repositories/admin/admin_events_repository.py +++ b/backend/app/db/repositories/admin/admin_events_repository.py @@ -17,6 +17,7 @@ from app.domain.enums.replay import ReplayStatus from app.domain.events import ( DomainEvent, + DomainEventAdapter, EventBrowseResult, EventDetail, EventExportRow, @@ -26,7 +27,6 @@ EventTypeCount, HourlyEventCount, UserEventCount, - domain_event_adapter, ) from app.domain.replay.models import ReplayFilter, ReplaySessionState @@ -59,7 +59,7 @@ async def browse_events( total = await query.count() docs = await query.sort([(sort_by, 
sort_order)]).skip(skip).limit(limit).to_list() - events = [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] return EventBrowseResult(events=events, total=total, skip=skip, limit=limit) @@ -68,7 +68,7 @@ async def get_event_detail(self, event_id: str) -> EventDetail | None: if not doc: return None - event = domain_event_adapter.validate_python(doc, from_attributes=True) + event = DomainEventAdapter.validate_python(doc, from_attributes=True) related_query = {"metadata.correlation_id": doc.metadata.correlation_id, "event_id": {"$ne": event_id}} related_docs = await ( diff --git a/backend/app/db/repositories/event_repository.py b/backend/app/db/repositories/event_repository.py index 6eb20841..7b250953 100644 --- a/backend/app/db/repositories/event_repository.py +++ b/backend/app/db/repositories/event_repository.py @@ -14,13 +14,13 @@ from app.domain.events import ( ArchivedEvent, DomainEvent, + DomainEventAdapter, EventAggregationResult, EventListResult, EventReplayInfo, EventStatistics, EventTypeCount, ServiceEventCount, - domain_event_adapter, ) @@ -73,7 +73,7 @@ async def get_event(self, event_id: str) -> DomainEvent | None: doc = await EventDocument.find_one(EventDocument.event_id == event_id) if not doc: return None - return domain_event_adapter.validate_python(doc, from_attributes=True) + return DomainEventAdapter.validate_python(doc, from_attributes=True) async def get_events_by_type( self, @@ -94,7 +94,7 @@ async def get_events_by_type( .limit(limit) .to_list() ) - return [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] async def get_events_by_aggregate( self, aggregate_id: str, event_types: list[EventType] | None = None, limit: int = 100 @@ -105,7 +105,7 @@ async def get_events_by_aggregate( docs = ( await EventDocument.find(*conditions).sort([("timestamp", SortDirection.ASCENDING)]).limit(limit).to_list() ) - return [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] async def get_events_by_correlation( self, correlation_id: str, limit: int = 100, skip: int = 0, user_id: str | None = None, @@ -119,7 +119,7 @@ async def get_events_by_correlation( .sort([("timestamp", SortDirection.ASCENDING)]) .skip(skip).limit(limit).to_list() ) - events = [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(condition).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -152,7 +152,7 @@ async def get_events_by_user( .limit(limit) .to_list() ) - return [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] async def get_execution_events( self, execution_id: str, limit: int = 100, skip: int = 0, exclude_system_events: bool = False @@ -172,7 +172,7 @@ async def get_execution_events( .sort([("timestamp", SortDirection.ASCENDING)]) .skip(skip).limit(limit).to_list() ) - events = [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] total_count = await 
EventDocument.find(*conditions).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -307,7 +307,7 @@ async def get_user_events_paginated( .sort([("timestamp", sort_direction)]) .skip(skip).limit(limit).to_list() ) - events = [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(*conditions).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -334,7 +334,7 @@ async def query_events( .sort([(sort_field, SortDirection.DESCENDING)]) .skip(skip).limit(limit).to_list() ) - events = [domain_event_adapter.validate_python(d, from_attributes=True) for d in docs] + events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(query).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -398,7 +398,7 @@ async def get_aggregate_replay_info(self, aggregate_id: str) -> EventReplayInfo ) async for doc in EventDocument.aggregate(pipeline.export()): - events = [domain_event_adapter.validate_python(e) for e in doc["events"]] + events = [DomainEventAdapter.validate_python(e) for e in doc["events"]] return EventReplayInfo( events=events, event_count=doc["event_count"], diff --git a/backend/app/domain/events/__init__.py b/backend/app/domain/events/__init__.py index d537a39e..2a9bc41c 100644 --- a/backend/app/domain/events/__init__.py +++ b/backend/app/domain/events/__init__.py @@ -29,6 +29,7 @@ CreatePodCommandEvent, DeletePodCommandEvent, DomainEvent, + DomainEventAdapter, EventMetadata, # Execution Events ExecutionAcceptedEvent, @@ -90,7 +91,6 @@ UserRegisteredEvent, UserSettingsUpdatedEvent, UserUpdatedEvent, - domain_event_adapter, ) __all__ = [ @@ -119,7 +119,7 @@ "DomainEvent", "EventMetadata", "ResourceUsageDomain", - "domain_event_adapter", + "DomainEventAdapter", # Execution Events "ExecutionRequestedEvent", "ExecutionAcceptedEvent", diff --git a/backend/app/domain/events/typed.py b/backend/app/domain/events/typed.py index 6212fd6c..c230b9d4 100644 --- a/backend/app/domain/events/typed.py +++ b/backend/app/domain/events/typed.py @@ -703,4 +703,4 @@ class ArchivedEvent(AvroBase): ] # TypeAdapter for polymorphic loading - validates raw data to correct typed event -domain_event_adapter: TypeAdapter[DomainEvent] = TypeAdapter(DomainEvent) +DomainEventAdapter: TypeAdapter[DomainEvent] = TypeAdapter(DomainEvent) diff --git a/backend/app/events/broker.py b/backend/app/events/broker.py index 3a6e3336..01e70da4 100644 --- a/backend/app/events/broker.py +++ b/backend/app/events/broker.py @@ -4,36 +4,24 @@ from faststream import StreamMessage from faststream.kafka import KafkaBroker -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import DomainEvent, DomainEventAdapter from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings -def create_avro_decoder( - schema_registry: SchemaRegistryManager, -) -> Any: - """Create a custom Avro decoder closure for FastStream subscribers. - - The decoder receives a StreamMessage whose body is Confluent wire-format - Avro bytes (magic byte + 4-byte schema ID + Avro payload). We delegate - deserialization to SchemaRegistryManager which resolves the schema from - the registry and decodes into the concrete DomainEvent subclass. 
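For reference, the wire format mentioned in the docstring being removed above is: one zero magic byte, a 4-byte big-endian schema ID, then the Avro-encoded body. A minimal sketch of inspecting that header, assuming standard Confluent framing (the helper name is illustrative, not from the codebase):

    import struct

    MAGIC_BYTE = b"\x00"  # Confluent wire-format marker byte

    def parse_schema_id(data: bytes) -> int:
        # Layout: [0x00][4-byte big-endian schema id][Avro binary payload]
        if len(data) < 5 or data[:1] != MAGIC_BYTE:
            raise ValueError("not Confluent wire format")
        return struct.unpack(">I", data[1:5])[0]

The underlying AsyncAvroMessageSerializer performs this framing step itself, which is why the hand-rolled header checks are deleted from SchemaRegistryManager later in this patch.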
- """ - - async def avro_decoder(msg: StreamMessage[Any]) -> DomainEvent: - return await schema_registry.deserialize_event(msg.body, msg.raw_message.topic) - - return avro_decoder - - def create_broker( settings: Settings, schema_registry: SchemaRegistryManager, logger: logging.Logger, ) -> KafkaBroker: """Create a KafkaBroker with Avro decoder for standalone workers.""" + + async def avro_decoder(msg: StreamMessage[Any]) -> DomainEvent: + payload = await schema_registry.serializer.decode_message(msg.body) + return DomainEventAdapter.validate_python(payload) + return KafkaBroker( settings.KAFKA_BOOTSTRAP_SERVERS, - decoder=create_avro_decoder(schema_registry), + decoder=avro_decoder, logger=logger, ) diff --git a/backend/app/events/event_store.py b/backend/app/events/event_store.py index 026ae84a..8a1fa574 100644 --- a/backend/app/events/event_store.py +++ b/backend/app/events/event_store.py @@ -12,21 +12,18 @@ from app.core.tracing.utils import add_span_attributes from app.db.docs import EventDocument from app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent -from app.events.schema.schema_registry import SchemaRegistryManager +from app.domain.events.typed import DomainEvent, DomainEventAdapter class EventStore: def __init__( self, - schema_registry: SchemaRegistryManager, logger: logging.Logger, event_metrics: EventMetrics, ttl_days: int = 90, batch_size: int = 100, ): self.metrics = event_metrics - self.schema_registry = schema_registry self.logger = logger self.ttl_days = ttl_days self.batch_size = batch_size @@ -115,7 +112,7 @@ async def get_event(self, event_id: str) -> DomainEvent | None: if not doc: return None - event = self.schema_registry.deserialize_json(doc.model_dump()) + event = DomainEventAdapter.validate_python(doc.model_dump()) duration = asyncio.get_running_loop().time() - start self.metrics.record_event_query_duration(duration, "get_by_id", "event_store") @@ -141,7 +138,7 @@ async def get_events_by_type( .limit(limit) .to_list() ) - events = [self.schema_registry.deserialize_json(doc.model_dump()) for doc in docs] + events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] duration = asyncio.get_running_loop().time() - start self.metrics.record_event_query_duration(duration, "get_by_type", "event_store") @@ -158,7 +155,7 @@ async def get_execution_events( query["event_type"] = {"$in": event_types} docs = await EventDocument.find(query).sort([("timestamp", SortDirection.ASCENDING)]).to_list() - events = [self.schema_registry.deserialize_json(doc.model_dump()) for doc in docs] + events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] duration = asyncio.get_running_loop().time() - start self.metrics.record_event_query_duration(duration, "get_execution_events", "event_store") @@ -180,7 +177,7 @@ async def get_user_events( query["timestamp"] = tr docs = await EventDocument.find(query).sort([("timestamp", SortDirection.DESCENDING)]).limit(limit).to_list() - events = [self.schema_registry.deserialize_json(doc.model_dump()) for doc in docs] + events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] duration = asyncio.get_running_loop().time() - start self.metrics.record_event_query_duration(duration, "get_user_events", "event_store") @@ -201,7 +198,7 @@ async def get_security_events( query["timestamp"] = tr docs = await EventDocument.find(query).sort([("timestamp", SortDirection.DESCENDING)]).limit(limit).to_list() - events = 
[self.schema_registry.deserialize_json(doc.model_dump()) for doc in docs] + events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] duration = asyncio.get_running_loop().time() - start self.metrics.record_event_query_duration(duration, "get_security_events", "event_store") @@ -214,7 +211,7 @@ async def get_correlation_chain(self, correlation_id: str) -> list[DomainEvent]: .sort([("timestamp", SortDirection.ASCENDING)]) .to_list() ) - events = [self.schema_registry.deserialize_json(doc.model_dump()) for doc in docs] + events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] duration = asyncio.get_running_loop().time() - start self.metrics.record_event_query_duration(duration, "get_correlation_chain", "event_store") @@ -238,7 +235,7 @@ async def replay_events( query["event_type"] = {"$in": event_types} async for doc in EventDocument.find(query).sort([("timestamp", SortDirection.ASCENDING)]): - event = self.schema_registry.deserialize_json(doc.model_dump()) + event = DomainEventAdapter.validate_python(doc.model_dump()) if callback: await callback(event) count += 1 @@ -316,14 +313,12 @@ async def health_check(self) -> dict[str, Any]: def create_event_store( - schema_registry: SchemaRegistryManager, logger: logging.Logger, event_metrics: EventMetrics, ttl_days: int = 90, batch_size: int = 100, ) -> EventStore: return EventStore( - schema_registry=schema_registry, logger=logger, event_metrics=event_metrics, ttl_days=ttl_days, diff --git a/backend/app/events/schema/schema_registry.py b/backend/app/events/schema/schema_registry.py index ece3a679..14cba9ce 100644 --- a/backend/app/events/schema/schema_registry.py +++ b/backend/app/events/schema/schema_registry.py @@ -1,39 +1,19 @@ import logging -import struct -from functools import lru_cache -from typing import Any, get_args, get_origin +from typing import Any from schema_registry.client import AsyncSchemaRegistryClient, schema from schema_registry.serializers import AsyncAvroMessageSerializer # type: ignore[attr-defined] -from app.domain.enums.events import EventType from app.domain.events.typed import DomainEvent from app.settings import Settings -MAGIC_BYTE = b"\x00" - - -@lru_cache(maxsize=1) -def _get_all_event_classes() -> list[type[DomainEvent]]: - """Get all concrete event classes from DomainEvent union.""" - union_type = get_args(DomainEvent)[0] # Annotated[Union[...], Discriminator] -> Union - return list(get_args(union_type)) if get_origin(union_type) else [union_type] - - -@lru_cache(maxsize=1) -def _get_event_class_mapping() -> dict[str, type[DomainEvent]]: - """Map class name -> class.""" - return {cls.__name__: cls for cls in _get_all_event_classes()} - - -@lru_cache(maxsize=1) -def _get_event_type_to_class_mapping() -> dict[EventType, type[DomainEvent]]: - """EventType -> class mapping.""" - return {cls.model_fields["event_type"].default: cls for cls in _get_all_event_classes()} - class SchemaRegistryManager: - """Schema registry manager for Avro serialization with Confluent wire format.""" + """Avro serialization via Confluent Schema Registry. + + Schemas are registered lazily by the underlying serializer on first + produce — no eager bootstrap needed. 
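In practice the new manager is used as a plain round trip through the serializer plus the discriminated-union TypeAdapter. A rough sketch, assuming a constructed SchemaRegistryManager and any concrete DomainEvent instance:

    from app.domain.events.typed import DomainEvent, DomainEventAdapter
    from app.events.schema.schema_registry import SchemaRegistryManager

    async def roundtrip(schema_registry: SchemaRegistryManager, event: DomainEvent) -> DomainEvent:
        # Producer side: schema registration happens lazily inside serialize_event.
        data = await schema_registry.serialize_event(event)
        # Consumer side: decode_message strips the wire-format header and returns a
        # dict (or None, which callers guard against); the TypeAdapter then picks the
        # concrete event class via its event_type discriminator.
        payload = await schema_registry.serializer.decode_message(data)
        return DomainEventAdapter.validate_python(payload)

This is the same pair of calls used by the new broker decoder and by the e2e round-trip test further down in this series.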
+ """ def __init__(self, settings: Settings, logger: logging.Logger): self.logger = logger @@ -42,78 +22,11 @@ def __init__(self, settings: Settings, logger: logging.Logger): parts = settings.SCHEMA_REGISTRY_AUTH.split(":", 1) auth: tuple[str, str] | None = (parts[0], parts[1]) if len(parts) == 2 else None self._client = AsyncSchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL, auth=auth) # type: ignore[arg-type] - self._serializer = AsyncAvroMessageSerializer(self._client) - self._schema_id_cache: dict[type[DomainEvent], int] = {} - self._id_to_class_cache: dict[int, type[DomainEvent]] = {} - - async def register_schema(self, subject: str, event_class: type[DomainEvent]) -> int: - """Register schema and return schema ID.""" - avro_schema = schema.AvroSchema(event_class.avro_schema(namespace=self.namespace)) - schema_id: int = await self._client.register(subject, avro_schema) - self._schema_id_cache[event_class] = schema_id - self._id_to_class_cache[schema_id] = event_class - self.logger.info(f"Registered schema for {event_class.__name__}: ID {schema_id}") - return schema_id - - async def _get_event_class_by_id(self, schema_id: int) -> type[DomainEvent] | None: - """Get event class by schema ID.""" - if schema_id in self._id_to_class_cache: - return self._id_to_class_cache[schema_id] - schema_obj = await self._client.get_by_id(schema_id) - if schema_obj and (class_name := schema_obj.raw_schema.get("name")): - if cls := _get_event_class_mapping().get(class_name): - self._id_to_class_cache[schema_id] = cls - self._schema_id_cache[cls] = schema_id - return cls - return None + self.serializer = AsyncAvroMessageSerializer(self._client) async def serialize_event(self, event: DomainEvent) -> bytes: """Serialize event to Confluent wire format: [0x00][4-byte schema id][Avro binary].""" subject = f"{self.subject_prefix}{event.__class__.__name__}-value" avro_schema = schema.AvroSchema(event.__class__.avro_schema(namespace=self.namespace)) payload: dict[str, Any] = event.model_dump(mode="python", by_alias=False, exclude_unset=False) - payload.pop("event_type", None) - if "timestamp" in payload and payload["timestamp"] is not None: - payload["timestamp"] = int(payload["timestamp"].timestamp() * 1_000_000) - return await self._serializer.encode_record_with_schema(subject, avro_schema, payload) - - async def deserialize_event(self, data: bytes, topic: str) -> DomainEvent: - """Deserialize from Confluent wire format to DomainEvent.""" - if not data or len(data) < 5: - raise ValueError("Invalid message: too short for wire format") - if data[0:1] != MAGIC_BYTE: - raise ValueError(f"Unknown magic byte: {data[0]:#x}") - schema_id = struct.unpack(">I", data[1:5])[0] - event_class = await self._get_event_class_by_id(schema_id) - if not event_class: - raise ValueError(f"Unknown schema ID: {schema_id}") - obj = await self._serializer.decode_message(data) - if not isinstance(obj, dict): - raise ValueError(f"Deserialization returned {type(obj)}, expected dict") - if (f := event_class.model_fields.get("event_type")) and f.default and "event_type" not in obj: - obj["event_type"] = f.default - return event_class.model_validate(obj) - - def deserialize_json(self, data: dict[str, Any]) -> DomainEvent: - """Deserialize JSON data to DomainEvent using event_type field.""" - if not (event_type_str := data.get("event_type")): - raise ValueError("Missing event_type in event data") - if not (event_class := _get_event_type_to_class_mapping().get(EventType(event_type_str))): - raise ValueError(f"No event class found for event 
type: {event_type_str}") - return event_class.model_validate(data) - - async def set_compatibility(self, subject: str, mode: str) -> None: - """Set compatibility for a subject.""" - valid = {"BACKWARD", "FORWARD", "FULL", "NONE", "BACKWARD_TRANSITIVE", "FORWARD_TRANSITIVE", "FULL_TRANSITIVE"} - if mode not in valid: - raise ValueError(f"Invalid compatibility mode: {mode}") - await self._client.update_compatibility(level=mode, subject=subject) - self.logger.info(f"Set {subject} compatibility to {mode}") - - async def initialize_schemas(self) -> None: - """Initialize all event schemas in the registry.""" - for event_class in _get_all_event_classes(): - subject = f"{self.subject_prefix}{event_class.__name__}-value" - await self.set_compatibility(subject, "FORWARD") - await self.register_schema(subject, event_class) - self.logger.info(f"Initialized {len(_get_all_event_classes())} event schemas") + return await self.serializer.encode_record_with_schema(subject, avro_schema, payload) diff --git a/backend/app/services/event_replay/replay_service.py b/backend/app/services/event_replay/replay_service.py index e3fa3740..f41f5e5f 100644 --- a/backend/app/services/event_replay/replay_service.py +++ b/backend/app/services/event_replay/replay_service.py @@ -13,7 +13,7 @@ from app.db.repositories.replay_repository import ReplayRepository from app.domain.admin.replay_updates import ReplaySessionUpdate from app.domain.enums.replay import ReplayStatus, ReplayTarget -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import DomainEvent, DomainEventAdapter from app.domain.replay import ( CleanupResult, ReplayConfig, @@ -24,7 +24,6 @@ ReplaySessionState, ) from app.events.core import UnifiedProducer -from app.events.event_store import EventStore class EventReplayService: @@ -32,7 +31,6 @@ def __init__( self, repository: ReplayRepository, producer: UnifiedProducer, - event_store: EventStore, replay_metrics: ReplayMetrics, logger: logging.Logger, ) -> None: @@ -41,7 +39,6 @@ def __init__( self._resume_events: dict[str, asyncio.Event] = {} self._repository = repository self._producer = producer - self._event_store = event_store self.logger = logger self._file_locks: dict[str, asyncio.Lock] = {} self._metrics = replay_metrics @@ -228,7 +225,7 @@ async def _fetch_event_batches(self, session: ReplaySessionState) -> AsyncIterat if max_events and events_processed >= max_events: break - event = self._event_store.schema_registry.deserialize_json(doc) + event = DomainEventAdapter.validate_python(doc) if event: batch.append(event) events_processed += 1 diff --git a/backend/app/services/kafka_event_service.py b/backend/app/services/kafka_event_service.py index 28d7ed8f..b796e05b 100644 --- a/backend/app/services/kafka_event_service.py +++ b/backend/app/services/kafka_event_service.py @@ -10,7 +10,7 @@ from app.core.metrics import EventMetrics from app.db.repositories.event_repository import EventRepository from app.domain.enums.events import EventType -from app.domain.events import domain_event_adapter +from app.domain.events import DomainEventAdapter from app.domain.events.typed import DomainEvent, EventMetadata from app.events.core import UnifiedProducer from app.settings import Settings @@ -85,7 +85,7 @@ async def publish_event( "metadata": event_metadata, **payload, } - domain_event = domain_event_adapter.validate_python(event_data) + domain_event = DomainEventAdapter.validate_python(event_data) await self.event_repository.store_event(domain_event) # Publish to Kafka (headers built 
automatically by producer) diff --git a/backend/tests/e2e/dlq/test_dlq_manager.py b/backend/tests/e2e/dlq/test_dlq_manager.py index a37d81f0..e19e4528 100644 --- a/backend/tests/e2e/dlq/test_dlq_manager.py +++ b/backend/tests/e2e/dlq/test_dlq_manager.py @@ -12,7 +12,7 @@ from app.dlq.models import DLQMessage from app.domain.enums.events import EventType from app.domain.enums.kafka import KafkaTopic -from app.domain.events.typed import DLQMessageReceivedEvent +from app.domain.events.typed import DLQMessageReceivedEvent, DomainEventAdapter from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings from dishka import AsyncContainer @@ -53,7 +53,10 @@ async def consume_dlq_events() -> None: """Consume DLQ events and set future when our event is received.""" async for msg in events_consumer: try: - event = await schema_registry.deserialize_event(msg.value, dlq_events_topic) + payload = await schema_registry.serializer.decode_message(msg.value) + if payload is None: + continue + event = DomainEventAdapter.validate_python(payload) if ( isinstance(event, DLQMessageReceivedEvent) and event.dlq_event_id == ev.event_id diff --git a/backend/tests/e2e/events/test_schema_registry_real.py b/backend/tests/e2e/events/test_schema_registry_real.py index d6c182de..58e4900d 100644 --- a/backend/tests/e2e/events/test_schema_registry_real.py +++ b/backend/tests/e2e/events/test_schema_registry_real.py @@ -1,9 +1,9 @@ import logging import pytest -from app.domain.events.typed import EventMetadata, PodCreatedEvent + +from app.domain.events.typed import DomainEventAdapter, EventMetadata, PodCreatedEvent from app.events.schema.schema_registry import SchemaRegistryManager -from app.infrastructure.kafka.mappings import get_topic_for_event from app.settings import Settings pytestmark = [pytest.mark.e2e, pytest.mark.kafka] @@ -22,7 +22,8 @@ async def test_serialize_and_deserialize_event_real_registry(test_settings: Sett metadata=EventMetadata(service_name="s", service_version="1"), ) data = await m.serialize_event(ev) - topic = str(get_topic_for_event(ev.event_type)) - obj = await m.deserialize_event(data, topic=topic) + payload = await m.serializer.decode_message(data) + assert payload is not None + obj = DomainEventAdapter.validate_python(payload) assert isinstance(obj, PodCreatedEvent) assert obj.namespace == "n" diff --git a/backend/tests/e2e/events/test_schema_registry_roundtrip.py b/backend/tests/e2e/events/test_schema_registry_roundtrip.py index c15faca2..1fc83467 100644 --- a/backend/tests/e2e/events/test_schema_registry_roundtrip.py +++ b/backend/tests/e2e/events/test_schema_registry_roundtrip.py @@ -1,9 +1,9 @@ import logging import pytest -from app.events.schema.schema_registry import MAGIC_BYTE, SchemaRegistryManager -from app.infrastructure.kafka.mappings import get_topic_for_event -from app.settings import Settings + +from app.domain.events.typed import DomainEventAdapter +from app.events.schema.schema_registry import SchemaRegistryManager from dishka import AsyncContainer from tests.conftest import make_execution_requested_event @@ -16,20 +16,10 @@ @pytest.mark.asyncio async def test_schema_registry_serialize_deserialize_roundtrip(scope: AsyncContainer) -> None: reg: SchemaRegistryManager = await scope.get(SchemaRegistryManager) - # Schema registration happens lazily in serialize_event ev = make_execution_requested_event(execution_id="e-rt") data = await reg.serialize_event(ev) - assert data.startswith(MAGIC_BYTE) - topic = str(get_topic_for_event(ev.event_type)) 
- back = await reg.deserialize_event(data, topic=topic) + assert data[:1] == b"\x00" # Confluent wire format magic byte + payload = await reg.serializer.decode_message(data) + assert payload is not None + back = DomainEventAdapter.validate_python(payload) assert back.event_id == ev.event_id and getattr(back, "execution_id", None) == ev.execution_id - - # initialize_schemas should be a no-op if already initialized; call to exercise path - await reg.initialize_schemas() - - -@pytest.mark.asyncio -async def test_schema_registry_deserialize_invalid_header(test_settings: Settings) -> None: - reg = SchemaRegistryManager(settings=test_settings, logger=_test_logger) - with pytest.raises(ValueError): - await reg.deserialize_event(b"\x01\x00\x00\x00\x01", topic="t") # wrong magic byte diff --git a/backend/tests/unit/domain/events/test_event_schema_coverage.py b/backend/tests/unit/domain/events/test_event_schema_coverage.py index dd150e3e..5888ed92 100644 --- a/backend/tests/unit/domain/events/test_event_schema_coverage.py +++ b/backend/tests/unit/domain/events/test_event_schema_coverage.py @@ -12,8 +12,7 @@ from typing import get_args from app.domain.enums.events import EventType -from app.domain.events.typed import BaseEvent, DomainEvent, domain_event_adapter -from app.events.schema.schema_registry import _get_event_type_to_class_mapping +from app.domain.events.typed import BaseEvent, DomainEvent, DomainEventAdapter def get_domain_event_classes() -> dict[EventType, type]: @@ -50,8 +49,8 @@ def get_domain_event_classes() -> dict[EventType, type]: def get_kafka_event_classes() -> dict[EventType, type]: - """Extract EventType -> class mapping from Kafka DomainEvent subclasses.""" - return _get_event_type_to_class_mapping() + """Extract EventType -> class mapping from DomainEvent union (same source).""" + return get_domain_event_classes() class TestEventSchemaCoverage: @@ -84,15 +83,15 @@ def test_all_event_types_have_kafka_event_class(self) -> None: ) ) - def test_domain_event_adapter_covers_all_types(self) -> None: - """The domain_event_adapter TypeAdapter must handle all EventTypes.""" + def test_DomainEventAdapter_covers_all_types(self) -> None: + """The DomainEventAdapter TypeAdapter must handle all EventTypes.""" errors: list[str] = [] for et in EventType: try: # Validation will fail due to missing required fields, but that's OK # We just want to confirm the type IS in the union (not "unknown discriminator") - domain_event_adapter.validate_python({"event_type": et}) + DomainEventAdapter.validate_python({"event_type": et}) except Exception as e: error_str = str(e).lower() # "validation error" means type IS recognized but fields are missing - that's fine @@ -100,7 +99,7 @@ def test_domain_event_adapter_covers_all_types(self) -> None: if "no match" in error_str or "unable to extract" in error_str: errors.append(f" - {et.value}: not in DomainEvent union") - assert not errors, f"domain_event_adapter missing {len(errors)} type(s):\n" + "\n".join(errors) + assert not errors, f"DomainEventAdapter missing {len(errors)} type(s):\n" + "\n".join(errors) def test_no_orphan_domain_event_classes(self) -> None: """All domain event classes must have a corresponding EventType.""" diff --git a/backend/tests/unit/events/test_schema_registry_manager.py b/backend/tests/unit/events/test_schema_registry_manager.py index 118c4c7c..cdc3159b 100644 --- a/backend/tests/unit/events/test_schema_registry_manager.py +++ b/backend/tests/unit/events/test_schema_registry_manager.py @@ -1,16 +1,11 @@ -import logging - import pytest 
-from app.domain.enums.execution import QueuePriority -from app.domain.events.typed import ExecutionRequestedEvent -from app.events.schema.schema_registry import SchemaRegistryManager -from app.settings import Settings +from pydantic import ValidationError -_test_logger = logging.getLogger("test.events.schema_registry_manager") +from app.domain.enums.execution import QueuePriority +from app.domain.events.typed import DomainEventAdapter, ExecutionRequestedEvent -def test_deserialize_json_execution_requested(test_settings: Settings) -> None: - m = SchemaRegistryManager(test_settings, logger=_test_logger) +def test_domain_event_adapter_execution_requested() -> None: data = { "event_type": "execution_requested", "execution_id": "e1", @@ -28,13 +23,12 @@ def test_deserialize_json_execution_requested(test_settings: Settings) -> None: "priority": QueuePriority.NORMAL, "metadata": {"service_name": "t", "service_version": "1.0"}, } - ev = m.deserialize_json(data) + ev = DomainEventAdapter.validate_python(data) assert isinstance(ev, ExecutionRequestedEvent) assert ev.execution_id == "e1" assert ev.language == "python" -def test_deserialize_json_missing_type_raises(test_settings: Settings) -> None: - m = SchemaRegistryManager(test_settings, logger=_test_logger) - with pytest.raises(ValueError): - m.deserialize_json({}) +def test_domain_event_adapter_missing_type_raises() -> None: + with pytest.raises(ValidationError): + DomainEventAdapter.validate_python({}) From 61ef83a05fb740cf30ba8fdbd0a8a385debb87d0 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 17:09:03 +0100 Subject: [PATCH 02/10] fix: simplification of schema registry --- backend/app/events/schema/schema_registry.py | 12 ++++-------- .../app/services/event_replay/replay_service.py | 17 +++++++++++++---- backend/app/settings.py | 5 ----- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/backend/app/events/schema/schema_registry.py b/backend/app/events/schema/schema_registry.py index 14cba9ce..af67d943 100644 --- a/backend/app/events/schema/schema_registry.py +++ b/backend/app/events/schema/schema_registry.py @@ -1,5 +1,4 @@ import logging -from typing import Any from schema_registry.client import AsyncSchemaRegistryClient, schema from schema_registry.serializers import AsyncAvroMessageSerializer # type: ignore[attr-defined] @@ -19,14 +18,11 @@ def __init__(self, settings: Settings, logger: logging.Logger): self.logger = logger self.namespace = "com.integr8scode.events" self.subject_prefix = settings.SCHEMA_SUBJECT_PREFIX - parts = settings.SCHEMA_REGISTRY_AUTH.split(":", 1) - auth: tuple[str, str] | None = (parts[0], parts[1]) if len(parts) == 2 else None - self._client = AsyncSchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL, auth=auth) # type: ignore[arg-type] + self._client = AsyncSchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL) self.serializer = AsyncAvroMessageSerializer(self._client) async def serialize_event(self, event: DomainEvent) -> bytes: """Serialize event to Confluent wire format: [0x00][4-byte schema id][Avro binary].""" - subject = f"{self.subject_prefix}{event.__class__.__name__}-value" - avro_schema = schema.AvroSchema(event.__class__.avro_schema(namespace=self.namespace)) - payload: dict[str, Any] = event.model_dump(mode="python", by_alias=False, exclude_unset=False) - return await self.serializer.encode_record_with_schema(subject, avro_schema, payload) + avro = schema.AvroSchema(event.avro_schema(namespace=self.namespace)) + subject = f"{self.subject_prefix}{avro.name}-value" + return 
await self.serializer.encode_record_with_schema(subject, avro, event.model_dump()) diff --git a/backend/app/services/event_replay/replay_service.py b/backend/app/services/event_replay/replay_service.py index f41f5e5f..0ff77b11 100644 --- a/backend/app/services/event_replay/replay_service.py +++ b/backend/app/services/event_replay/replay_service.py @@ -7,6 +7,7 @@ import aiofiles from opentelemetry.trace import SpanKind +from pydantic import ValidationError from app.core.metrics import ReplayMetrics from app.core.tracing.utils import trace_span @@ -225,10 +226,18 @@ async def _fetch_event_batches(self, session: ReplaySessionState) -> AsyncIterat if max_events and events_processed >= max_events: break - event = DomainEventAdapter.validate_python(doc) - if event: - batch.append(event) - events_processed += 1 + try: + event = DomainEventAdapter.validate_python(doc) + except ValidationError as e: + session.failed_events += 1 + self.logger.warning( + "Skipping event that failed validation", + extra={"event_id": doc.get("event_id", "unknown"), "error": str(e)}, + ) + continue + + batch.append(event) + events_processed += 1 if batch: yield batch diff --git a/backend/app/settings.py b/backend/app/settings.py index 4de6729c..34522a24 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -91,11 +91,6 @@ def __init__( # Event-Driven Design Configuration KAFKA_BOOTSTRAP_SERVERS: str = "kafka:29092" SCHEMA_REGISTRY_URL: str = "http://schema-registry:8081" - SCHEMA_REGISTRY_AUTH: str = Field( - default="", - pattern=r"^($|[^:]+:.+)$", - description="Schema registry auth in 'username:password' format, or empty for no auth", - ) ENABLE_EVENT_STREAMING: bool = False EVENT_RETENTION_DAYS: int = 30 KAFKA_TOPIC_PREFIX: str = "pref" From 748abbfbbbc1e2d297674f1be444c46a521ba172 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 19:31:17 +0100 Subject: [PATCH 03/10] fix: misc --- backend/app/core/providers.py | 20 +- .../app/db/repositories/event_repository.py | 8 +- backend/app/events/core/producer.py | 6 +- backend/app/events/event_store.py | 326 ------------------ backend/app/events/handlers.py | 26 -- backend/app/main.py | 2 - backend/app/services/execution_service.py | 16 +- backend/app/services/kafka_event_service.py | 25 +- backend/tests/e2e/core/test_container.py | 2 +- .../tests/e2e/core/test_dishka_lifespan.py | 9 - backend/tests/e2e/events/test_event_store.py | 153 -------- .../unit/services/pod_monitor/test_monitor.py | 17 - 12 files changed, 29 insertions(+), 581 deletions(-) delete mode 100644 backend/app/events/event_store.py delete mode 100644 backend/tests/e2e/events/test_event_store.py diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py index 3e4ed38d..ae6579fa 100644 --- a/backend/app/core/providers.py +++ b/backend/app/core/providers.py @@ -52,7 +52,6 @@ from app.domain.rate_limit import RateLimitConfig from app.domain.saga.models import SagaConfig from app.events.core import UnifiedProducer -from app.events.event_store import EventStore, create_event_store from app.events.schema.schema_registry import SchemaRegistryManager from app.services.admin import AdminEventsService, AdminSettingsService, AdminUserService from app.services.auth_service import AuthService @@ -182,11 +181,12 @@ def get_unified_producer( self, broker: KafkaBroker, schema_registry: SchemaRegistryManager, + event_repository: EventRepository, logger: logging.Logger, settings: Settings, event_metrics: EventMetrics, ) -> UnifiedProducer: - return UnifiedProducer(broker, 
schema_registry, logger, settings, event_metrics) + return UnifiedProducer(broker, schema_registry, event_repository, logger, settings, event_metrics) @provide def get_idempotency_repository(self, redis_client: redis.Redis) -> RedisIdempotencyRepository: @@ -279,16 +279,6 @@ class EventProvider(Provider): def get_schema_registry(self, settings: Settings, logger: logging.Logger) -> SchemaRegistryManager: return SchemaRegistryManager(settings, logger) - @provide - def get_event_store( - self, - logger: logging.Logger, - event_metrics: EventMetrics, - ) -> EventStore: - return create_event_store( - logger=logger, event_metrics=event_metrics, ttl_days=90 - ) - class KubernetesProvider(Provider): scope = Scope.APP @@ -478,14 +468,12 @@ def get_event_service(self, event_repository: EventRepository) -> EventService: @provide def get_kafka_event_service( self, - event_repository: EventRepository, kafka_producer: UnifiedProducer, settings: Settings, logger: logging.Logger, event_metrics: EventMetrics, ) -> KafkaEventService: return KafkaEventService( - event_repository=event_repository, kafka_producer=kafka_producer, settings=settings, logger=logger, @@ -629,7 +617,7 @@ def get_execution_service( self, execution_repository: ExecutionRepository, kafka_producer: UnifiedProducer, - event_store: EventStore, + event_repository: EventRepository, settings: Settings, logger: logging.Logger, execution_metrics: ExecutionMetrics, @@ -637,7 +625,7 @@ def get_execution_service( return ExecutionService( execution_repo=execution_repository, producer=kafka_producer, - event_store=event_store, + event_repository=event_repository, settings=settings, logger=logger, execution_metrics=execution_metrics, diff --git a/backend/app/db/repositories/event_repository.py b/backend/app/db/repositories/event_repository.py index 7b250953..3fc62527 100644 --- a/backend/app/db/repositories/event_repository.py +++ b/backend/app/db/repositories/event_repository.py @@ -6,6 +6,7 @@ from beanie.odm.operators.find import BaseFindOperator from beanie.operators import GTE, LT, LTE, Eq, In, Not, Or, RegEx from monggregate import Pipeline, S +from pymongo.errors import DuplicateKeyError from app.core.tracing import EventAttributes from app.core.tracing.utils import add_span_attributes @@ -41,6 +42,7 @@ def _build_time_filter(self, start_time: datetime | None, end_time: datetime | N return {key: value for key, value in {"$gte": start_time, "$lte": end_time}.items() if value is not None} async def store_event(self, event: DomainEvent) -> str: + """Idempotent event store — silently ignores duplicates by event_id.""" data = event.model_dump(exclude_none=True) data.setdefault("stored_at", datetime.now(timezone.utc)) doc = EventDocument(**data) @@ -51,7 +53,11 @@ async def store_event(self, event: DomainEvent) -> str: str(EventAttributes.EXECUTION_ID): event.aggregate_id or "", } ) - await doc.insert() + try: + await doc.insert() + except DuplicateKeyError: + self.logger.debug(f"Event {event.event_id} already stored, skipping") + return event.event_id self.logger.debug(f"Stored event {event.event_id} of type {event.event_type}") return event.event_id diff --git a/backend/app/events/core/producer.py b/backend/app/events/core/producer.py index b7b4037c..1a9ed1ac 100644 --- a/backend/app/events/core/producer.py +++ b/backend/app/events/core/producer.py @@ -8,6 +8,7 @@ from app.core.metrics import EventMetrics from app.core.tracing.utils import inject_trace_context +from app.db.repositories.event_repository import EventRepository from app.dlq.models 
import DLQMessage, DLQMessageStatus from app.domain.enums.kafka import KafkaTopic from app.domain.events.typed import DomainEvent @@ -27,18 +28,21 @@ def __init__( self, broker: KafkaBroker, schema_registry_manager: SchemaRegistryManager, + event_repository: EventRepository, logger: logging.Logger, settings: Settings, event_metrics: EventMetrics, ): self._broker = broker self._schema_registry = schema_registry_manager + self._event_repository = event_repository self.logger = logger self._event_metrics = event_metrics self._topic_prefix = settings.KAFKA_TOPIC_PREFIX async def produce(self, event_to_produce: DomainEvent, key: str) -> None: - """Produce a message to Kafka.""" + """Persist event to MongoDB, then publish to Kafka.""" + await self._event_repository.store_event(event_to_produce) topic = f"{self._topic_prefix}{EVENT_TYPE_TO_TOPIC[event_to_produce.event_type]}" try: serialized_value = await self._schema_registry.serialize_event(event_to_produce) diff --git a/backend/app/events/event_store.py b/backend/app/events/event_store.py deleted file mode 100644 index 8a1fa574..00000000 --- a/backend/app/events/event_store.py +++ /dev/null @@ -1,326 +0,0 @@ -import asyncio -import logging -from collections.abc import Awaitable, Callable -from datetime import datetime, timedelta, timezone -from typing import Any - -from beanie.odm.enums import SortDirection -from pymongo.errors import BulkWriteError, DuplicateKeyError - -from app.core.metrics import EventMetrics -from app.core.tracing import EventAttributes -from app.core.tracing.utils import add_span_attributes -from app.db.docs import EventDocument -from app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent, DomainEventAdapter - - -class EventStore: - def __init__( - self, - logger: logging.Logger, - event_metrics: EventMetrics, - ttl_days: int = 90, - batch_size: int = 100, - ): - self.metrics = event_metrics - self.logger = logger - self.ttl_days = ttl_days - self.batch_size = batch_size - self._initialized = False - - self._SECURITY_TYPES = [ - EventType.USER_LOGIN, - EventType.USER_LOGGED_OUT, - EventType.SECURITY_VIOLATION, - ] - - async def initialize(self) -> None: - if self._initialized: - return - self._initialized = True - self.logger.info("Event store initialized with Beanie") - - async def store_event(self, event: DomainEvent) -> bool: - start = asyncio.get_running_loop().time() - try: - now = datetime.now(timezone.utc) - ttl = now + timedelta(days=self.ttl_days) - doc = EventDocument(**event.model_dump(exclude_none=True), stored_at=now, ttl_expires_at=ttl) - await doc.insert() - - add_span_attributes( - **{ - str(EventAttributes.EVENT_TYPE): str(event.event_type), - str(EventAttributes.EVENT_ID): event.event_id, - str(EventAttributes.EXECUTION_ID): event.aggregate_id or "", - } - ) - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_store_duration(duration, "store_single", "event_store") - self.metrics.record_event_stored(event.event_type, "event_store") - return True - except DuplicateKeyError: - self.logger.warning(f"Event {event.event_id} already exists") - return True - except Exception as e: - self.logger.error(f"Failed to store event {event.event_id}: {e.__class__.__name__}: {e}", exc_info=True) - self.metrics.record_event_store_failed(event.event_type, type(e).__name__) - return False - - async def store_batch(self, events: list[DomainEvent]) -> dict[str, int]: - start = asyncio.get_running_loop().time() - results = {"total": len(events), "stored": 0, 
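Because store_event now swallows DuplicateKeyError, the persist-then-publish order introduced in produce() above stays safe under retries. A small illustrative sketch of that property, assuming an EventRepository instance and a typed event (not code from the patch):

    from app.db.repositories.event_repository import EventRepository
    from app.domain.events.typed import DomainEvent

    async def persist_twice(repo: EventRepository, event: DomainEvent) -> None:
        first = await repo.store_event(event)    # inserts the document
        second = await repo.store_event(event)   # duplicate insert is silently ignored
        assert first == second == event.event_id

So if the Kafka publish fails and produce() is retried, the MongoDB copy of the event is written at most once.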
"duplicates": 0, "failed": 0} - if not events: - return results - - now = datetime.now(timezone.utc) - ttl = now + timedelta(days=self.ttl_days) - try: - docs = [EventDocument(**e.model_dump(exclude_none=True), stored_at=now, ttl_expires_at=ttl) for e in events] - - try: - await EventDocument.insert_many(docs) - results["stored"] = len(docs) - except Exception as e: - if isinstance(e, BulkWriteError) and e.details: - for err in e.details.get("writeErrors", []): - if err.get("code") == 11000: - results["duplicates"] += 1 - else: - results["failed"] += 1 - results["stored"] = results["total"] - results["duplicates"] - results["failed"] - else: - raise - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_store_duration(duration, "store_batch", "event_store") - add_span_attributes(**{"events.batch.count": len(events)}) - if results["stored"] > 0: - for event in events: - self.metrics.record_event_stored(event.event_type, "event_store") - return results - except Exception as e: - self.logger.error(f"Failed to store batch: {e.__class__.__name__}: {e}", exc_info=True) - results["failed"] = results["total"] - results["stored"] - return results - - async def get_event(self, event_id: str) -> DomainEvent | None: - start = asyncio.get_running_loop().time() - doc = await EventDocument.find_one({"event_id": event_id}) - if not doc: - return None - - event = DomainEventAdapter.validate_python(doc.model_dump()) - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "get_by_id", "event_store") - return event - - async def get_events_by_type( - self, - event_type: EventType, - start_time: datetime | None = None, - end_time: datetime | None = None, - limit: int = 100, - offset: int = 0, - ) -> list[DomainEvent]: - start = asyncio.get_running_loop().time() - query: dict[str, Any] = {"event_type": event_type} - if tr := self._time_range(start_time, end_time): - query["timestamp"] = tr - - docs = await ( - EventDocument.find(query) - .sort([("timestamp", SortDirection.DESCENDING)]) - .skip(offset) - .limit(limit) - .to_list() - ) - events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "get_by_type", "event_store") - return events - - async def get_execution_events( - self, - execution_id: str, - event_types: list[EventType] | None = None, - ) -> list[DomainEvent]: - start = asyncio.get_running_loop().time() - query: dict[str, Any] = {"$or": [{"execution_id": execution_id}, {"aggregate_id": execution_id}]} - if event_types: - query["event_type"] = {"$in": event_types} - - docs = await EventDocument.find(query).sort([("timestamp", SortDirection.ASCENDING)]).to_list() - events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "get_execution_events", "event_store") - return events - - async def get_user_events( - self, - user_id: str, - event_types: list[EventType] | None = None, - start_time: datetime | None = None, - end_time: datetime | None = None, - limit: int = 100, - ) -> list[DomainEvent]: - start = asyncio.get_running_loop().time() - query: dict[str, Any] = {"metadata.user_id": str(user_id)} - if event_types: - query["event_type"] = {"$in": event_types} - if tr := self._time_range(start_time, end_time): - query["timestamp"] = tr - - docs = await 
EventDocument.find(query).sort([("timestamp", SortDirection.DESCENDING)]).limit(limit).to_list() - events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "get_user_events", "event_store") - return events - - async def get_security_events( - self, - start_time: datetime | None = None, - end_time: datetime | None = None, - user_id: str | None = None, - limit: int = 100, - ) -> list[DomainEvent]: - start = asyncio.get_running_loop().time() - query: dict[str, Any] = {"event_type": {"$in": self._SECURITY_TYPES}} - if user_id: - query["metadata.user_id"] = str(user_id) - if tr := self._time_range(start_time, end_time): - query["timestamp"] = tr - - docs = await EventDocument.find(query).sort([("timestamp", SortDirection.DESCENDING)]).limit(limit).to_list() - events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "get_security_events", "event_store") - return events - - async def get_correlation_chain(self, correlation_id: str) -> list[DomainEvent]: - start = asyncio.get_running_loop().time() - docs = await ( - EventDocument.find({"metadata.correlation_id": str(correlation_id)}) - .sort([("timestamp", SortDirection.ASCENDING)]) - .to_list() - ) - events = [DomainEventAdapter.validate_python(doc.model_dump()) for doc in docs] - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "get_correlation_chain", "event_store") - return events - - async def replay_events( - self, - start_time: datetime, - end_time: datetime | None = None, - event_types: list[EventType] | None = None, - callback: Callable[[DomainEvent], Awaitable[None]] | None = None, - ) -> int: - start = asyncio.get_running_loop().time() - count = 0 - - try: - query: dict[str, Any] = {"timestamp": {"$gte": start_time}} - if end_time: - query["timestamp"]["$lte"] = end_time - if event_types: - query["event_type"] = {"$in": event_types} - - async for doc in EventDocument.find(query).sort([("timestamp", SortDirection.ASCENDING)]): - event = DomainEventAdapter.validate_python(doc.model_dump()) - if callback: - await callback(event) - count += 1 - - duration = asyncio.get_running_loop().time() - start - self.metrics.record_event_query_duration(duration, "replay_events", "event_store") - self.logger.info(f"Replayed {count} events from {start_time} to {end_time}") - return count - except Exception as e: - self.logger.error(f"Failed to replay events: {e}") - return count - - async def get_event_stats( - self, - start_time: datetime | None = None, - end_time: datetime | None = None, - ) -> dict[str, Any]: - pipeline: list[dict[str, Any]] = [] - if start_time or end_time: - match: dict[str, Any] = {} - if start_time: - match["timestamp"] = {"$gte": start_time} - if end_time: - match.setdefault("timestamp", {})["$lte"] = end_time - pipeline.append({"$match": match}) - - pipeline.extend( - [ - { - "$group": { - "_id": "$event_type", - "count": {"$sum": 1}, - "first_event": {"$min": "$timestamp"}, - "last_event": {"$max": "$timestamp"}, - } - }, - {"$sort": {"count": -1}}, - ] - ) - - stats: dict[str, Any] = {"total_events": 0, "event_types": {}, "start_time": start_time, "end_time": end_time} - async for r in EventDocument.aggregate(pipeline): - et = r["_id"] - c = r["count"] - stats["event_types"][et] = { - "count": c, - 
"first_event": r["first_event"], - "last_event": r["last_event"], - } - stats["total_events"] += c - return stats - - def _time_range(self, start_time: datetime | None, end_time: datetime | None) -> dict[str, Any] | None: - if not start_time and not end_time: - return None - tr: dict[str, Any] = {} - if start_time: - tr["$gte"] = start_time - if end_time: - tr["$lte"] = end_time - return tr - - async def health_check(self) -> dict[str, Any]: - try: - event_count = await EventDocument.count() - return { - "healthy": True, - "event_count": event_count, - "collection": "events", - "initialized": self._initialized, - } - except Exception as e: - self.logger.error(f"Event store health check failed: {e}") - return {"healthy": False, "error": str(e)} - - -def create_event_store( - logger: logging.Logger, - event_metrics: EventMetrics, - ttl_days: int = 90, - batch_size: int = 100, -) -> EventStore: - return EventStore( - logger=logger, - event_metrics=event_metrics, - ttl_days=ttl_days, - batch_size=batch_size, - ) diff --git a/backend/app/events/handlers.py b/backend/app/events/handlers.py index c3d8166c..4179e1bd 100644 --- a/backend/app/events/handlers.py +++ b/backend/app/events/handlers.py @@ -26,10 +26,6 @@ ExecutionTimeoutEvent, ) from app.domain.idempotency import KeyStrategy -from app.events.core import UnifiedProducer -from app.events.event_store import EventStore -from app.infrastructure.kafka.mappings import EVENT_TYPE_TO_TOPIC -from app.infrastructure.kafka.topics import get_all_topics from app.services.coordinator.coordinator import ExecutionCoordinator from app.services.idempotency import IdempotencyManager from app.services.k8s_worker import KubernetesWorker @@ -239,28 +235,6 @@ async def on_unhandled(body: DomainEvent) -> None: pass -def register_event_store_subscriber(broker: KafkaBroker, settings: Settings) -> None: - topics = [f"{settings.KAFKA_TOPIC_PREFIX}{t}" for t in get_all_topics()] - - @broker.subscriber( - *topics, - group_id="event-store-consumer", - ack_policy=AckPolicy.ACK, - max_poll_records=100, - ) - async def on_any_event( - body: DomainEvent, - event_store: FromDishka[EventStore], - producer: FromDishka[UnifiedProducer], - logger: FromDishka[logging.Logger], - ) -> None: - try: - await event_store.store_event(body) - except Exception as err: - logger.error(f"Error storing event {body.event_id}: {err}", exc_info=True) - topic = str(EVENT_TYPE_TO_TOPIC.get(body.event_type, "unknown")) - await producer.send_to_dlq(body, topic, err, 0) - def register_sse_subscriber(broker: KafkaBroker, settings: Settings) -> None: @broker.subscriber( diff --git a/backend/app/main.py b/backend/app/main.py index f308fece..607ba1b7 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -42,7 +42,6 @@ ) from app.events.broker import create_broker from app.events.handlers import ( - register_event_store_subscriber, register_notification_subscriber, register_sse_subscriber, ) @@ -64,7 +63,6 @@ def create_app(settings: Settings | None = None) -> FastAPI: # Create Kafka broker and register in-app subscribers schema_registry = SchemaRegistryManager(settings, logger) broker = create_broker(settings, schema_registry, logger) - register_event_store_subscriber(broker, settings) register_sse_subscriber(broker, settings) register_notification_subscriber(broker, settings) diff --git a/backend/app/services/execution_service.py b/backend/app/services/execution_service.py index 9312f812..35cac84c 100644 --- a/backend/app/services/execution_service.py +++ 
b/backend/app/services/execution_service.py @@ -6,6 +6,7 @@ from app.core.correlation import CorrelationContext from app.core.metrics import ExecutionMetrics +from app.db.repositories.event_repository import EventRepository from app.db.repositories.execution_repository import ExecutionRepository from app.domain.enums.events import EventType from app.domain.enums.execution import ExecutionStatus, QueuePriority @@ -24,7 +25,6 @@ ResourceLimitsDomain, ) from app.events.core import UnifiedProducer -from app.events.event_store import EventStore from app.runtime_registry import RUNTIME_REGISTRY from app.settings import Settings @@ -49,7 +49,7 @@ def __init__( self, execution_repo: ExecutionRepository, producer: UnifiedProducer, - event_store: EventStore, + event_repository: EventRepository, settings: Settings, logger: logging.Logger, execution_metrics: ExecutionMetrics, @@ -60,14 +60,14 @@ def __init__( Args: execution_repo: Repository for execution data persistence. producer: Kafka producer for publishing events. - event_store: Event store for event persistence. + event_repository: Repository for event queries. settings: Application settings. logger: Logger instance. execution_metrics: Metrics for tracking execution operations. """ self.execution_repo = execution_repo self.producer = producer - self.event_store = event_store + self.event_repository = event_repository self.settings = settings self.logger = logger self.metrics = execution_metrics @@ -303,12 +303,8 @@ async def get_execution_events( Returns: List of events for the execution. """ - # Use the correct method name - get_execution_events instead of get_events_by_execution - events = await self.event_store.get_execution_events(execution_id=execution_id, event_types=event_types) - - # Apply limit if we got more events than requested - if len(events) > limit: - events = events[:limit] + result = await self.event_repository.get_execution_events(execution_id=execution_id, limit=limit) + events = result.events self.logger.debug( f"Retrieved {len(events)} events for execution {execution_id}", diff --git a/backend/app/services/kafka_event_service.py b/backend/app/services/kafka_event_service.py index b796e05b..deca49a3 100644 --- a/backend/app/services/kafka_event_service.py +++ b/backend/app/services/kafka_event_service.py @@ -8,7 +8,6 @@ from app.core.correlation import CorrelationContext from app.core.metrics import EventMetrics -from app.db.repositories.event_repository import EventRepository from app.domain.enums.events import EventType from app.domain.events import DomainEventAdapter from app.domain.events.typed import DomainEvent, EventMetadata @@ -21,13 +20,11 @@ class KafkaEventService: def __init__( self, - event_repository: EventRepository, kafka_producer: UnifiedProducer, settings: Settings, logger: logging.Logger, event_metrics: EventMetrics, ): - self.event_repository = event_repository self.kafka_producer = kafka_producer self.logger = logger self.metrics = event_metrics @@ -42,17 +39,9 @@ async def publish_event( metadata: EventMetadata | None = None, ) -> str: """ - Publish an event to Kafka and store an audit copy via the repository + Build a typed DomainEvent from parameters and publish to Kafka. - Args: - event_type: Type of event (e.g., "execution.requested") - payload: Event-specific data - aggregate_id: ID of the aggregate root - correlation_id: ID for correlating related events - metadata: Event metadata (service/user/trace/IP). If None, service fills minimal defaults. 
- - Returns: - Event ID of published event + The producer persists the event to MongoDB before publishing. """ with tracer.start_as_current_span("publish_event") as span: span.set_attribute("event.type", event_type) @@ -86,9 +75,7 @@ async def publish_event( **payload, } domain_event = DomainEventAdapter.validate_python(event_data) - await self.event_repository.store_event(domain_event) - # Publish to Kafka (headers built automatically by producer) await self.kafka_producer.produce(event_to_produce=domain_event, key=aggregate_id or domain_event.event_id) self.metrics.record_event_published(event_type) self.metrics.record_event_processing_duration(time.time() - start_time, event_type) @@ -159,19 +146,19 @@ async def publish_pod_event( ) async def publish_domain_event(self, event: DomainEvent, key: str | None = None) -> str: - """Publish a pre-built DomainEvent to Kafka and store an audit copy.""" + """Publish a pre-built DomainEvent to Kafka. + + The producer persists the event to MongoDB before publishing. + """ with tracer.start_as_current_span("publish_domain_event") as span: span.set_attribute("event.type", event.event_type) if event.aggregate_id: span.set_attribute("aggregate.id", event.aggregate_id) start_time = time.time() - await self.event_repository.store_event(event) await self.kafka_producer.produce(event_to_produce=event, key=key or event.aggregate_id or event.event_id) self.metrics.record_event_published(event.event_type) self.metrics.record_event_processing_duration(time.time() - start_time, event.event_type) self.logger.info("Domain event published", extra={"event_id": event.event_id}) return event.event_id - - diff --git a/backend/tests/e2e/core/test_container.py b/backend/tests/e2e/core/test_container.py index da711f02..45ac8ae5 100644 --- a/backend/tests/e2e/core/test_container.py +++ b/backend/tests/e2e/core/test_container.py @@ -170,7 +170,7 @@ async def test_execution_service_has_dependencies( # Check that key dependencies are present assert service.settings is not None assert service.execution_repo is not None - assert service.event_store is not None + assert service.event_repository is not None @pytest.mark.asyncio async def test_security_service_uses_settings( diff --git a/backend/tests/e2e/core/test_dishka_lifespan.py b/backend/tests/e2e/core/test_dishka_lifespan.py index fa4e795a..e1e387a8 100644 --- a/backend/tests/e2e/core/test_dishka_lifespan.py +++ b/backend/tests/e2e/core/test_dishka_lifespan.py @@ -88,12 +88,3 @@ async def test_sse_redis_bus_available(self, scope: AsyncContainer) -> None: bus = await scope.get(SSERedisBus) assert bus is not None - @pytest.mark.asyncio - async def test_event_store_available( - self, scope: AsyncContainer - ) -> None: - """Event store is available after lifespan.""" - from app.events.event_store import EventStore - - event_store = await scope.get(EventStore) - assert event_store is not None diff --git a/backend/tests/e2e/events/test_event_store.py b/backend/tests/e2e/events/test_event_store.py deleted file mode 100644 index 8bd82173..00000000 --- a/backend/tests/e2e/events/test_event_store.py +++ /dev/null @@ -1,153 +0,0 @@ -import logging -import uuid -from datetime import datetime, timedelta, timezone - -import pytest -from app.db.docs import EventDocument -from app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent -from app.events.event_store import EventStore -from dishka import AsyncContainer - -from tests.conftest import make_execution_requested_event - -pytestmark = [pytest.mark.e2e, 
pytest.mark.mongodb] - -_test_logger = logging.getLogger("test.events.event_store") - - -@pytest.mark.asyncio -async def test_event_store_stores_single_event(scope: AsyncContainer) -> None: - """Test that EventStore.store_event() persists an event to MongoDB.""" - store: EventStore = await scope.get(EventStore) - - # Create a unique event - execution_id = f"exec-{uuid.uuid4().hex[:8]}" - event = make_execution_requested_event(execution_id=execution_id) - - # Store the event - result = await store.store_event(event) - assert result is True - - # Verify it's in MongoDB - doc = await EventDocument.find_one({"event_id": event.event_id}) - assert doc is not None - assert doc.event_id == event.event_id - assert doc.event_type == EventType.EXECUTION_REQUESTED - assert doc.aggregate_id == execution_id - assert doc.stored_at is not None - assert doc.ttl_expires_at is not None - # TTL should be ~90 days in the future - assert doc.ttl_expires_at > datetime.now(timezone.utc) + timedelta(days=89) - - -@pytest.mark.asyncio -async def test_event_store_stores_batch(scope: AsyncContainer) -> None: - """Test that EventStore.store_batch() persists multiple events.""" - store: EventStore = await scope.get(EventStore) - - # Create multiple unique events - events: list[DomainEvent] = [ - make_execution_requested_event(execution_id=f"exec-batch-{uuid.uuid4().hex[:8]}") - for _ in range(5) - ] - - # Store the batch - results = await store.store_batch(events) - - assert results["total"] == 5 - assert results["stored"] == 5 - assert results["duplicates"] == 0 - assert results["failed"] == 0 - - # Verify all events are in MongoDB - for event in events: - doc = await EventDocument.find_one({"event_id": event.event_id}) - assert doc is not None - assert doc.event_type == EventType.EXECUTION_REQUESTED - - -@pytest.mark.asyncio -async def test_event_store_handles_duplicates(scope: AsyncContainer) -> None: - """Test that EventStore handles duplicate event IDs gracefully.""" - store: EventStore = await scope.get(EventStore) - - # Create an event - event = make_execution_requested_event(execution_id=f"exec-dup-{uuid.uuid4().hex[:8]}") - - # Store it twice - result1 = await store.store_event(event) - result2 = await store.store_event(event) - - # Both should succeed (second is a no-op due to duplicate handling) - assert result1 is True - assert result2 is True - - # Only one document should exist - count = await EventDocument.find({"event_id": event.event_id}).count() - assert count == 1 - - -@pytest.mark.asyncio -async def test_event_store_batch_handles_duplicates(scope: AsyncContainer) -> None: - """Test that store_batch handles duplicates within the batch.""" - store: EventStore = await scope.get(EventStore) - - # Create an event and store it first - event = make_execution_requested_event(execution_id=f"exec-batch-dup-{uuid.uuid4().hex[:8]}") - await store.store_event(event) - - # Create a batch with one new event and one duplicate - new_event = make_execution_requested_event(execution_id=f"exec-batch-new-{uuid.uuid4().hex[:8]}") - batch: list[DomainEvent] = [new_event, event] # event is already stored - - results = await store.store_batch(batch) - - assert results["total"] == 2 - assert results["stored"] == 1 # Only the new one - assert results["duplicates"] == 1 # The duplicate - - -@pytest.mark.asyncio -async def test_event_store_retrieves_by_id(scope: AsyncContainer) -> None: - """Test that EventStore.get_event() retrieves a stored event.""" - store: EventStore = await scope.get(EventStore) - - # Create and store an 
event - execution_id = f"exec-get-{uuid.uuid4().hex[:8]}" - event = make_execution_requested_event(execution_id=execution_id, script="print('test')") - await store.store_event(event) - - # Retrieve it - retrieved = await store.get_event(event.event_id) - - assert retrieved is not None - assert retrieved.event_id == event.event_id - assert retrieved.event_type == EventType.EXECUTION_REQUESTED - - -@pytest.mark.asyncio -async def test_event_store_retrieves_by_type(scope: AsyncContainer) -> None: - """Test that EventStore.get_events_by_type() works correctly.""" - store: EventStore = await scope.get(EventStore) - - # Store a few events - unique_prefix = uuid.uuid4().hex[:8] - events: list[DomainEvent] = [ - make_execution_requested_event(execution_id=f"exec-type-{unique_prefix}-{i}") - for i in range(3) - ] - await store.store_batch(events) - - # Query by type - retrieved = await store.get_events_by_type( - EventType.EXECUTION_REQUESTED, - limit=100, - ) - - # Should find at least our 3 events - assert len(retrieved) >= 3 - - # All should be EXECUTION_REQUESTED - for ev in retrieved: - assert ev.event_type == EventType.EXECUTION_REQUESTED diff --git a/backend/tests/unit/services/pod_monitor/test_monitor.py b/backend/tests/unit/services/pod_monitor/test_monitor.py index 1c0bdec8..14f0a61d 100644 --- a/backend/tests/unit/services/pod_monitor/test_monitor.py +++ b/backend/tests/unit/services/pod_monitor/test_monitor.py @@ -5,7 +5,6 @@ import pytest from app.core.metrics import EventMetrics, KubernetesMetrics -from app.db.repositories.event_repository import EventRepository from app.domain.events.typed import ( DomainEvent, EventMetadata, @@ -42,18 +41,6 @@ # ===== Test doubles for KafkaEventService dependencies ===== -class FakeEventRepository(EventRepository): - """In-memory event repository for testing.""" - - def __init__(self) -> None: - super().__init__(_test_logger) - self.stored_events: list[DomainEvent] = [] - - async def store_event(self, event: DomainEvent) -> str: - self.stored_events.append(event) - return event.event_id - - class FakeUnifiedProducer(UnifiedProducer): """Fake producer that captures events without Kafka.""" @@ -74,11 +61,9 @@ async def aclose(self) -> None: def create_test_kafka_event_service(event_metrics: EventMetrics) -> tuple[KafkaEventService, FakeUnifiedProducer]: """Create real KafkaEventService with fake dependencies for testing.""" fake_producer = FakeUnifiedProducer() - fake_repo = FakeEventRepository() settings = Settings(config_path="config.test.toml") service = KafkaEventService( - event_repository=fake_repo, kafka_producer=fake_producer, settings=settings, logger=_test_logger, @@ -406,9 +391,7 @@ async def produce( raise RuntimeError("Publish failed") failing_producer = FailingProducer() - fake_repo = FakeEventRepository() failing_service = KafkaEventService( - event_repository=fake_repo, kafka_producer=failing_producer, settings=Settings(config_path="config.test.toml"), logger=_test_logger, From 333c86f0d15ba3c51ba490bf1d2b50ecb6fab3f3 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 21:57:15 +0100 Subject: [PATCH 04/10] fix: avro for dlq --- backend/app/dlq/manager.py | 19 ++---------- backend/app/events/core/producer.py | 48 ++++++++++++----------------- backend/app/events/handlers.py | 34 ++++++++++++-------- 3 files changed, 43 insertions(+), 58 deletions(-) diff --git a/backend/app/dlq/manager.py b/backend/app/dlq/manager.py index 17232b8c..5cf21e36 100644 --- a/backend/app/dlq/manager.py +++ b/backend/app/dlq/manager.py @@ -1,7 +1,6 
@@ -import json import logging from datetime import datetime, timezone -from typing import Any, Callable +from typing import Callable from faststream.kafka import KafkaBroker @@ -122,18 +121,6 @@ async def process_monitoring_cycle(self) -> None: await self.process_due_retries() await self.update_queue_metrics() - def parse_kafka_message(self, msg: Any) -> DLQMessage: - """Parse a raw Kafka ConsumerRecord into a DLQMessage.""" - data = json.loads(msg.value) - headers = {k: v.decode() for k, v in (msg.headers or [])} - return DLQMessage(**data, dlq_offset=msg.offset, dlq_partition=msg.partition, headers=headers) - - def parse_dlq_body( - self, data: dict[str, Any], offset: int, partition: int, headers: dict[str, str] - ) -> DLQMessage: - """Parse a deserialized DLQ message body into a DLQMessage.""" - return DLQMessage(**data, dlq_offset=offset, dlq_partition=partition, headers=headers) - async def handle_message(self, message: DLQMessage) -> None: """Process a single DLQ message: filter → store → decide retry/discard.""" for filter_func in self._filters: @@ -166,14 +153,14 @@ async def retry_message(self, message: DLQMessage) -> None: retry_topic = f"{message.original_topic}{self.retry_topic_suffix}" hdrs: dict[str, str] = { + "event_type": message.event.event_type, "dlq_retry_count": str(message.retry_count + 1), "dlq_original_error": message.error, "dlq_retry_timestamp": datetime.now(timezone.utc).isoformat(), } hdrs = inject_trace_context(hdrs) - event = message.event - serialized = json.dumps(event.model_dump(mode="json")).encode() + serialized = await self.schema_registry.serialize_event(message.event) await self._broker.publish( message=serialized, diff --git a/backend/app/events/core/producer.py b/backend/app/events/core/producer.py index 1a9ed1ac..5f2523a9 100644 --- a/backend/app/events/core/producer.py +++ b/backend/app/events/core/producer.py @@ -1,5 +1,4 @@ import asyncio -import json import logging import socket from datetime import datetime, timezone @@ -9,7 +8,7 @@ from app.core.metrics import EventMetrics from app.core.tracing.utils import inject_trace_context from app.db.repositories.event_repository import EventRepository -from app.dlq.models import DLQMessage, DLQMessageStatus +from app.dlq.models import DLQMessageStatus from app.domain.enums.kafka import KafkaTopic from app.domain.events.typed import DomainEvent from app.events.schema.schema_registry import SchemaRegistryManager @@ -71,44 +70,35 @@ async def produce(self, event_to_produce: DomainEvent, key: str) -> None: async def send_to_dlq( self, original_event: DomainEvent, original_topic: str, error: Exception, retry_count: int = 0 ) -> None: - """Send a failed event to the Dead Letter Queue.""" + """Send a failed event to the Dead Letter Queue. + + The event body is Avro-encoded (same as every other topic). + DLQ metadata is carried in Kafka headers. 
+ """ try: current_task = asyncio.current_task() task_name = current_task.get_name() if current_task else "main" producer_id = f"{socket.gethostname()}-{task_name}" - dlq_message = DLQMessage( - event=original_event, - original_topic=original_topic, - error=str(error), - retry_count=retry_count, - failed_at=datetime.now(timezone.utc), - status=DLQMessageStatus.PENDING, - producer_id=producer_id, - ) - - dlq_event_data = { - "event": dlq_message.event.model_dump(mode="json"), - "original_topic": dlq_message.original_topic, - "error": dlq_message.error, - "retry_count": dlq_message.retry_count, - "failed_at": dlq_message.failed_at.isoformat(), - "producer_id": dlq_message.producer_id, - "status": str(dlq_message.status), - } - - serialized_value = json.dumps(dlq_event_data).encode("utf-8") + serialized_value = await self._schema_registry.serialize_event(original_event) dlq_topic = f"{self._topic_prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}" + headers = inject_trace_context({ + "event_type": original_event.event_type, + "original_topic": original_topic, + "error_type": type(error).__name__, + "error": str(error), + "retry_count": str(retry_count), + "failed_at": datetime.now(timezone.utc).isoformat(), + "status": str(DLQMessageStatus.PENDING), + "producer_id": producer_id, + }) + await self._broker.publish( message=serialized_value, topic=dlq_topic, key=original_event.event_id.encode() if original_event.event_id else None, - headers={ - "original_topic": original_topic, - "error_type": type(error).__name__, - "retry_count": str(retry_count), - }, + headers=headers, ) self._event_metrics.record_kafka_message_produced(dlq_topic) diff --git a/backend/app/events/handlers.py b/backend/app/events/handlers.py index 4179e1bd..d067cebf 100644 --- a/backend/app/events/handlers.py +++ b/backend/app/events/handlers.py @@ -1,5 +1,4 @@ import asyncio -import json import logging from collections.abc import Awaitable, Callable from datetime import datetime, timezone @@ -13,6 +12,7 @@ from app.core.tracing import EventAttributes from app.core.tracing.utils import extract_trace_context, get_tracer from app.dlq.manager import DLQManager +from app.dlq.models import DLQMessage, DLQMessageStatus from app.domain.enums.events import EventType from app.domain.enums.kafka import CONSUMER_GROUP_SUBSCRIPTIONS, GroupId, KafkaTopic from app.domain.events.typed import ( @@ -290,23 +290,19 @@ async def on_unhandled(body: DomainEvent) -> None: def register_dlq_subscriber(broker: KafkaBroker, settings: Settings) -> None: """Register a DLQ subscriber that consumes dead-letter messages. - DLQ messages are plain JSON (not Avro), so a custom decoder is used - to bypass the broker-level Avro decoder. + DLQ messages are Avro-encoded DomainEvents (same as every other topic). + DLQ metadata (original_topic, error, retry_count, etc.) lives in Kafka headers. 
""" topic_name = f"{settings.KAFKA_TOPIC_PREFIX}{KafkaTopic.DEAD_LETTER_QUEUE}" - async def dlq_json_decoder(msg: StreamMessage[Any]) -> dict[str, Any]: - return json.loads(msg.body) # type: ignore[no-any-return] - @broker.subscriber( topic_name, group_id=GroupId.DLQ_MANAGER, ack_policy=AckPolicy.ACK, auto_offset_reset="earliest", - decoder=dlq_json_decoder, ) async def on_dlq_message( - body: dict[str, Any], + body: DomainEvent, msg: StreamMessage[Any], manager: FromDishka[DLQManager], logger: FromDishka[logging.Logger], @@ -314,22 +310,34 @@ async def on_dlq_message( start = asyncio.get_running_loop().time() raw = msg.raw_message headers = {k: v.decode() for k, v in (raw.headers or [])} - dlq_msg = manager.parse_dlq_body(body, raw.offset, raw.partition, headers) - ctx = extract_trace_context(dlq_msg.headers) + dlq_msg = DLQMessage( + event=body, + original_topic=headers.get("original_topic", ""), + error=headers.get("error", "Unknown error"), + retry_count=int(headers.get("retry_count", "0")), + failed_at=datetime.fromisoformat(headers["failed_at"]), + status=DLQMessageStatus(headers.get("status", "pending")), + producer_id=headers.get("producer_id", "unknown"), + dlq_offset=raw.offset, + dlq_partition=raw.partition, + headers=headers, + ) + + ctx = extract_trace_context(headers) with get_tracer().start_as_current_span( name="dlq.consume", context=ctx, kind=SpanKind.CONSUMER, attributes={ EventAttributes.KAFKA_TOPIC: str(manager.dlq_topic), - EventAttributes.EVENT_TYPE: dlq_msg.event.event_type, - EventAttributes.EVENT_ID: dlq_msg.event.event_id, + EventAttributes.EVENT_TYPE: body.event_type, + EventAttributes.EVENT_ID: body.event_id, }, ): await manager.handle_message(dlq_msg) - manager.metrics.record_dlq_message_received(dlq_msg.original_topic, dlq_msg.event.event_type) + manager.metrics.record_dlq_message_received(dlq_msg.original_topic, body.event_type) manager.metrics.record_dlq_message_age( (datetime.now(timezone.utc) - dlq_msg.failed_at).total_seconds() ) From ed71b67a8df163486f234b44eb0009c966a5f581 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 22:40:18 +0100 Subject: [PATCH 05/10] fix: misc --- backend/app/db/repositories/event_repository.py | 8 +++++++- backend/app/events/core/producer.py | 4 ++-- backend/app/services/execution_service.py | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/backend/app/db/repositories/event_repository.py b/backend/app/db/repositories/event_repository.py index 3fc62527..17598020 100644 --- a/backend/app/db/repositories/event_repository.py +++ b/backend/app/db/repositories/event_repository.py @@ -161,13 +161,19 @@ async def get_events_by_user( return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] async def get_execution_events( - self, execution_id: str, limit: int = 100, skip: int = 0, exclude_system_events: bool = False + self, + execution_id: str, + limit: int = 100, + skip: int = 0, + exclude_system_events: bool = False, + event_types: list[EventType] | None = None, ) -> EventListResult: conditions: list[Any] = [ Or( EventDocument.execution_id == execution_id, EventDocument.aggregate_id == execution_id, ), + In(EventDocument.event_type, event_types) if event_types else None, Not(RegEx(EventDocument.metadata.service_name, "^system-")) if exclude_system_events else None, ] conditions = [c for c in conditions if c is not None] diff --git a/backend/app/events/core/producer.py b/backend/app/events/core/producer.py index 5f2523a9..daad87e2 100644 --- 
a/backend/app/events/core/producer.py +++ b/backend/app/events/core/producer.py @@ -81,7 +81,7 @@ async def send_to_dlq( producer_id = f"{socket.gethostname()}-{task_name}" serialized_value = await self._schema_registry.serialize_event(original_event) - dlq_topic = f"{self._topic_prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}" + dlq_topic = f"{self._topic_prefix}{KafkaTopic.DEAD_LETTER_QUEUE}" headers = inject_trace_context({ "event_type": original_event.event_type, @@ -90,7 +90,7 @@ async def send_to_dlq( "error": str(error), "retry_count": str(retry_count), "failed_at": datetime.now(timezone.utc).isoformat(), - "status": str(DLQMessageStatus.PENDING), + "status": DLQMessageStatus.PENDING, "producer_id": producer_id, }) diff --git a/backend/app/services/execution_service.py b/backend/app/services/execution_service.py index 35cac84c..59a7b556 100644 --- a/backend/app/services/execution_service.py +++ b/backend/app/services/execution_service.py @@ -303,7 +303,9 @@ async def get_execution_events( Returns: List of events for the execution. """ - result = await self.event_repository.get_execution_events(execution_id=execution_id, limit=limit) + result = await self.event_repository.get_execution_events( + execution_id=execution_id, event_types=event_types, limit=limit, + ) events = result.events self.logger.debug( From c4f1f86fab2306dda60f40f0298ae8da1cb47b77 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 22:40:36 +0100 Subject: [PATCH 06/10] fix: misc --- backend/config.test.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/config.test.toml b/backend/config.test.toml index ea1f11db..35829464 100644 --- a/backend/config.test.toml +++ b/backend/config.test.toml @@ -12,9 +12,9 @@ KUBERNETES_CONFIG_PATH = "/app/kubeconfig.yaml" KUBERNETES_CA_CERTIFICATE_PATH = "/app/certs/k8s-ca.pem" K8S_POD_CPU_LIMIT = "1000m" K8S_POD_MEMORY_LIMIT = "128Mi" -K8S_POD_CPU_REQUEST = "50m" +K8S_POD_CPU_REQUEST = "200m" K8S_POD_MEMORY_REQUEST = "128Mi" -K8S_POD_EXECUTION_TIMEOUT = 10 +K8S_POD_EXECUTION_TIMEOUT = 5 K8S_NAMESPACE = "integr8scode" RATE_LIMITS = "99999/second" From b8c0123e9c4fed18c626c29f54a0e55858e66eb4 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 23:12:12 +0100 Subject: [PATCH 07/10] fix: waiting explicitly for daemonpuller --- .github/actions/e2e-ready/action.yml | 4 + backend/app/services/k8s_worker/worker.py | 115 ++++++++++++---------- backend/workers/run_k8s_worker.py | 6 +- 3 files changed, 69 insertions(+), 56 deletions(-) diff --git a/.github/actions/e2e-ready/action.yml b/.github/actions/e2e-ready/action.yml index 43073d27..cf6b7e33 100644 --- a/.github/actions/e2e-ready/action.yml +++ b/.github/actions/e2e-ready/action.yml @@ -71,6 +71,10 @@ runs: echo "Frontend ready" fi + - name: Wait for runtime images + shell: bash + run: kubectl -n integr8scode rollout status daemonset/runtime-image-pre-puller + - name: Seed test users shell: bash run: docker compose exec -T backend uv run python scripts/seed_users.py diff --git a/backend/app/services/k8s_worker/worker.py b/backend/app/services/k8s_worker/worker.py index 7cf199d2..c12dd2e9 100644 --- a/backend/app/services/k8s_worker/worker.py +++ b/backend/app/services/k8s_worker/worker.py @@ -5,6 +5,7 @@ from typing import Any from kubernetes_asyncio import client as k8s_client +from kubernetes_asyncio import watch as k8s_watch from kubernetes_asyncio.client.rest import ApiException from app.core.metrics import EventMetrics, ExecutionMetrics, KubernetesMetrics @@ -251,63 +252,75 @@ async 
def wait_for_active_creations(self, timeout: float = 30.0) -> None: self.logger.warning(f"Timeout waiting for pod creations, {len(self._active_creations)} still active") async def ensure_image_pre_puller_daemonset(self) -> None: - """Ensure the runtime image pre-puller DaemonSet exists.""" + """Create/replace the image pre-puller DaemonSet and block until all images are pulled. + + Uses a K8s watch stream — no polling or timeouts. Returns only when + every node reports Ready (all init-container image pulls finished). + """ daemonset_name = "runtime-image-pre-puller" namespace = self._settings.K8S_NAMESPACE - try: - init_containers = [] - all_images = {config.image for lang in RUNTIME_REGISTRY.values() for config in lang.values()} - - for i, image_ref in enumerate(sorted(list(all_images))): - sanitized_image_ref = image_ref.split("/")[-1].replace(":", "-").replace(".", "-").replace("_", "-") - self.logger.info(f"DAEMONSET: before: {image_ref} -> {sanitized_image_ref}") - container_name = f"pull-{i}-{sanitized_image_ref}" - init_containers.append( - { - "name": container_name, - "image": image_ref, - "command": ["/bin/sh", "-c", f'echo "Image {image_ref} pulled."'], - "imagePullPolicy": "Always", - } - ) + init_containers = [] + all_images = {config.image for lang in RUNTIME_REGISTRY.values() for config in lang.values()} + + for i, image_ref in enumerate(sorted(list(all_images))): + sanitized_image_ref = image_ref.split("/")[-1].replace(":", "-").replace(".", "-").replace("_", "-") + container_name = f"pull-{i}-{sanitized_image_ref}" + init_containers.append( + { + "name": container_name, + "image": image_ref, + "command": ["/bin/sh", "-c", f'echo "Image {image_ref} pulled."'], + "imagePullPolicy": "Always", + } + ) - manifest: dict[str, Any] = { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "metadata": {"name": daemonset_name, "namespace": namespace}, - "spec": { - "selector": {"matchLabels": {"name": daemonset_name}}, - "template": { - "metadata": {"labels": {"name": daemonset_name}}, - "spec": { - "initContainers": init_containers, - "containers": [{"name": "pause", "image": "registry.k8s.io/pause:3.9"}], - "tolerations": [{"operator": "Exists"}], - }, + self.logger.info(f"Pre-pulling {len(all_images)} runtime images via DaemonSet") + + manifest: dict[str, Any] = { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "metadata": {"name": daemonset_name, "namespace": namespace}, + "spec": { + "selector": {"matchLabels": {"name": daemonset_name}}, + "template": { + "metadata": {"labels": {"name": daemonset_name}}, + "spec": { + "initContainers": init_containers, + "containers": [{"name": "pause", "image": "registry.k8s.io/pause:3.9"}], + "tolerations": [{"operator": "Exists"}], }, - "updateStrategy": {"type": "RollingUpdate"}, }, - } + "updateStrategy": {"type": "RollingUpdate"}, + }, + } - try: - await self.apps_v1.read_namespaced_daemon_set(name=daemonset_name, namespace=namespace) - self.logger.info(f"DaemonSet '{daemonset_name}' exists. Replacing to ensure it is up-to-date.") - await self.apps_v1.replace_namespaced_daemon_set( - name=daemonset_name, namespace=namespace, body=manifest # type: ignore[arg-type] + try: + await self.apps_v1.read_namespaced_daemon_set(name=daemonset_name, namespace=namespace) + self.logger.info(f"DaemonSet '{daemonset_name}' exists. 
Replacing to ensure it is up-to-date.") + await self.apps_v1.replace_namespaced_daemon_set( + name=daemonset_name, namespace=namespace, body=manifest # type: ignore[arg-type] + ) + except ApiException as e: + if e.status == 404: + self.logger.info(f"DaemonSet '{daemonset_name}' not found. Creating...") + await self.apps_v1.create_namespaced_daemon_set( + namespace=namespace, body=manifest # type: ignore[arg-type] ) - self.logger.info(f"DaemonSet '{daemonset_name}' replaced successfully.") - except ApiException as e: - if e.status == 404: - self.logger.info(f"DaemonSet '{daemonset_name}' not found. Creating...") - await self.apps_v1.create_namespaced_daemon_set( - namespace=namespace, body=manifest # type: ignore[arg-type] - ) - self.logger.info(f"DaemonSet '{daemonset_name}' created successfully.") - else: - raise + else: + raise - except ApiException as e: - self.logger.error(f"K8s API error applying DaemonSet '{daemonset_name}': {e.reason}", exc_info=True) - except Exception as e: - self.logger.error(f"Unexpected error applying image-puller DaemonSet: {e}", exc_info=True) + # Block on a watch stream until every node has pulled all images + w = k8s_watch.Watch() + async for event in w.stream( + self.apps_v1.list_namespaced_daemon_set, + namespace=namespace, + field_selector=f"metadata.name={daemonset_name}", + ): + ds = event["object"] + desired = ds.status.desired_number_scheduled or 0 + ready = ds.status.number_ready or 0 + self.logger.info(f"DaemonSet '{daemonset_name}': {ready}/{desired} pods ready") + if desired > 0 and ready >= desired: + await w.close() + return diff --git a/backend/workers/run_k8s_worker.py b/backend/workers/run_k8s_worker.py index 3044625b..489b8ee6 100644 --- a/backend/workers/run_k8s_worker.py +++ b/backend/workers/run_k8s_worker.py @@ -47,13 +47,9 @@ def main() -> None: @app.on_startup async def startup() -> None: await container.get(Database) # triggers init_beanie inside provider - logger.info("KubernetesWorker infrastructure initialized") - - @app.after_startup - async def bootstrap() -> None: worker = await container.get(KubernetesWorker) await worker.ensure_image_pre_puller_daemonset() - logger.info("Image pre-puller daemonset applied") + logger.info("KubernetesWorker ready — all runtime images pre-pulled") @app.on_shutdown async def shutdown() -> None: From eda7419d88d3858d3aa756aa5f5c580b8fdae1dc Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 23:36:27 +0100 Subject: [PATCH 08/10] fix: another wait checker for daemonpuller --- .github/actions/e2e-ready/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e-ready/action.yml b/.github/actions/e2e-ready/action.yml index cf6b7e33..1bbdac05 100644 --- a/.github/actions/e2e-ready/action.yml +++ b/.github/actions/e2e-ready/action.yml @@ -73,7 +73,7 @@ runs: - name: Wait for runtime images shell: bash - run: kubectl -n integr8scode rollout status daemonset/runtime-image-pre-puller + run: docker compose logs k8s-worker --follow 2>&1 | grep -qm1 "all runtime images pre-pulled" - name: Seed test users shell: bash From 68002107565a47a4c1321bcbea5c0cf5665e5ee6 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Wed, 4 Feb 2026 23:58:09 +0100 Subject: [PATCH 09/10] fix: prepull of python3.11, deploy without wait --- .github/actions/e2e-ready/action.yml | 8 ++++---- backend/app/services/k8s_worker/worker.py | 25 +++++++---------------- backend/workers/run_k8s_worker.py | 5 ++++- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git 
a/.github/actions/e2e-ready/action.yml b/.github/actions/e2e-ready/action.yml index 1bbdac05..33927c10 100644 --- a/.github/actions/e2e-ready/action.yml +++ b/.github/actions/e2e-ready/action.yml @@ -32,6 +32,10 @@ runs: cp backend/config.test.toml backend/config.toml cp backend/secrets.example.toml backend/secrets.toml + - name: Pre-pull test runtime image into K3s + shell: bash + run: sudo k3s ctr images pull docker.io/library/python:3.11-slim + - name: Wait for image pull and infra shell: bash run: | @@ -71,10 +75,6 @@ runs: echo "Frontend ready" fi - - name: Wait for runtime images - shell: bash - run: docker compose logs k8s-worker --follow 2>&1 | grep -qm1 "all runtime images pre-pulled" - - name: Seed test users shell: bash run: docker compose exec -T backend uv run python scripts/seed_users.py diff --git a/backend/app/services/k8s_worker/worker.py b/backend/app/services/k8s_worker/worker.py index c12dd2e9..b7bdba7f 100644 --- a/backend/app/services/k8s_worker/worker.py +++ b/backend/app/services/k8s_worker/worker.py @@ -5,7 +5,6 @@ from typing import Any from kubernetes_asyncio import client as k8s_client -from kubernetes_asyncio import watch as k8s_watch from kubernetes_asyncio.client.rest import ApiException from app.core.metrics import EventMetrics, ExecutionMetrics, KubernetesMetrics @@ -252,10 +251,13 @@ async def wait_for_active_creations(self, timeout: float = 30.0) -> None: self.logger.warning(f"Timeout waiting for pod creations, {len(self._active_creations)} still active") async def ensure_image_pre_puller_daemonset(self) -> None: - """Create/replace the image pre-puller DaemonSet and block until all images are pulled. + """Create or replace the image pre-puller DaemonSet (fire-and-forget). - Uses a K8s watch stream — no polling or timeouts. Returns only when - every node reports Ready (all init-container image pulls finished). + The DaemonSet pulls all runtime images onto every node in the background. + This method returns immediately after the DaemonSet is applied — it does + NOT wait for images to finish pulling. In CI, the test-critical image + (python:3.11-slim) is pre-pulled directly into K3s containerd before the + stack starts, so execution pods never hit a cold pull. 
""" daemonset_name = "runtime-image-pre-puller" namespace = self._settings.K8S_NAMESPACE @@ -310,17 +312,4 @@ async def ensure_image_pre_puller_daemonset(self) -> None: else: raise - # Block on a watch stream until every node has pulled all images - w = k8s_watch.Watch() - async for event in w.stream( - self.apps_v1.list_namespaced_daemon_set, - namespace=namespace, - field_selector=f"metadata.name={daemonset_name}", - ): - ds = event["object"] - desired = ds.status.desired_number_scheduled or 0 - ready = ds.status.number_ready or 0 - self.logger.info(f"DaemonSet '{daemonset_name}': {ready}/{desired} pods ready") - if desired > 0 and ready >= desired: - await w.close() - return + self.logger.info(f"DaemonSet '{daemonset_name}' applied — images will pull in background") diff --git a/backend/workers/run_k8s_worker.py b/backend/workers/run_k8s_worker.py index 489b8ee6..457d04ab 100644 --- a/backend/workers/run_k8s_worker.py +++ b/backend/workers/run_k8s_worker.py @@ -47,9 +47,12 @@ def main() -> None: @app.on_startup async def startup() -> None: await container.get(Database) # triggers init_beanie inside provider + logger.info("KubernetesWorker ready") + + @app.after_startup + async def after_startup() -> None: worker = await container.get(KubernetesWorker) await worker.ensure_image_pre_puller_daemonset() - logger.info("KubernetesWorker ready — all runtime images pre-pulled") @app.on_shutdown async def shutdown() -> None: From 20288b5c15d5f5f8e2cad5a498f339fdbec4a507 Mon Sep 17 00:00:00 2001 From: HardMax71 Date: Thu, 5 Feb 2026 16:36:40 +0100 Subject: [PATCH 10/10] Yes, FastStream natively handles Pydantic models over Kafka without any Avro/Schema Registry. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipeline: Publishing (broker.publish(message=pydantic_model, topic=...)): 1. FastStream's PydanticSerializer.encode() calls pydantic_core.to_json(model) → raw JSON bytes 2. Adds content-type: application/json header 3. Sends to Kafka via aiokafka Consuming (handler with body: SomePydanticModel): 1. Raw bytes → json.loads() → Python dict 2. fast_depends PydanticSerializer validates the dict against the handler's type annotation → typed Pydantic model instance 3. Discriminated unions (Annotated[Union[...], Discriminator("event_type")]) work natively because Pydantic resolves the correct concrete type from the discriminator field Verified by tracing through the actual installed library code at .venv/lib/python3.12/site-packages/faststream/ and .venv/lib/python3.12/site-packages/fast_depends/. One thing that needs fixing: the DLQ handler's msg: StreamMessage[Any] parameter. Without the custom decoder, FastStream treats it as a body parameter (not context injection), causing the multi-param unpacking failure. The standard FastStream fix is msg: StreamMessage[Any] = Context("message") — this is not a custom workaround, it's the documented way to access the raw StreamMessage alongside a typed body parameter. What you lose by dropping Avro: - Schema Registry compatibility checks (schema evolution enforcement) - Compact binary encoding (Avro is ~2-3x smaller than JSON) What you gain: - No Schema Registry dependency - No pydantic-avro dependency - No custom decoder that breaks FastStream's native parameter resolution - DLQ handler works correctly - Simpler codebase — FastStream does all the work Avro → JSON migration (core changes): 1. 
app/domain/events/typed.py — Replaced AvroBase (from pydantic-avro) with BaseModel on all 5 classes: ResourceUsageDomain, EventMetadata, BaseEvent, ContainerStatusInfo, ArchivedEvent
2. app/events/broker.py — Removed the custom avro_decoder and SchemaRegistryManager param. Broker now uses FastStream's native JSON serialization
3. app/events/core/producer.py — Removed SchemaRegistryManager dependency. produce() and send_to_dlq() now publish Pydantic models directly via broker.publish(message=event, ...)
4. app/dlq/manager.py — Removed SchemaRegistryManager dependency. retry_message() and _produce_dlq_event() publish events directly
5. app/events/handlers.py — Added Context("message") for DLQ handler's StreamMessage[Any] parameter (standard FastStream pattern for accessing raw message alongside typed body)
6. app/core/providers.py — Deleted EventProvider class entirely. Removed schema_registry param from MessagingProvider, DLQProvider, DLQWorkerProvider
7. app/core/container.py — Removed EventProvider from all 8 container factory functions
8. app/main.py — Removed SchemaRegistryManager creation
9. All 7 worker entry points — Removed SchemaRegistryManager import/creation, simplified create_broker() calls

Deleted files:
- app/events/schema/schema_registry.py — dead code
- tests/e2e/events/test_schema_registry_roundtrip.py — tests deleted functionality
- tests/e2e/events/test_schema_registry_real.py — tests deleted functionality

Dependencies removed (from pyproject.toml):
- pydantic-avro==0.9.1
- python-schema-registry-client==2.6.1
- avro-python3==1.10.2
- fastavro==1.12.1
- (plus 4 transitive deps: jsonschema, jsonschema-specifications, referencing, rpds-py)

CI fix:
- e2e-ready/action.yml — Changed sudo k3s ctr images pull → sudo k3s crictl pull (uses CRI interface, targets correct k8s.io containerd namespace)

Config:
- Added faststream.Context to ruff's extend-immutable-calls

Good question. Let me look at industry practices:

1 Topic = 1 Event Type (fine-grained)
- Confluent's modern recommendation for event-driven microservices
- Schema evolution is simpler (1 schema per topic)
- No filtering needed - Kafka does routing
- Better observability (metrics/alerts per event type)
- Modern Kafka handles 10,000+ topics easily

1 Topic = N Event Types (coarse-grained)
- Traditional approach when Kafka topics were "expensive"
- Preserves ordering across event types (important for event sourcing)
- Requires filtering (header/body based)
- Common in DDD: 1 topic per aggregate (all Order events together)

The deciding question: Do you need ordering across event types?

Example: If EXECUTION_REQUESTED → EXECUTION_STARTED → EXECUTION_COMPLETED must be processed in order for the same execution_id, they should share a topic (partitioned by execution_id).

But looking at your architecture:
- Coordinator handles: REQUESTED, COMPLETED, FAILED, CANCELLED
- Result processor handles: COMPLETED, FAILED, TIMEOUT
- Different services, different events - no cross-event ordering needed

Modern consensus: Fine-grained topics (1:1) unless you specifically need cross-event ordering guarantees. Companies like Uber, Netflix, LinkedIn moved toward more granular topics as Kafka matured.

Want me to refactor to 1 topic = 1 event type? It would:
- Eliminate all filtering
- Simplify handlers to pure type hints
- Make EVENT_TYPE_TO_TOPIC trivial (topic = event_type.value)

You're right.
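For reference, a minimal runnable sketch of the FastStream flow described above: Pydantic model -> JSON bytes on publish, JSON -> validated model in the handler, raw message pulled from context via Context("message"). The broker address, topic name, group id, and the ExecutionRequested model here are illustrative placeholders, not the repo's actual definitions:

    import asyncio

    from faststream import Context, FastStream
    from faststream.kafka import KafkaBroker
    from pydantic import BaseModel

    class ExecutionRequested(BaseModel):      # placeholder model, not the repo's class
        execution_id: str
        script: str

    broker = KafkaBroker("localhost:9092")    # assumed local broker
    app = FastStream(broker)

    @broker.subscriber("execution_requested", group_id="demo", auto_offset_reset="earliest")
    async def handle(body: ExecutionRequested, msg=Context("message")) -> None:
        # body: JSON bytes -> dict -> validated Pydantic model (fast_depends)
        # msg: raw StreamMessage injected from context (headers, offset, partition)
        print(body.execution_id, msg.headers)

    @app.after_startup
    async def send_one() -> None:
        # Pydantic model -> JSON bytes + content-type: application/json header
        await broker.publish(
            ExecutionRequested(execution_id="demo-1", script="print('hi')"),
            topic="execution_requested",
        )

    if __name__ == "__main__":
        asyncio.run(app.run())

The msg=Context("message") parameter is the same documented pattern called out for the DLQ handler above; without it, a second non-body parameter would be treated as part of the message body.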
If 1 topic = 1 type:
- Handler subscribes to topic execution_requested with type ExecutionRequested
- Topic IS the type - no event_type field needed for routing
- FastStream deserializes based on type hint

The only question: when storing/retrieving from DB, how do we know the type?
Answer: Store topic name as metadata, not as event field.

Quick check (uv run python << 'EOF', """Test: No event_type field at all - topic is the type"""):
    Serialized (no event_type): {"event_id":"","execution_id":"123","script":"print('hi')"}
    Topic: execution_requested
    … (+25 lines truncated)

Final architecture - minimal:

    class BaseEvent(BaseModel):
        event_id: str = ""
        timestamp: datetime = ...
        metadata: EventMetadata

        @classmethod
        def topic(cls, prefix: str = "") -> str:
            return f"{prefix}{to_snake_case(cls.__name__)}"

    class ExecutionRequested(BaseEvent):
        execution_id: str
        script: str

    # Handler - topic IS the type
    @broker.subscriber(ExecutionRequested.topic(prefix), group_id="coordinator")
    async def handle(event: ExecutionRequested): ...

    # Producer - topic from class
    await broker.publish(event, topic=type(event).topic(prefix))

    # DB storage - topic as document metadata, not event field
    {"topic": "execution_requested", "data": {...}}

Removed: event_type field, EventType enum, DomainEvent, registry, filters, discriminators
Kept: BaseEvent, specific classes, topic() classmethod

man that's a mess
---
 .github/actions/e2e-ready/action.yml | 2 +-
 backend/app/api/routes/admin/events.py | 9 +-
 backend/app/api/routes/dlq.py | 4 +-
 backend/app/api/routes/events.py | 59 ++-
 backend/app/api/routes/execution.py | 39 +-
 backend/app/core/container.py | 14 +-
 backend/app/core/providers.py | 76 ++--
 backend/app/db/docs/dlq.py | 8 +-
 backend/app/db/docs/event.py | 22 +-
 .../admin/admin_events_repository.py | 24 +-
 backend/app/db/repositories/dlq_repository.py | 15 +-
 .../app/db/repositories/event_repository.py | 99 ++---
 .../repositories/user_settings_repository.py | 5 +-
 backend/app/dlq/manager.py | 25 +-
 backend/app/dlq/models.py | 8 +-
 backend/app/domain/admin/overview_models.py | 4 +-
 backend/app/domain/enums/kafka.py | 134 +------
 backend/app/domain/events/__init__.py | 16 +-
 backend/app/domain/events/event_models.py | 29 +-
 backend/app/domain/events/typed.py | 214 ++---------
 backend/app/domain/user/settings_models.py | 5 +-
 backend/app/events/broker.py | 27 --
 backend/app/events/core/__init__.py | 6 +-
 backend/app/events/core/producer.py | 133 +++----
 backend/app/events/handlers.py | 353 +++++++++---------
 backend/app/events/schema/schema_registry.py | 28 --
 backend/app/infrastructure/kafka/__init__.py | 19 +-
 backend/app/infrastructure/kafka/mappings.py | 104 ------
 backend/app/infrastructure/kafka/topics.py | 201 ----------
 backend/app/main.py | 6 +-
 backend/app/schemas_pydantic/admin_events.py | 13 +-
 .../schemas_pydantic/admin_user_overview.py | 4 +-
 backend/app/schemas_pydantic/dlq.py | 6 +-
 backend/app/schemas_pydantic/events.py | 45 ++-
 backend/app/schemas_pydantic/sse.py | 2 +-
 .../services/admin/admin_events_service.py | 1 -
 .../app/services/admin/admin_user_service.py | 17 +-
 .../app/services/coordinator/coordinator.py | 19 +-
 .../services/event_replay/replay_service.py | 22 +-
 backend/app/services/event_service.py | 23 +-
 backend/app/services/execution_service.py | 53 ++-
 backend/app/services/idempotency/__init__.py | 2 +
 .../idempotency/idempotency_manager.py | 77 +++-
 .../app/services/idempotency/middleware.py | 109 ++++++
 backend/app/services/k8s_worker/worker.py | 19 +-
backend/app/services/kafka_event_service.py | 164 -------- backend/app/services/notification_service.py | 3 - backend/app/services/pod_monitor/config.py | 12 +- .../app/services/pod_monitor/event_mapper.py | 18 +- backend/app/services/pod_monitor/monitor.py | 16 +- .../services/result_processor/processor.py | 29 +- backend/app/services/saga/execution_saga.py | 12 +- .../app/services/saga/saga_orchestrator.py | 32 +- backend/app/services/saga/saga_step.py | 4 +- backend/app/services/sse/redis_bus.py | 49 +-- backend/app/services/user_settings_service.py | 43 +-- backend/pyproject.toml | 6 +- backend/scripts/create_topics.py | 96 ++++- backend/tests/e2e/app/test_main_app.py | 8 - backend/tests/e2e/core/test_container.py | 15 +- .../tests/e2e/core/test_dishka_lifespan.py | 10 - backend/tests/e2e/core/test_middlewares.py | 1 - backend/tests/e2e/dlq/test_dlq_discard.py | 4 +- backend/tests/e2e/dlq/test_dlq_manager.py | 27 +- backend/tests/e2e/dlq/test_dlq_retry.py | 4 +- .../e2e/events/test_producer_roundtrip.py | 10 +- .../e2e/events/test_schema_registry_real.py | 29 -- .../events/test_schema_registry_roundtrip.py | 25 -- .../idempotency/test_consumer_idempotent.py | 1 + .../tests/e2e/idempotency/test_idempotency.py | 2 +- .../result_processor/test_result_processor.py | 8 +- .../events/test_kafka_event_service.py | 67 ---- .../execution/test_execution_service.py | 16 +- .../idempotency/test_redis_repository.py | 1 - .../services/replay/test_replay_service.py | 3 +- .../sse/test_partitioned_event_router.py | 2 +- backend/tests/e2e/test_admin_events_routes.py | 27 +- backend/tests/e2e/test_events_routes.py | 26 +- backend/tests/e2e/test_execution_routes.py | 17 +- .../tests/e2e/test_k8s_worker_create_pod.py | 4 +- backend/tests/load/plot_report.py | 2 +- .../events/test_event_schema_coverage.py | 251 +++++-------- .../unit/events/test_mappings_and_types.py | 33 +- .../events/test_schema_registry_manager.py | 67 ++-- .../schemas_pydantic/test_events_schemas.py | 2 +- .../idempotency/test_idempotency_manager.py | 18 +- .../pod_monitor/test_config_and_init.py | 4 +- .../services/pod_monitor/test_event_mapper.py | 25 +- .../unit/services/pod_monitor/test_monitor.py | 107 +++--- .../result_processor/test_processor.py | 28 +- .../saga/test_execution_saga_steps.py | 21 +- .../services/saga/test_saga_comprehensive.py | 6 +- .../saga/test_saga_orchestrator_unit.py | 14 +- .../services/sse/test_kafka_redis_bridge.py | 9 +- backend/uv.lock | 198 ---------- backend/workers/dlq_processor.py | 6 +- backend/workers/run_coordinator.py | 13 +- backend/workers/run_event_replay.py | 6 +- backend/workers/run_k8s_worker.py | 13 +- backend/workers/run_pod_monitor.py | 6 +- backend/workers/run_result_processor.py | 13 +- backend/workers/run_saga_orchestrator.py | 13 +- 102 files changed, 1289 insertions(+), 2457 deletions(-) delete mode 100644 backend/app/events/broker.py delete mode 100644 backend/app/events/schema/schema_registry.py delete mode 100644 backend/app/infrastructure/kafka/mappings.py delete mode 100644 backend/app/infrastructure/kafka/topics.py create mode 100644 backend/app/services/idempotency/middleware.py delete mode 100644 backend/app/services/kafka_event_service.py delete mode 100644 backend/tests/e2e/events/test_schema_registry_real.py delete mode 100644 backend/tests/e2e/events/test_schema_registry_roundtrip.py delete mode 100644 backend/tests/e2e/services/events/test_kafka_event_service.py diff --git a/.github/actions/e2e-ready/action.yml b/.github/actions/e2e-ready/action.yml index 
33927c10..15d3e042 100644 --- a/.github/actions/e2e-ready/action.yml +++ b/.github/actions/e2e-ready/action.yml @@ -34,7 +34,7 @@ runs: - name: Pre-pull test runtime image into K3s shell: bash - run: sudo k3s ctr images pull docker.io/library/python:3.11-slim + run: sudo k3s crictl pull docker.io/library/python:3.11-slim - name: Wait for image pull and infra shell: bash diff --git a/backend/app/api/routes/admin/events.py b/backend/app/api/routes/admin/events.py index 3c5e89dc..f0ec1838 100644 --- a/backend/app/api/routes/admin/events.py +++ b/backend/app/api/routes/admin/events.py @@ -8,7 +8,6 @@ from app.api.dependencies import admin_user from app.core.correlation import CorrelationContext -from app.domain.enums.events import EventType from app.domain.events.event_models import EventFilter from app.domain.replay import ReplayFilter from app.schemas_pydantic.admin_events import ( @@ -69,14 +68,14 @@ async def get_event_stats( @router.get("/export/csv") async def export_events_csv( service: FromDishka[AdminEventsService], - event_types: list[EventType] | None = Query(None, description="Event types (repeat param for multiple)"), + topics: list[str] | None = Query(None, description="Event topics (repeat param for multiple)"), start_time: datetime | None = Query(None, description="Start time"), end_time: datetime | None = Query(None, description="End time"), limit: int = Query(default=10000, le=50000), ) -> StreamingResponse: try: export_filter = EventFilter( - event_types=event_types, + topics=topics, start_time=start_time, end_time=end_time, ) @@ -94,7 +93,7 @@ async def export_events_csv( @router.get("/export/json") async def export_events_json( service: FromDishka[AdminEventsService], - event_types: list[EventType] | None = Query(None, description="Event types (repeat param for multiple)"), + topics: list[str] | None = Query(None, description="Event topics (repeat param for multiple)"), aggregate_id: str | None = Query(None, description="Aggregate ID filter"), correlation_id: str | None = Query(None, description="Correlation ID filter"), user_id: str | None = Query(None, description="User ID filter"), @@ -106,7 +105,7 @@ async def export_events_json( """Export events as JSON with comprehensive filtering.""" try: export_filter = EventFilter( - event_types=event_types, + topics=topics, aggregate_id=aggregate_id, correlation_id=correlation_id, user_id=user_id, diff --git a/backend/app/api/routes/dlq.py b/backend/app/api/routes/dlq.py index 9df6963e..738a6f57 100644 --- a/backend/app/api/routes/dlq.py +++ b/backend/app/api/routes/dlq.py @@ -7,7 +7,6 @@ from app.dlq import RetryPolicy from app.dlq.manager import DLQManager from app.dlq.models import DLQMessageStatus -from app.domain.enums.events import EventType from app.schemas_pydantic.dlq import ( DLQBatchRetryResponse, DLQMessageDetail, @@ -36,12 +35,11 @@ async def get_dlq_messages( repository: FromDishka[DLQRepository], status: DLQMessageStatus | None = Query(None), topic: str | None = None, - event_type: EventType | None = Query(None), limit: int = Query(50, ge=1, le=1000), offset: int = Query(0, ge=0), ) -> DLQMessagesResponse: result = await repository.get_messages( - status=status, topic=topic, event_type=event_type, limit=limit, offset=offset + status=status, topic=topic, limit=limit, offset=offset ) # Convert domain messages to response models using model_validate diff --git a/backend/app/api/routes/events.py b/backend/app/api/routes/events.py index 4dbf3ca5..86d4b2e7 100644 --- a/backend/app/api/routes/events.py +++ 
b/backend/app/api/routes/events.py @@ -11,10 +11,10 @@ from app.core.correlation import CorrelationContext from app.core.utils import get_client_ip from app.domain.enums.common import SortOrder -from app.domain.enums.events import EventType from app.domain.enums.user import UserRole from app.domain.events.event_models import EventFilter -from app.domain.events.typed import BaseEvent, DomainEvent, EventMetadata +from app.domain.events.typed import BaseEvent, EventMetadata +from app.events.core import EventPublisher from app.schemas_pydantic.events import ( DeleteEventResponse, EventAggregationRequest, @@ -28,7 +28,6 @@ from app.schemas_pydantic.user import UserResponse from app.services.event_service import EventService from app.services.execution_service import ExecutionService -from app.services.kafka_event_service import KafkaEventService from app.settings import Settings router = APIRouter(prefix="/events", tags=["events"], route_class=DishkaRoute) @@ -74,7 +73,7 @@ async def get_execution_events( async def get_user_events( current_user: Annotated[UserResponse, Depends(current_user)], event_service: FromDishka[EventService], - event_types: list[EventType] | None = Query(None), + topics: list[str] | None = Query(None), start_time: datetime | None = Query(None), end_time: datetime | None = Query(None), limit: int = Query(100, ge=1, le=1000), @@ -84,7 +83,7 @@ async def get_user_events( """Get events for the current user""" result = await event_service.get_user_events_paginated( user_id=current_user.user_id, - event_types=event_types, + topics=topics, start_time=start_time, end_time=end_time, limit=limit, @@ -108,7 +107,7 @@ async def query_events( event_service: FromDishka[EventService], ) -> EventListResponse: event_filter = EventFilter( - event_types=filter_request.event_types, + topics=filter_request.topics, aggregate_id=filter_request.aggregate_id, correlation_id=filter_request.correlation_id, user_id=filter_request.user_id, @@ -218,10 +217,10 @@ async def get_event_statistics( return EventStatistics.model_validate(stats) -@router.get("/{event_id}", response_model=DomainEvent) +@router.get("/{event_id}", response_model=BaseEvent) async def get_event( event_id: str, current_user: Annotated[UserResponse, Depends(current_user)], event_service: FromDishka[EventService] -) -> DomainEvent: +) -> BaseEvent: """Get a specific event by ID""" event = await event_service.get_event(event_id=event_id, user_id=current_user.user_id, user_role=current_user.role) if event is None: @@ -234,28 +233,30 @@ async def publish_custom_event( admin: Annotated[UserResponse, Depends(admin_user)], event_request: PublishEventRequest, request: Request, - event_service: FromDishka[KafkaEventService], + producer: FromDishka[EventPublisher], settings: FromDishka[Settings], ) -> PublishEventResponse: + """Publish a custom event (admin only). 
Creates a BaseEvent with the provided payload.""" base_meta = EventMetadata( service_name=settings.SERVICE_NAME, service_version=settings.SERVICE_VERSION, user_id=admin.user_id, ip_address=get_client_ip(request), user_agent=request.headers.get("user-agent"), + correlation_id=event_request.correlation_id or CorrelationContext.get_correlation_id(), ) # Merge any additional metadata provided in request (extra allowed) if event_request.metadata: base_meta = base_meta.model_copy(update=event_request.metadata) - event_id = await event_service.publish_event( - event_type=event_request.event_type, - payload=event_request.payload, + # Create a BaseEvent with the custom payload in metadata + event = BaseEvent( aggregate_id=event_request.aggregate_id, - correlation_id=event_request.correlation_id, metadata=base_meta, ) + event_id = await producer.publish(event=event, key=event_request.aggregate_id) + return PublishEventResponse(event_id=event_id, status="published", timestamp=datetime.now(timezone.utc)) @@ -275,12 +276,12 @@ async def aggregate_events( return result.results -@router.get("/types/list", response_model=list[str]) -async def list_event_types( +@router.get("/topics/list", response_model=list[str]) +async def list_topics( current_user: Annotated[UserResponse, Depends(current_user)], event_service: FromDishka[EventService] ) -> list[str]: - event_types = await event_service.list_event_types(user_id=current_user.user_id, user_role=current_user.role) - return event_types + topics = await event_service.list_topics(user_id=current_user.user_id, user_role=current_user.role) + return topics @router.delete("/{event_id}", response_model=DeleteEventResponse) @@ -300,7 +301,6 @@ async def delete_event( extra={ "event_id": event_id, "admin_email": admin.email, - "event_type": result.event_type, "aggregate_id": result.aggregate_id, "correlation_id": result.metadata.correlation_id, }, @@ -316,8 +316,7 @@ async def replay_aggregate_events( aggregate_id: str, admin: Annotated[UserResponse, Depends(admin_user)], event_service: FromDishka[EventService], - kafka_event_service: FromDishka[KafkaEventService], - settings: FromDishka[Settings], + producer: FromDishka[EventPublisher], logger: FromDishka[logging.Logger], target_service: str | None = Query(None, description="Service to replay events to"), dry_run: bool = Query(True, description="If true, only show what would be replayed"), @@ -331,7 +330,7 @@ async def replay_aggregate_events( dry_run=True, aggregate_id=aggregate_id, event_count=replay_info.event_count, - event_types=replay_info.event_types, + topics=replay_info.topics, start_time=replay_info.start_time, end_time=replay_info.end_time, ) @@ -346,21 +345,9 @@ async def replay_aggregate_events( await asyncio.sleep(0.1) try: - meta = EventMetadata( - service_name=settings.SERVICE_NAME, - service_version=settings.SERVICE_VERSION, - user_id=admin.user_id, - ) - # Extract payload fields (exclude base event fields + event_type discriminator) - base_fields = set(BaseEvent.model_fields.keys()) | {"event_type"} - extra_fields = {k: v for k, v in event.model_dump().items() if k not in base_fields} - await kafka_event_service.publish_event( - event_type=event.event_type, - payload=extra_fields, - aggregate_id=aggregate_id, - correlation_id=replay_correlation_id, - metadata=meta, - ) + # Update correlation_id for replay tracking + event.metadata.correlation_id = replay_correlation_id + await producer.publish(event=event, key=aggregate_id) replayed_count += 1 except Exception as e: logger.error(f"Failed to replay 
event {event.event_id}: {e}") diff --git a/backend/app/api/routes/execution.py b/backend/app/api/routes/execution.py index 9dd2b6f6..b51de65a 100644 --- a/backend/app/api/routes/execution.py +++ b/backend/app/api/routes/execution.py @@ -9,10 +9,9 @@ from app.api.dependencies import admin_user, current_user from app.core.tracing import EventAttributes, add_span_attributes from app.core.utils import get_client_ip -from app.domain.enums.events import EventType from app.domain.enums.execution import ExecutionStatus from app.domain.enums.user import UserRole -from app.domain.events.typed import BaseEvent, DomainEvent, EventMetadata +from app.domain.events.typed import BaseEvent, EventMetadata from app.domain.exceptions import DomainError from app.domain.idempotency import KeyStrategy from app.schemas_pydantic.execution import ( @@ -32,8 +31,6 @@ from app.services.event_service import EventService from app.services.execution_service import ExecutionService from app.services.idempotency import IdempotencyManager -from app.services.kafka_event_service import KafkaEventService -from app.settings import Settings router = APIRouter(route_class=DishkaRoute, tags=["execution"]) @@ -76,10 +73,9 @@ async def create_execution( # Handle idempotency if key provided pseudo_event = None if idempotency_key: - # Create a pseudo-event for idempotency tracking + # Create a pseudo-event for idempotency tracking (just needs event_id for key generation) pseudo_event = BaseEvent( event_id=str(uuid4()), - event_type=EventType.EXECUTION_REQUESTED, timestamp=datetime.now(timezone.utc), metadata=EventMetadata( user_id=current_user.user_id, correlation_id=str(uuid4()), service_name="api", service_version="1.0.0" @@ -160,8 +156,7 @@ async def cancel_execution( execution: Annotated[ExecutionInDB, Depends(get_execution_with_access)], current_user: Annotated[UserResponse, Depends(current_user)], cancel_request: CancelExecutionRequest, - event_service: FromDishka[KafkaEventService], - settings: FromDishka[Settings], + execution_service: FromDishka[ExecutionService], ) -> CancelResponse: # Handle terminal states terminal_states = [ExecutionStatus.COMPLETED, ExecutionStatus.FAILED, ExecutionStatus.TIMEOUT] @@ -175,32 +170,18 @@ async def cancel_execution( execution_id=execution.execution_id, status="already_cancelled", message="Execution was already cancelled", - event_id="-1", # exact event_id unknown ) - payload = { - "execution_id": execution.execution_id, - "status": str(ExecutionStatus.CANCELLED), - "reason": cancel_request.reason or "User requested cancellation", - "previous_status": str(execution.status), - } - meta = EventMetadata( - service_name=settings.SERVICE_NAME, - service_version=settings.SERVICE_VERSION, + await execution_service.cancel_execution( + execution_id=execution.execution_id, + reason=cancel_request.reason or "User requested cancellation", user_id=current_user.user_id, ) - event_id = await event_service.publish_event( - event_type=EventType.EXECUTION_CANCELLED, - payload=payload, - aggregate_id=execution.execution_id, - metadata=meta, - ) return CancelResponse( execution_id=execution.execution_id, status="cancellation_requested", message="Cancellation request submitted", - event_id=event_id, ) @@ -231,16 +212,16 @@ async def retry_execution( return ExecutionResponse.model_validate(new_result) -@router.get("/executions/{execution_id}/events", response_model=list[DomainEvent]) +@router.get("/executions/{execution_id}/events", response_model=list[BaseEvent]) async def get_execution_events( execution: 
Annotated[ExecutionInDB, Depends(get_execution_with_access)], event_service: FromDishka[EventService], - event_types: list[EventType] | None = Query(None, description="Event types to filter"), + topics: list[str] | None = Query(None, description="Event topics to filter"), limit: int = Query(100, ge=1, le=1000), -) -> list[DomainEvent]: +) -> list[BaseEvent]: """Get all events for an execution.""" events = await event_service.get_events_by_aggregate( - aggregate_id=execution.execution_id, event_types=event_types, limit=limit + aggregate_id=execution.execution_id, topics=topics, limit=limit ) return events diff --git a/backend/app/core/container.py b/backend/app/core/container.py index 44c281d4..ff2aaa84 100644 --- a/backend/app/core/container.py +++ b/backend/app/core/container.py @@ -11,9 +11,9 @@ DatabaseProvider, DLQProvider, DLQWorkerProvider, - EventProvider, EventReplayProvider, EventReplayWorkerProvider, + IdempotencyMiddlewareProvider, K8sWorkerProvider, KafkaServicesProvider, KubernetesProvider, @@ -52,7 +52,6 @@ def create_app_container(settings: Settings, broker: KafkaBroker) -> AsyncContai RepositoryProvider(), MessagingProvider(), DLQProvider(), - EventProvider(), SagaOrchestratorProvider(), KafkaServicesProvider(), SSEProvider(), @@ -79,8 +78,8 @@ def create_result_processor_container(settings: Settings, broker: KafkaBroker) - CoreServicesProvider(), MetricsProvider(), RepositoryProvider(), - EventProvider(), MessagingProvider(), + IdempotencyMiddlewareProvider(), DLQProvider(), ResultProcessorProvider(), context={Settings: settings, KafkaBroker: broker}, @@ -98,8 +97,8 @@ def create_coordinator_container(settings: Settings, broker: KafkaBroker) -> Asy MetricsProvider(), RepositoryProvider(), MessagingProvider(), + IdempotencyMiddlewareProvider(), DLQProvider(), - EventProvider(), CoordinatorProvider(), context={Settings: settings, KafkaBroker: broker}, ) @@ -116,8 +115,8 @@ def create_k8s_worker_container(settings: Settings, broker: KafkaBroker) -> Asyn MetricsProvider(), RepositoryProvider(), MessagingProvider(), + IdempotencyMiddlewareProvider(), DLQProvider(), - EventProvider(), KubernetesProvider(), K8sWorkerProvider(), context={Settings: settings, KafkaBroker: broker}, @@ -136,7 +135,6 @@ def create_pod_monitor_container(settings: Settings, broker: KafkaBroker) -> Asy RepositoryProvider(), MessagingProvider(), DLQProvider(), - EventProvider(), KafkaServicesProvider(), KubernetesProvider(), PodMonitorProvider(), @@ -158,8 +156,8 @@ def create_saga_orchestrator_container(settings: Settings, broker: KafkaBroker) MetricsProvider(), RepositoryProvider(), MessagingProvider(), + IdempotencyMiddlewareProvider(), DLQProvider(), - EventProvider(), SagaWorkerProvider(), context={Settings: settings, KafkaBroker: broker}, ) @@ -180,7 +178,6 @@ def create_event_replay_container(settings: Settings, broker: KafkaBroker) -> As RepositoryProvider(), MessagingProvider(), DLQProvider(), - EventProvider(), EventReplayWorkerProvider(), context={Settings: settings, KafkaBroker: broker}, ) @@ -202,6 +199,5 @@ def create_dlq_processor_container(settings: Settings, broker: KafkaBroker) -> A RepositoryProvider(), MessagingProvider(), DLQWorkerProvider(), - EventProvider(), context={Settings: settings, KafkaBroker: broker}, ) diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py index ae6579fa..c30f7ca8 100644 --- a/backend/app/core/providers.py +++ b/backend/app/core/providers.py @@ -51,8 +51,7 @@ from app.dlq.manager import DLQManager from app.domain.rate_limit import 
RateLimitConfig from app.domain.saga.models import SagaConfig -from app.events.core import UnifiedProducer -from app.events.schema.schema_registry import SchemaRegistryManager +from app.events.core import EventPublisher from app.services.admin import AdminEventsService, AdminSettingsService, AdminUserService from app.services.auth_service import AuthService from app.services.coordinator.coordinator import ExecutionCoordinator @@ -60,10 +59,9 @@ from app.services.event_service import EventService from app.services.execution_service import ExecutionService from app.services.grafana_alert_processor import GrafanaAlertProcessor -from app.services.idempotency import IdempotencyConfig, IdempotencyManager +from app.services.idempotency import IdempotencyConfig, IdempotencyManager, IdempotencyMiddleware from app.services.idempotency.redis_repository import RedisIdempotencyRepository from app.services.k8s_worker import KubernetesWorker -from app.services.kafka_event_service import KafkaEventService from app.services.notification_scheduler import NotificationScheduler from app.services.notification_service import NotificationService from app.services.pod_monitor.config import PodMonitorConfig @@ -177,16 +175,14 @@ class MessagingProvider(Provider): broker = from_context(provides=KafkaBroker, scope=Scope.APP) @provide - def get_unified_producer( + def get_event_publisher( self, broker: KafkaBroker, - schema_registry: SchemaRegistryManager, event_repository: EventRepository, logger: logging.Logger, settings: Settings, - event_metrics: EventMetrics, - ) -> UnifiedProducer: - return UnifiedProducer(broker, schema_registry, event_repository, logger, settings, event_metrics) + ) -> EventPublisher: + return EventPublisher(broker, event_repository, logger, settings) @provide def get_idempotency_repository(self, redis_client: redis.Redis) -> RedisIdempotencyRepository: @@ -199,6 +195,16 @@ def get_idempotency_manager( return IdempotencyManager(IdempotencyConfig(), repo, logger, database_metrics) +class IdempotencyMiddlewareProvider(Provider): + """Provides APP-scoped IdempotencyMiddleware for broker registration.""" + + scope = Scope.APP + + @provide + def get_middleware(self, redis_client: redis.Redis, settings: Settings) -> IdempotencyMiddleware: + return IdempotencyMiddleware(redis_client, settings.KAFKA_TOPIC_PREFIX) + + class DLQProvider(Provider): """Provides DLQManager without scheduling. 
Used by all containers except the DLQ worker.""" @@ -209,7 +215,6 @@ def get_dlq_manager( self, broker: KafkaBroker, settings: Settings, - schema_registry: SchemaRegistryManager, logger: logging.Logger, dlq_metrics: DLQMetrics, repository: DLQRepository, @@ -217,7 +222,6 @@ def get_dlq_manager( return DLQManager( settings=settings, broker=broker, - schema_registry=schema_registry, logger=logger, dlq_metrics=dlq_metrics, repository=repository, @@ -238,7 +242,6 @@ async def get_dlq_manager( self, broker: KafkaBroker, settings: Settings, - schema_registry: SchemaRegistryManager, logger: logging.Logger, dlq_metrics: DLQMetrics, repository: DLQRepository, @@ -247,7 +250,6 @@ async def get_dlq_manager( manager = DLQManager( settings=settings, broker=broker, - schema_registry=schema_registry, logger=logger, dlq_metrics=dlq_metrics, repository=repository, @@ -272,14 +274,6 @@ async def get_dlq_manager( logger.info("DLQManager retry monitor stopped") -class EventProvider(Provider): - scope = Scope.APP - - @provide - def get_schema_registry(self, settings: Settings, logger: logging.Logger) -> SchemaRegistryManager: - return SchemaRegistryManager(settings, logger) - - class KubernetesProvider(Provider): scope = Scope.APP @@ -465,21 +459,6 @@ class KafkaServicesProvider(Provider): def get_event_service(self, event_repository: EventRepository) -> EventService: return EventService(event_repository) - @provide - def get_kafka_event_service( - self, - kafka_producer: UnifiedProducer, - settings: Settings, - logger: logging.Logger, - event_metrics: EventMetrics, - ) -> KafkaEventService: - return KafkaEventService( - kafka_producer=kafka_producer, - settings=settings, - logger=logger, - event_metrics=event_metrics, - ) - class UserServicesProvider(Provider): scope = Scope.APP @@ -488,11 +467,10 @@ class UserServicesProvider(Provider): def get_user_settings_service( self, repository: UserSettingsRepository, - kafka_event_service: KafkaEventService, settings: Settings, logger: logging.Logger, ) -> UserSettingsService: - return UserSettingsService(repository, kafka_event_service, settings, logger) + return UserSettingsService(repository, settings, logger) class AdminServicesProvider(Provider): @@ -519,7 +497,6 @@ def get_admin_settings_service( def get_notification_service( self, notification_repository: NotificationRepository, - kafka_event_service: KafkaEventService, sse_redis_bus: SSERedisBus, settings: Settings, logger: logging.Logger, @@ -527,7 +504,6 @@ def get_notification_service( ) -> NotificationService: return NotificationService( notification_repository=notification_repository, - event_service=kafka_event_service, sse_bus=sse_redis_bus, settings=settings, logger=logger, @@ -616,7 +592,7 @@ def get_saga_service( def get_execution_service( self, execution_repository: ExecutionRepository, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, event_repository: EventRepository, settings: Settings, logger: logging.Logger, @@ -663,7 +639,7 @@ class CoordinatorProvider(Provider): @provide def get_execution_coordinator( self, - producer: UnifiedProducer, + producer: EventPublisher, execution_repository: ExecutionRepository, logger: logging.Logger, coordinator_metrics: CoordinatorMetrics, @@ -683,7 +659,7 @@ class K8sWorkerProvider(Provider): def get_kubernetes_worker( self, api_client: k8s_client.ApiClient, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, settings: Settings, logger: logging.Logger, event_metrics: EventMetrics, @@ -711,7 +687,7 @@ def get_event_mapper( 
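Reviewer note on the provider refactor above: the schema-registry provider drops out of MessagingProvider, messaging now centres on an EventPublisher, and an APP-scoped IdempotencyMiddlewareProvider supplies broker-level deduplication. A minimal, self-contained sketch of the Dishka pattern these providers follow is below; the TopicPrefix and Middleware types are stand-ins invented for illustration, and only the Provider/Scope/provide/make_async_container usage is assumed to match dishka's public API.

```python
# Illustration only: the Dishka provider pattern used by the providers above,
# with stand-in types (TopicPrefix, Middleware) instead of the repo's classes.
import asyncio
from dataclasses import dataclass

from dishka import Provider, Scope, make_async_container, provide


@dataclass
class TopicPrefix:
    value: str


@dataclass
class Middleware:
    prefix: str


class MiddlewareProvider(Provider):
    scope = Scope.APP  # one instance per application, as with IdempotencyMiddlewareProvider

    @provide
    def get_prefix(self) -> TopicPrefix:
        return TopicPrefix("dev.")

    @provide
    def get_middleware(self, prefix: TopicPrefix) -> Middleware:
        # Dependencies are wired by return-type annotation, same as the @provide methods above.
        return Middleware(prefix.value)


async def main() -> None:
    container = make_async_container(MiddlewareProvider())
    middleware = await container.get(Middleware)
    print(middleware)  # Middleware(prefix='dev.')
    await container.close()


if __name__ == "__main__":
    asyncio.run(main())
```

The real containers additionally pass `context={Settings: settings, KafkaBroker: broker}`, as the factory functions in container.py show.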
@provide async def get_pod_monitor( self, - kafka_event_service: KafkaEventService, + producer: EventPublisher, api_client: k8s_client.ApiClient, logger: logging.Logger, event_mapper: PodEventMapper, @@ -722,7 +698,7 @@ async def get_pod_monitor( config = PodMonitorConfig() monitor = PodMonitor( config=config, - kafka_event_service=kafka_event_service, + producer=producer, logger=logger, api_client=api_client, event_mapper=event_mapper, @@ -771,7 +747,7 @@ class SagaOrchestratorProvider(Provider): def get_saga_orchestrator( self, saga_repository: SagaRepository, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, resource_allocation_repository: ResourceAllocationRepository, logger: logging.Logger, ) -> SagaOrchestrator: @@ -797,7 +773,7 @@ class SagaWorkerProvider(Provider): async def get_saga_orchestrator( self, saga_repository: SagaRepository, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, resource_allocation_repository: ResourceAllocationRepository, logger: logging.Logger, database: Database, @@ -837,7 +813,7 @@ class ResultProcessorProvider(Provider): def get_result_processor( self, execution_repo: ExecutionRepository, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, settings: Settings, logger: logging.Logger, execution_metrics: ExecutionMetrics, @@ -858,7 +834,7 @@ class EventReplayProvider(Provider): def get_event_replay_service( self, replay_repository: ReplayRepository, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, replay_metrics: ReplayMetrics, logger: logging.Logger, ) -> EventReplayService: @@ -883,7 +859,7 @@ class EventReplayWorkerProvider(Provider): async def get_event_replay_service( self, replay_repository: ReplayRepository, - kafka_producer: UnifiedProducer, + kafka_producer: EventPublisher, replay_metrics: ReplayMetrics, logger: logging.Logger, database: Database, diff --git a/backend/app/db/docs/dlq.py b/backend/app/db/docs/dlq.py index 71e2f7a3..83b4235f 100644 --- a/backend/app/db/docs/dlq.py +++ b/backend/app/db/docs/dlq.py @@ -5,15 +5,15 @@ from pymongo import ASCENDING, DESCENDING, IndexModel from app.dlq.models import DLQMessageStatus -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent class DLQMessageDocument(Document): - """Unified DLQ message document. Access event_id/event_type via event.event_id, event.event_type.""" + """Unified DLQ message document. 
Access event_id via event.event_id.""" model_config = ConfigDict(from_attributes=True) - event: DomainEvent # Discriminated union - contains event_id, event_type + event: BaseEvent original_topic: Indexed(str) = "" # type: ignore[valid-type] error: str = "Unknown error" retry_count: Indexed(int) = 0 # type: ignore[valid-type] @@ -36,7 +36,7 @@ class Settings: use_state_management = True indexes = [ IndexModel([("event.event_id", ASCENDING)], unique=True, name="idx_dlq_event_id"), - IndexModel([("event.event_type", ASCENDING)], name="idx_dlq_event_type"), + IndexModel([("original_topic", ASCENDING)], name="idx_dlq_original_topic"), IndexModel([("status", ASCENDING)], name="idx_dlq_status"), IndexModel([("failed_at", DESCENDING)], name="idx_dlq_failed_desc"), IndexModel([("created_at", ASCENDING)], name="idx_dlq_created_ttl", expireAfterSeconds=7 * 24 * 3600), diff --git a/backend/app/db/docs/event.py b/backend/app/db/docs/event.py index f5a65609..4e8b80e7 100644 --- a/backend/app/db/docs/event.py +++ b/backend/app/db/docs/event.py @@ -6,7 +6,6 @@ from pydantic import ConfigDict, Field from pymongo import ASCENDING, DESCENDING, IndexModel -from app.domain.enums.events import EventType from app.domain.events.typed import EventMetadata @@ -15,10 +14,11 @@ class EventDocument(Document): Uses extra='allow' for flexible event data storage - event-specific fields are stored directly at document level (no payload wrapper needed). + Topic field stores the event type name (e.g., 'execution_requested'). """ event_id: Indexed(str, unique=True) = Field(default_factory=lambda: str(uuid4())) # type: ignore[valid-type] - event_type: EventType # Indexed via Settings.indexes + topic: str # Event topic name (snake_case class name without 'Event' suffix) event_version: str = "1.0" timestamp: Indexed(datetime) = Field(default_factory=lambda: datetime.now(timezone.utc)) # type: ignore[valid-type] aggregate_id: Indexed(str) | None = None # type: ignore[valid-type] @@ -36,7 +36,7 @@ class Settings: use_state_management = True indexes = [ # Compound indexes for common query patterns - IndexModel([("event_type", ASCENDING), ("timestamp", DESCENDING)], name="idx_event_type_ts"), + IndexModel([("topic", ASCENDING), ("timestamp", DESCENDING)], name="idx_topic_ts"), IndexModel([("aggregate_id", ASCENDING), ("timestamp", DESCENDING)], name="idx_aggregate_ts"), IndexModel([("metadata.correlation_id", ASCENDING)], name="idx_meta_correlation"), IndexModel([("metadata.user_id", ASCENDING), ("timestamp", DESCENDING)], name="idx_meta_user_ts"), @@ -47,19 +47,19 @@ class Settings: # TTL index (expireAfterSeconds=0 means use ttl_expires_at value directly) IndexModel([("ttl_expires_at", ASCENDING)], name="idx_ttl", expireAfterSeconds=0), # Additional compound indexes for query optimization - IndexModel([("event_type", ASCENDING), ("aggregate_id", ASCENDING)], name="idx_events_type_agg"), + IndexModel([("topic", ASCENDING), ("aggregate_id", ASCENDING)], name="idx_events_topic_agg"), IndexModel([("aggregate_id", ASCENDING), ("timestamp", ASCENDING)], name="idx_events_agg_ts"), - IndexModel([("event_type", ASCENDING), ("timestamp", ASCENDING)], name="idx_events_type_ts_asc"), + IndexModel([("topic", ASCENDING), ("timestamp", ASCENDING)], name="idx_events_topic_ts_asc"), IndexModel([("metadata.user_id", ASCENDING), ("timestamp", ASCENDING)], name="idx_events_user_ts"), - IndexModel([("metadata.user_id", ASCENDING), ("event_type", ASCENDING)], name="idx_events_user_type"), + IndexModel([("metadata.user_id", ASCENDING), ("topic", 
ASCENDING)], name="idx_events_user_topic"), IndexModel( - [("event_type", ASCENDING), ("metadata.user_id", ASCENDING), ("timestamp", DESCENDING)], - name="idx_events_type_user_ts", + [("topic", ASCENDING), ("metadata.user_id", ASCENDING), ("timestamp", DESCENDING)], + name="idx_events_topic_user_ts", ), # Text search index IndexModel( [ - ("event_type", pymongo.TEXT), + ("topic", pymongo.TEXT), ("metadata.service_name", pymongo.TEXT), ("metadata.user_id", pymongo.TEXT), ("execution_id", pymongo.TEXT), @@ -79,7 +79,7 @@ class EventArchiveDocument(Document): """ event_id: Indexed(str, unique=True) # type: ignore[valid-type] - event_type: EventType # Indexed via Settings.indexes + topic: str # Event topic name event_version: str = "1.0" timestamp: Indexed(datetime) # type: ignore[valid-type] aggregate_id: str | None = None @@ -98,5 +98,5 @@ class Settings: name = "events_archive" use_state_management = True indexes = [ - IndexModel([("event_type", 1)]), + IndexModel([("topic", 1)]), ] diff --git a/backend/app/db/repositories/admin/admin_events_repository.py b/backend/app/db/repositories/admin/admin_events_repository.py index 380ebcea..520249cf 100644 --- a/backend/app/db/repositories/admin/admin_events_repository.py +++ b/backend/app/db/repositories/admin/admin_events_repository.py @@ -13,19 +13,17 @@ ) from app.domain.admin import ExecutionResultSummary, ReplaySessionData, ReplaySessionStatusDetail from app.domain.admin.replay_updates import ReplaySessionUpdate -from app.domain.enums.events import EventType from app.domain.enums.replay import ReplayStatus from app.domain.events import ( - DomainEvent, - DomainEventAdapter, + BaseEvent, EventBrowseResult, EventDetail, EventExportRow, EventFilter, EventStatistics, EventSummary, - EventTypeCount, HourlyEventCount, + TopicCount, UserEventCount, ) from app.domain.replay.models import ReplayFilter, ReplaySessionState @@ -35,7 +33,7 @@ class AdminEventsRepository: def _event_filter_conditions(self, f: EventFilter) -> list[Any]: """Build Beanie query conditions from EventFilter for EventDocument.""" conditions = [ - In(EventDocument.event_type, f.event_types) if f.event_types else None, + In(EventDocument.topic, f.topics) if f.topics else None, EventDocument.aggregate_id == f.aggregate_id if f.aggregate_id else None, EventDocument.metadata.correlation_id == f.correlation_id if f.correlation_id else None, EventDocument.metadata.user_id == f.user_id if f.user_id else None, @@ -59,7 +57,7 @@ async def browse_events( total = await query.count() docs = await query.sort([(sort_by, sort_order)]).skip(skip).limit(limit).to_list() - events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + events = [BaseEvent.model_validate(d, from_attributes=True) for d in docs] return EventBrowseResult(events=events, total=total, skip=skip, limit=limit) @@ -68,7 +66,7 @@ async def get_event_detail(self, event_id: str) -> EventDetail | None: if not doc: return None - event = DomainEventAdapter.validate_python(doc, from_attributes=True) + event = BaseEvent.model_validate(doc, from_attributes=True) related_query = {"metadata.correlation_id": doc.metadata.correlation_id, "event_id": {"$ne": event_id}} related_docs = await ( @@ -77,7 +75,7 @@ async def get_event_detail(self, event_id: str) -> EventDetail | None: related_events = [ EventSummary( event_id=d.event_id, - event_type=d.event_type, + topic=d.topic, timestamp=d.timestamp, aggregate_id=d.aggregate_id, ) @@ -143,7 +141,7 @@ async def get_event_stats(self, hours: int = 24) -> EventStatistics: 
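The index renames above (idx_topic_ts, idx_events_topic_agg, and so on) keep the same shapes as before, only keyed on `topic` instead of `event_type`. A short sketch of the kind of Beanie query the ("topic", "timestamp" DESC) compound index serves, mirroring the repository helpers later in this patch; it assumes Beanie has already been initialised with EventDocument against a live MongoDB.

```python
# Sketch of a query served by the new ("topic", "timestamp" DESC) compound index.
# Mirrors get_events_by_topic() further down in this patch; assumes Beanie is
# already initialised with EventDocument.
from app.db.docs import EventDocument


async def recent_events_for_topic(topic: str, limit: int = 50) -> list[EventDocument]:
    return (
        await EventDocument.find(EventDocument.topic == topic)
        .sort([("timestamp", -1)])  # DESC matches idx_topic_ts ordering
        .limit(limit)
        .to_list()
    )
```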
.limit(10) ) top_types = await EventDocument.aggregate(type_pipeline.export()).to_list() - events_by_type = [EventTypeCount(event_type=EventType(t["_id"]), count=t["count"]) for t in top_types] + events_by_topic = [TopicCount(topic=t["_id"], count=t["count"]) for t in top_types] # Hourly events pipeline - project renames _id->hour hourly_pipeline = ( @@ -189,7 +187,7 @@ async def get_event_stats(self, hours: int = 24) -> EventStatistics: return EventStatistics( total_events=stats["total_events"], - events_by_type=events_by_type, + events_by_topic=events_by_topic, events_by_hour=events_by_hour, top_users=top_users, error_rate=round(error_rate, 2), @@ -205,7 +203,7 @@ async def export_events_csv(self, event_filter: EventFilter) -> list[EventExport return [ EventExportRow( event_id=doc.event_id, - event_type=doc.event_type, + topic=doc.topic, timestamp=doc.timestamp, correlation_id=doc.metadata.correlation_id or "", aggregate_id=doc.aggregate_id or "", @@ -217,7 +215,7 @@ async def export_events_csv(self, event_filter: EventFilter) -> list[EventExport for doc in docs ] - async def archive_event(self, event: DomainEvent, deleted_by: str) -> bool: + async def archive_event(self, event: BaseEvent, deleted_by: str) -> bool: archive_doc = EventArchiveDocument( **event.model_dump(), deleted_at=datetime.now(timezone.utc), @@ -326,7 +324,7 @@ async def get_events_preview_for_replay(self, replay_filter: ReplayFilter, limit return [ EventSummary( event_id=doc.event_id, - event_type=doc.event_type, + topic=doc.topic, timestamp=doc.timestamp, aggregate_id=doc.aggregate_id, ) diff --git a/backend/app/db/repositories/dlq_repository.py b/backend/app/db/repositories/dlq_repository.py index 83aa62ff..477ba123 100644 --- a/backend/app/db/repositories/dlq_repository.py +++ b/backend/app/db/repositories/dlq_repository.py @@ -18,7 +18,6 @@ EventTypeStatistic, TopicStatistic, ) -from app.domain.enums.events import EventType class DLQRepository: @@ -45,16 +44,8 @@ async def get_dlq_stats(self) -> DLQStatistics: topic_results = await DLQMessageDocument.aggregate(topic_pipeline.export()).to_list() by_topic = [TopicStatistic.model_validate(doc) for doc in topic_results] - # Counts by event type (top 10) - project renames _id->event_type - event_type_pipeline = ( - Pipeline() - .group(by=S.field(DLQMessageDocument.event.event_type), query={"count": S.sum(1)}) - .sort(by="count", descending=True) - .limit(10) - .project(_id=0, event_type="$_id", count=1) - ) - event_type_results = await DLQMessageDocument.aggregate(event_type_pipeline.export()).to_list() - by_event_type = [EventTypeStatistic.model_validate(doc) for doc in event_type_results if doc["event_type"]] + # Note: event_type field removed from events - use original_topic for statistics instead + by_event_type: list[EventTypeStatistic] = [] # Age statistics - use $toLong to convert Date to milliseconds for $avg time_pipeline = Pipeline().group( @@ -91,14 +82,12 @@ async def get_messages( self, status: DLQMessageStatus | None = None, topic: str | None = None, - event_type: EventType | None = None, limit: int = 50, offset: int = 0, ) -> DLQMessageListResult: conditions: list[Any] = [ DLQMessageDocument.status == status if status else None, DLQMessageDocument.original_topic == topic if topic else None, - DLQMessageDocument.event.event_type == event_type if event_type else None, ] conditions = [c for c in conditions if c is not None] diff --git a/backend/app/db/repositories/event_repository.py b/backend/app/db/repositories/event_repository.py index 17598020..ad95c8fe 
100644 --- a/backend/app/db/repositories/event_repository.py +++ b/backend/app/db/repositories/event_repository.py @@ -11,17 +11,15 @@ from app.core.tracing import EventAttributes from app.core.tracing.utils import add_span_attributes from app.db.docs import EventArchiveDocument, EventDocument -from app.domain.enums.events import EventType from app.domain.events import ( ArchivedEvent, - DomainEvent, - DomainEventAdapter, + BaseEvent, EventAggregationResult, EventListResult, EventReplayInfo, EventStatistics, - EventTypeCount, ServiceEventCount, + TopicCount, ) @@ -41,14 +39,16 @@ def _build_time_filter(self, start_time: datetime | None, end_time: datetime | N """Build time filter dict for aggregation pipelines.""" return {key: value for key, value in {"$gte": start_time, "$lte": end_time}.items() if value is not None} - async def store_event(self, event: DomainEvent) -> str: + async def store_event(self, event: BaseEvent) -> str: """Idempotent event store — silently ignores duplicates by event_id.""" + topic = type(event).topic() data = event.model_dump(exclude_none=True) + data["topic"] = topic data.setdefault("stored_at", datetime.now(timezone.utc)) doc = EventDocument(**data) add_span_attributes( **{ - str(EventAttributes.EVENT_TYPE): str(event.event_type), + str(EventAttributes.EVENT_TYPE): topic, str(EventAttributes.EVENT_ID): event.event_id, str(EventAttributes.EXECUTION_ID): event.aggregate_id or "", } @@ -58,16 +58,17 @@ async def store_event(self, event: DomainEvent) -> str: except DuplicateKeyError: self.logger.debug(f"Event {event.event_id} already stored, skipping") return event.event_id - self.logger.debug(f"Stored event {event.event_id} of type {event.event_type}") + self.logger.debug(f"Stored event {event.event_id} of topic {topic}") return event.event_id - async def store_events_batch(self, events: list[DomainEvent]) -> list[str]: + async def store_events_batch(self, events: list[BaseEvent]) -> list[str]: if not events: return [] now = datetime.now(timezone.utc) docs = [] for event in events: data = event.model_dump(exclude_none=True) + data["topic"] = type(event).topic() data.setdefault("stored_at", now) docs.append(EventDocument(**data)) await EventDocument.insert_many(docs) @@ -75,22 +76,22 @@ async def store_events_batch(self, events: list[DomainEvent]) -> list[str]: self.logger.info(f"Stored {len(events)} events in batch") return [event.event_id for event in events] - async def get_event(self, event_id: str) -> DomainEvent | None: + async def get_event(self, event_id: str) -> BaseEvent | None: doc = await EventDocument.find_one(EventDocument.event_id == event_id) if not doc: return None - return DomainEventAdapter.validate_python(doc, from_attributes=True) + return BaseEvent.model_validate(doc, from_attributes=True) - async def get_events_by_type( + async def get_events_by_topic( self, - event_type: EventType, + topic: str, start_time: datetime | None = None, end_time: datetime | None = None, limit: int = 100, skip: int = 0, - ) -> list[DomainEvent]: + ) -> list[BaseEvent]: conditions = [ - EventDocument.event_type == event_type, + EventDocument.topic == topic, *self._time_conditions(start_time, end_time), ] docs = ( @@ -100,18 +101,18 @@ async def get_events_by_type( .limit(limit) .to_list() ) - return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + return [BaseEvent.model_validate(d, from_attributes=True) for d in docs] async def get_events_by_aggregate( - self, aggregate_id: str, event_types: list[EventType] | None = None, limit: int = 
100 - ) -> list[DomainEvent]: + self, aggregate_id: str, topics: list[str] | None = None, limit: int = 100 + ) -> list[BaseEvent]: conditions: list[BaseFindOperator] = [Eq(EventDocument.aggregate_id, aggregate_id)] - if event_types: - conditions.append(In(EventDocument.event_type, list(event_types))) + if topics: + conditions.append(In(EventDocument.topic, list(topics))) docs = ( await EventDocument.find(*conditions).sort([("timestamp", SortDirection.ASCENDING)]).limit(limit).to_list() ) - return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + return [BaseEvent.model_validate(d, from_attributes=True) for d in docs] async def get_events_by_correlation( self, correlation_id: str, limit: int = 100, skip: int = 0, user_id: str | None = None, @@ -125,7 +126,7 @@ async def get_events_by_correlation( .sort([("timestamp", SortDirection.ASCENDING)]) .skip(skip).limit(limit).to_list() ) - events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + events = [BaseEvent.model_validate(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(condition).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -139,15 +140,15 @@ async def get_events_by_correlation( async def get_events_by_user( self, user_id: str, - event_types: list[EventType] | None = None, + topics: list[str] | None = None, start_time: datetime | None = None, end_time: datetime | None = None, limit: int = 100, skip: int = 0, - ) -> list[DomainEvent]: + ) -> list[BaseEvent]: conditions = [ EventDocument.metadata.user_id == user_id, - In(EventDocument.event_type, event_types) if event_types else None, + In(EventDocument.topic, topics) if topics else None, *self._time_conditions(start_time, end_time), ] conditions = [c for c in conditions if c is not None] @@ -158,7 +159,7 @@ async def get_events_by_user( .limit(limit) .to_list() ) - return [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + return [BaseEvent.model_validate(d, from_attributes=True) for d in docs] async def get_execution_events( self, @@ -166,14 +167,14 @@ async def get_execution_events( limit: int = 100, skip: int = 0, exclude_system_events: bool = False, - event_types: list[EventType] | None = None, + topics: list[str] | None = None, ) -> EventListResult: conditions: list[Any] = [ Or( EventDocument.execution_id == execution_id, EventDocument.aggregate_id == execution_id, ), - In(EventDocument.event_type, event_types) if event_types else None, + In(EventDocument.topic, topics) if topics else None, Not(RegEx(EventDocument.metadata.service_name, "^system-")) if exclude_system_events else None, ] conditions = [c for c in conditions if c is not None] @@ -184,7 +185,7 @@ async def get_execution_events( .sort([("timestamp", SortDirection.ASCENDING)]) .skip(skip).limit(limit).to_list() ) - events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + events = [BaseEvent.model_validate(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(*conditions).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -212,8 +213,8 @@ async def get_event_statistics( [ { "$facet": { - "by_type": [ - {"$group": {"_id": S.field(EventDocument.event_type), "count": {"$sum": 1}}}, + "by_topic": [ + {"$group": {"_id": S.field(EventDocument.topic), "count": {"$sum": 1}}}, {"$sort": {"count": -1}}, ], "by_service": [ @@ -241,9 +242,9 @@ async def get_event_statistics( "$project": { "_id": 
0, "total_events": {"$ifNull": [{"$arrayElemAt": ["$total.count", 0]}, 0]}, - "events_by_type": { + "events_by_topic": { "$arrayToObject": { - "$map": {"input": "$by_type", "as": "t", "in": {"k": "$$t._id", "v": "$$t.count"}} + "$map": {"input": "$by_topic", "as": "t", "in": {"k": "$$t._id", "v": "$$t.count"}} } }, "events_by_service": { @@ -264,9 +265,9 @@ async def get_event_statistics( ) async for doc in EventDocument.aggregate(pipeline): - doc["events_by_type"] = [ - EventTypeCount(event_type=EventType(k), count=v) - for k, v in doc.get("events_by_type", {}).items() + doc["events_by_topic"] = [ + TopicCount(topic=k, count=v) + for k, v in doc.get("events_by_topic", {}).items() ] doc["events_by_service"] = [ ServiceEventCount(service_name=k, count=v) @@ -274,15 +275,15 @@ async def get_event_statistics( ] return EventStatistics(**doc) - return EventStatistics(total_events=0, events_by_type=[], events_by_service=[], events_by_hour=[]) + return EventStatistics(total_events=0, events_by_topic=[], events_by_service=[], events_by_hour=[]) async def cleanup_old_events( - self, older_than_days: int = 30, event_types: list[EventType] | None = None, dry_run: bool = False + self, older_than_days: int = 30, topics: list[str] | None = None, dry_run: bool = False ) -> int: cutoff_dt = datetime.now(timezone.utc) - timedelta(days=older_than_days) conditions: list[Any] = [ LT(EventDocument.timestamp, cutoff_dt), - In(EventDocument.event_type, event_types) if event_types else None, + In(EventDocument.topic, topics) if topics else None, ] conditions = [c for c in conditions if c is not None] @@ -299,7 +300,7 @@ async def cleanup_old_events( async def get_user_events_paginated( self, user_id: str, - event_types: list[EventType] | None = None, + topics: list[str] | None = None, start_time: datetime | None = None, end_time: datetime | None = None, limit: int = 100, @@ -308,7 +309,7 @@ async def get_user_events_paginated( ) -> EventListResult: conditions = [ EventDocument.metadata.user_id == user_id, - In(EventDocument.event_type, event_types) if event_types else None, + In(EventDocument.topic, topics) if topics else None, *self._time_conditions(start_time, end_time), ] conditions = [c for c in conditions if c is not None] @@ -319,7 +320,7 @@ async def get_user_events_paginated( .sort([("timestamp", sort_direction)]) .skip(skip).limit(limit).to_list() ) - events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + events = [BaseEvent.model_validate(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(*conditions).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -346,7 +347,7 @@ async def query_events( .sort([(sort_field, SortDirection.DESCENDING)]) .skip(skip).limit(limit).to_list() ) - events = [DomainEventAdapter.validate_python(d, from_attributes=True) for d in docs] + events = [BaseEvent.model_validate(d, from_attributes=True) for d in docs] total_count = await EventDocument.find(query).count() total_count = max(total_count, skip + len(events)) return EventListResult( @@ -359,14 +360,14 @@ async def aggregate_events(self, pipeline: list[dict[str, Any]], limit: int = 10 results = await EventDocument.aggregate(pipeline_with_limit).to_list() return EventAggregationResult(results=results, pipeline=pipeline_with_limit) - async def list_event_types(self, match: dict[str, object] | None = None) -> list[str]: - """List distinct event types, optionally filtered.""" + async def list_topics(self, match: dict[str, object] | 
None = None) -> list[str]: + """List distinct event topics, optionally filtered.""" pipeline: list[dict[str, object]] = [] if match: pipeline.append({"$match": match}) pipeline.extend( [ - {"$group": {"_id": S.field(EventDocument.event_type)}}, + {"$group": {"_id": S.field(EventDocument.topic)}}, {"$sort": {"_id": 1}}, ] ) @@ -387,7 +388,7 @@ async def delete_event_with_archival( await doc.delete() return ArchivedEvent.model_validate(doc, from_attributes=True).model_copy(update=archive_fields) - async def get_aggregate_events_for_replay(self, aggregate_id: str, limit: int = 10000) -> list[DomainEvent]: + async def get_aggregate_events_for_replay(self, aggregate_id: str, limit: int = 10000) -> list[BaseEvent]: return await self.get_events_by_aggregate(aggregate_id=aggregate_id, limit=limit) async def get_aggregate_replay_info(self, aggregate_id: str) -> EventReplayInfo | None: @@ -401,7 +402,7 @@ async def get_aggregate_replay_info(self, aggregate_id: str) -> EventReplayInfo query={ "events": {"$push": "$$ROOT"}, "event_count": S.sum(1), - "event_types": {"$addToSet": S.field(EventDocument.event_type)}, + "topics": {"$addToSet": S.field(EventDocument.topic)}, "start_time": S.min(S.field(EventDocument.timestamp)), "end_time": S.max(S.field(EventDocument.timestamp)), }, @@ -410,11 +411,11 @@ async def get_aggregate_replay_info(self, aggregate_id: str) -> EventReplayInfo ) async for doc in EventDocument.aggregate(pipeline.export()): - events = [DomainEventAdapter.validate_python(e) for e in doc["events"]] + events = [BaseEvent.model_validate(e) for e in doc["events"]] return EventReplayInfo( events=events, event_count=doc["event_count"], - event_types=doc["event_types"], + topics=doc["topics"], start_time=doc["start_time"], end_time=doc["end_time"], ) diff --git a/backend/app/db/repositories/user_settings_repository.py b/backend/app/db/repositories/user_settings_repository.py index 69718e25..4572b020 100644 --- a/backend/app/db/repositories/user_settings_repository.py +++ b/backend/app/db/repositories/user_settings_repository.py @@ -6,7 +6,6 @@ from beanie.operators import GT, LTE, Eq, In from app.db.docs import EventDocument, UserSettingsDocument, UserSettingsSnapshotDocument -from app.domain.enums.events import EventType from app.domain.user.settings_models import DomainUserSettings, DomainUserSettingsChangedEvent @@ -31,7 +30,7 @@ async def create_snapshot(self, settings: DomainUserSettings) -> None: async def get_settings_events( self, user_id: str, - event_types: list[EventType], + topics: list[str], since: datetime | None = None, until: datetime | None = None, limit: int | None = None, @@ -40,7 +39,7 @@ async def get_settings_events( aggregate_id = f"user_settings_{user_id}" conditions: list[BaseFindOperator] = [ Eq(EventDocument.aggregate_id, aggregate_id), - In(EventDocument.event_type, [str(et) for et in event_types]), + In(EventDocument.topic, topics), ] if since: conditions.append(GT(EventDocument.timestamp, since)) diff --git a/backend/app/dlq/manager.py b/backend/app/dlq/manager.py index 5cf21e36..36e10a1b 100644 --- a/backend/app/dlq/manager.py +++ b/backend/app/dlq/manager.py @@ -23,7 +23,6 @@ DLQMessageRetriedEvent, EventMetadata, ) -from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings @@ -39,7 +38,6 @@ def __init__( self, settings: Settings, broker: KafkaBroker, - schema_registry: SchemaRegistryManager, logger: logging.Logger, dlq_metrics: DLQMetrics, repository: DLQRepository, @@ -51,7 +49,6 @@ def __init__( ): self.settings = 
settings self._broker = broker - self.schema_registry = schema_registry self.logger = logger self.metrics = dlq_metrics self.repository = repository @@ -151,31 +148,30 @@ async def handle_message(self, message: DLQMessage) -> None: async def retry_message(self, message: DLQMessage) -> None: """Retry a DLQ message by republishing to the retry topic and original topic.""" retry_topic = f"{message.original_topic}{self.retry_topic_suffix}" + event_topic = type(message.event).topic() hdrs: dict[str, str] = { - "event_type": message.event.event_type, + "topic": event_topic, "dlq_retry_count": str(message.retry_count + 1), "dlq_original_error": message.error, "dlq_retry_timestamp": datetime.now(timezone.utc).isoformat(), } hdrs = inject_trace_context(hdrs) - serialized = await self.schema_registry.serialize_event(message.event) - await self._broker.publish( - message=serialized, + message=message.event, topic=retry_topic, key=message.event.event_id.encode(), headers=hdrs, ) await self._broker.publish( - message=serialized, + message=message.event, topic=message.original_topic, key=message.event.event_id.encode(), headers=hdrs, ) - self.metrics.record_dlq_message_retried(message.original_topic, message.event.event_type, "success") + self.metrics.record_dlq_message_retried(message.original_topic, event_topic, "success") new_retry_count = message.retry_count + 1 await self.repository.update_status( @@ -192,7 +188,8 @@ async def retry_message(self, message: DLQMessage) -> None: async def discard_message(self, message: DLQMessage, reason: str) -> None: """Discard a DLQ message, updating status and emitting an event.""" - self.metrics.record_dlq_message_discarded(message.original_topic, message.event.event_type, reason) + event_topic = type(message.event).topic() + self.metrics.record_dlq_message_discarded(message.original_topic, event_topic, reason) await self.repository.update_status( message.event.event_id, @@ -296,7 +293,6 @@ async def _emit_message_received_event(self, message: DLQMessage) -> None: event = DLQMessageReceivedEvent( dlq_event_id=message.event.event_id, original_topic=message.original_topic, - original_event_type=str(message.event.event_type), error=message.error, retry_count=message.retry_count, producer_id=message.producer_id, @@ -309,7 +305,6 @@ async def _emit_message_retried_event(self, message: DLQMessage, retry_topic: st event = DLQMessageRetriedEvent( dlq_event_id=message.event.event_id, original_topic=message.original_topic, - original_event_type=str(message.event.event_type), retry_count=new_retry_count, retry_topic=retry_topic, metadata=self._event_metadata, @@ -320,7 +315,6 @@ async def _emit_message_discarded_event(self, message: DLQMessage, reason: str) event = DLQMessageDiscardedEvent( dlq_event_id=message.event.event_id, original_topic=message.original_topic, - original_event_type=str(message.event.event_type), reason=reason, retry_count=message.retry_count, metadata=self._event_metadata, @@ -331,11 +325,10 @@ async def _produce_dlq_event( self, event: DLQMessageReceivedEvent | DLQMessageRetriedEvent | DLQMessageDiscardedEvent ) -> None: try: - serialized = await self.schema_registry.serialize_event(event) await self._broker.publish( - message=serialized, + message=event, topic=self._dlq_events_topic, key=event.event_id.encode(), ) except Exception as e: - self.logger.error(f"Failed to emit DLQ event {event.event_type}: {e}") + self.logger.error(f"Failed to emit DLQ event {type(event).topic()}: {e}") diff --git a/backend/app/dlq/models.py b/backend/app/dlq/models.py 
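The manager changes above are the heart of this commit: instead of routing every event through SchemaRegistryManager.serialize_event(), the Pydantic model is handed straight to the broker and FastStream serialises it. A minimal standalone sketch of that direct-publish path follows; the broker address and the ExampleEvent model are illustrative, not taken from the repo, while the publish keyword arguments match the calls in retry_message() above.

```python
# Minimal sketch of publishing a Pydantic model directly, as retry_message() and
# _produce_dlq_event() above now do. FastStream handles serialisation, so there is
# no schema-registry encode step. Broker address and ExampleEvent are illustrative.
import asyncio
from datetime import datetime, timezone
from uuid import uuid4

from faststream.kafka import KafkaBroker
from pydantic import BaseModel


class ExampleEvent(BaseModel):
    event_id: str
    created_at: datetime


async def main() -> None:
    event = ExampleEvent(event_id=str(uuid4()), created_at=datetime.now(timezone.utc))
    async with KafkaBroker("localhost:9092") as broker:
        await broker.publish(
            message=event,                # the model goes straight to the broker
            topic="example_topic",
            key=event.event_id.encode(),  # bytes key, as in retry_message() above
            headers={"topic": "example_topic"},
        )


if __name__ == "__main__":
    asyncio.run(main())
```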
index 66961243..9cc45e19 100644 --- a/backend/app/dlq/models.py +++ b/backend/app/dlq/models.py @@ -5,8 +5,7 @@ from pydantic import BaseModel, ConfigDict, Field from app.core.utils import StringEnum -from app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent class DLQMessageStatus(StringEnum): @@ -29,11 +28,11 @@ class RetryStrategy(StringEnum): class DLQMessage(BaseModel): - """Unified DLQ message model. Access event_id/event_type via event.event_id, event.event_type.""" + """Unified DLQ message model. Access event_id via event.event_id.""" model_config = ConfigDict(from_attributes=True) - event: DomainEvent # Discriminated union - auto-validates from dict + event: BaseEvent original_topic: str = "" error: str = "Unknown error" retry_count: int = 0 @@ -72,7 +71,6 @@ class DLQMessageFilter: status: DLQMessageStatus | None = None topic: str | None = None - event_type: EventType | None = None @dataclass diff --git a/backend/app/domain/admin/overview_models.py b/backend/app/domain/admin/overview_models.py index 1304dfe6..77f3280f 100644 --- a/backend/app/domain/admin/overview_models.py +++ b/backend/app/domain/admin/overview_models.py @@ -4,7 +4,7 @@ from pydantic.dataclasses import dataclass -from app.domain.events import DomainEvent, EventStatistics +from app.domain.events import BaseEvent, EventStatistics from app.domain.user import User as DomainAdminUser @@ -30,4 +30,4 @@ class AdminUserOverviewDomain: stats: EventStatistics derived_counts: DerivedCountsDomain rate_limit_summary: RateLimitSummaryDomain - recent_events: list[DomainEvent] = field(default_factory=list) + recent_events: list[BaseEvent] = field(default_factory=list) diff --git a/backend/app/domain/enums/kafka.py b/backend/app/domain/enums/kafka.py index e1eceeb7..cdfeab77 100644 --- a/backend/app/domain/enums/kafka.py +++ b/backend/app/domain/enums/kafka.py @@ -1,54 +1,15 @@ from app.core.utils import StringEnum -from app.domain.enums.events import EventType class KafkaTopic(StringEnum): - """Kafka topic names used throughout the system.""" + """Kafka topic names for infrastructure topics only. - EXECUTION_EVENTS = "execution_events" - EXECUTION_COMPLETED = "execution_completed" - EXECUTION_FAILED = "execution_failed" - EXECUTION_TIMEOUT = "execution_timeout" - EXECUTION_REQUESTS = "execution_requests" - EXECUTION_COMMANDS = "execution_commands" - EXECUTION_TASKS = "execution_tasks" + Note: Domain event topics are derived from event class names via BaseEvent.topic(). + This enum is only for infrastructure topics (DLQ) that don't follow the event-class pattern. 
+ """ - # Pod topics - POD_EVENTS = "pod_events" - POD_STATUS_UPDATES = "pod_status_updates" - POD_RESULTS = "pod_results" - - # Result topics - EXECUTION_RESULTS = "execution_results" - - # User topics - USER_EVENTS = "user_events" - USER_NOTIFICATIONS = "user_notifications" - USER_SETTINGS_EVENTS = "user_settings_events" - - # Script topics - SCRIPT_EVENTS = "script_events" - - # Security topics - SECURITY_EVENTS = "security_events" - - # Resource topics - RESOURCE_EVENTS = "resource_events" - - # Notification topics - NOTIFICATION_EVENTS = "notification_events" - - # System topics - SYSTEM_EVENTS = "system_events" - - # Saga topics - SAGA_EVENTS = "saga_events" - SAGA_COMMANDS = "saga_commands" - - # Infrastructure topics DEAD_LETTER_QUEUE = "dead_letter_queue" DLQ_EVENTS = "dlq_events" - WEBSOCKET_EVENTS = "websocket_events" class GroupId(StringEnum): @@ -59,92 +20,5 @@ class GroupId(StringEnum): POD_MONITOR = "pod-monitor" RESULT_PROCESSOR = "result-processor" SAGA_ORCHESTRATOR = "saga-orchestrator" - EVENT_STORE_CONSUMER = "event-store-consumer" - WEBSOCKET_GATEWAY = "websocket-gateway" NOTIFICATION_SERVICE = "notification-service" - DLQ_PROCESSOR = "dlq-processor" DLQ_MANAGER = "dlq-manager" - - -# Consumer group topic subscriptions -CONSUMER_GROUP_SUBSCRIPTIONS: dict[GroupId, set[KafkaTopic]] = { - GroupId.EXECUTION_COORDINATOR: { - KafkaTopic.EXECUTION_EVENTS, - KafkaTopic.EXECUTION_RESULTS, - }, - GroupId.K8S_WORKER: { - KafkaTopic.SAGA_COMMANDS, # Receives CreatePodCommand/DeletePodCommand from coordinator - }, - GroupId.POD_MONITOR: { - KafkaTopic.POD_EVENTS, - KafkaTopic.POD_STATUS_UPDATES, - }, - GroupId.RESULT_PROCESSOR: { - KafkaTopic.EXECUTION_EVENTS, # Listens for COMPLETED/FAILED/TIMEOUT, publishes to EXECUTION_RESULTS - }, - GroupId.SAGA_ORCHESTRATOR: { - # Orchestrator is triggered by domain events, specifically EXECUTION_REQUESTED, - # and emits commands on SAGA_COMMANDS. 
- KafkaTopic.EXECUTION_EVENTS, - KafkaTopic.SAGA_COMMANDS, - }, - GroupId.WEBSOCKET_GATEWAY: { - KafkaTopic.EXECUTION_EVENTS, - KafkaTopic.EXECUTION_RESULTS, - KafkaTopic.POD_EVENTS, - KafkaTopic.POD_STATUS_UPDATES, - }, - GroupId.NOTIFICATION_SERVICE: { - KafkaTopic.NOTIFICATION_EVENTS, - KafkaTopic.EXECUTION_EVENTS, - }, - GroupId.DLQ_PROCESSOR: { - KafkaTopic.DEAD_LETTER_QUEUE, - }, -} - -# Consumer group event filters -CONSUMER_GROUP_EVENTS: dict[GroupId, set[EventType]] = { - GroupId.EXECUTION_COORDINATOR: { - EventType.EXECUTION_REQUESTED, - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.EXECUTION_CANCELLED, - }, - GroupId.K8S_WORKER: { - EventType.EXECUTION_STARTED, - }, - GroupId.POD_MONITOR: { - EventType.POD_CREATED, - EventType.POD_RUNNING, - EventType.POD_SUCCEEDED, - EventType.POD_FAILED, - }, - GroupId.RESULT_PROCESSOR: { - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.EXECUTION_TIMEOUT, - }, - GroupId.SAGA_ORCHESTRATOR: { - EventType.EXECUTION_REQUESTED, - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.EXECUTION_TIMEOUT, - }, - GroupId.WEBSOCKET_GATEWAY: { - EventType.EXECUTION_REQUESTED, - EventType.EXECUTION_STARTED, - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.POD_CREATED, - EventType.POD_RUNNING, - EventType.RESULT_STORED, - }, - GroupId.NOTIFICATION_SERVICE: { - EventType.NOTIFICATION_CREATED, - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.EXECUTION_TIMEOUT, - }, - GroupId.DLQ_PROCESSOR: set(), -} diff --git a/backend/app/domain/events/__init__.py b/backend/app/domain/events/__init__.py index 2a9bc41c..02849e3f 100644 --- a/backend/app/domain/events/__init__.py +++ b/backend/app/domain/events/__init__.py @@ -11,10 +11,10 @@ EventSortOrder, EventStatistics, EventSummary, - EventTypeCount, ExecutionEventsResult, HourlyEventCount, ServiceEventCount, + TopicCount, UserEventCount, ) from app.domain.events.typed import ( @@ -28,8 +28,10 @@ ContainerStatusInfo, CreatePodCommandEvent, DeletePodCommandEvent, - DomainEvent, - DomainEventAdapter, + # DLQ Events + DLQMessageDiscardedEvent, + DLQMessageReceivedEvent, + DLQMessageRetriedEvent, EventMetadata, # Execution Events ExecutionAcceptedEvent, @@ -107,19 +109,17 @@ "EventSortOrder", "EventStatistics", "EventSummary", - "EventTypeCount", "ExecutionEventsResult", "HourlyEventCount", "ServiceEventCount", + "TopicCount", "UserEventCount", # Base types "ArchivedEvent", "BaseEvent", "ContainerStatusInfo", - "DomainEvent", "EventMetadata", "ResourceUsageDomain", - "DomainEventAdapter", # Execution Events "ExecutionRequestedEvent", "ExecutionAcceptedEvent", @@ -185,4 +185,8 @@ "SystemErrorEvent", "ServiceUnhealthyEvent", "ServiceRecoveredEvent", + # DLQ Events + "DLQMessageReceivedEvent", + "DLQMessageRetriedEvent", + "DLQMessageDiscardedEvent", ] diff --git a/backend/app/domain/events/event_models.py b/backend/app/domain/events/event_models.py index 6e152d41..cdd738ef 100644 --- a/backend/app/domain/events/event_models.py +++ b/backend/app/domain/events/event_models.py @@ -6,8 +6,7 @@ from pydantic.dataclasses import dataclass from app.core.utils import StringEnum -from app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent MongoQueryValue = str | dict[str, str | list[str] | float | datetime] MongoQuery = dict[str, MongoQueryValue] @@ -46,7 +45,7 @@ class EventSummary: """Lightweight event summary for lists and 
previews.""" event_id: str - event_type: EventType + topic: str timestamp: datetime aggregate_id: str | None = None @@ -56,7 +55,7 @@ class EventFilter(BaseModel): model_config = ConfigDict(from_attributes=True) - event_types: list[EventType] | None = None + topics: list[str] | None = None aggregate_id: str | None = None correlation_id: str | None = None user_id: str | None = None @@ -85,7 +84,7 @@ def get_sort_direction(self) -> int: class EventListResult: """Result of event list query.""" - events: list[DomainEvent] + events: list[BaseEvent] total: int skip: int limit: int @@ -96,7 +95,7 @@ class EventListResult: class EventBrowseResult: """Result for event browsing.""" - events: list[DomainEvent] + events: list[BaseEvent] total: int skip: int limit: int @@ -106,14 +105,14 @@ class EventBrowseResult: class EventDetail: """Detailed event information with related events.""" - event: DomainEvent + event: BaseEvent related_events: list[EventSummary] = field(default_factory=list) timeline: list[EventSummary] = field(default_factory=list) @dataclass -class EventTypeCount: - event_type: EventType +class TopicCount: + topic: str count: int @@ -140,7 +139,7 @@ class EventStatistics: """Event statistics.""" total_events: int - events_by_type: list[EventTypeCount] = field(default_factory=list) + events_by_topic: list[TopicCount] = field(default_factory=list) events_by_service: list[ServiceEventCount] = field(default_factory=list) events_by_hour: list[HourlyEventCount | dict[str, Any]] = field(default_factory=list) top_users: list[UserEventCount] = field(default_factory=list) @@ -167,9 +166,9 @@ class EventProjection: class EventReplayInfo: """Information for event replay.""" - events: list[DomainEvent] + events: list[BaseEvent] event_count: int - event_types: list[EventType] + topics: list[str] start_time: datetime end_time: datetime @@ -178,11 +177,11 @@ class EventReplayInfo: class ExecutionEventsResult: """Result of execution events query.""" - events: list[DomainEvent] + events: list[BaseEvent] access_allowed: bool include_system_events: bool - def get_filtered_events(self) -> list[DomainEvent]: + def get_filtered_events(self) -> list[BaseEvent]: """Get events filtered based on access and system event settings.""" if not self.access_allowed: return [] @@ -200,7 +199,7 @@ class EventExportRow(BaseModel): model_config = ConfigDict(from_attributes=True) event_id: str - event_type: EventType + topic: str timestamp: datetime correlation_id: str aggregate_id: str diff --git a/backend/app/domain/events/typed.py b/backend/app/domain/events/typed.py index c230b9d4..42704ee8 100644 --- a/backend/app/domain/events/typed.py +++ b/backend/app/domain/events/typed.py @@ -1,19 +1,28 @@ +import re from datetime import datetime, timezone -from typing import Annotated, Literal from uuid import uuid4 -from pydantic import ConfigDict, Discriminator, Field, TypeAdapter -from pydantic_avro.to_avro.base import AvroBase +from pydantic import BaseModel, ConfigDict, Field from app.domain.enums.auth import LoginMethod from app.domain.enums.common import Environment -from app.domain.enums.events import EventType from app.domain.enums.execution import QueuePriority from app.domain.enums.notification import NotificationChannel, NotificationSeverity from app.domain.enums.storage import ExecutionErrorType, StorageType -class ResourceUsageDomain(AvroBase): +def _to_snake_case(name: str) -> str: + """Convert class name to snake_case topic name. 
+ + ExecutionRequestedEvent -> execution_requested + PodCreatedEvent -> pod_created + """ + if name.endswith("Event"): + name = name[:-5] + return re.sub(r"(? execution_requested + """ - # Pydantic marks fields with default/default_factory as optional in JSON Schema, - # which generates optional TypeScript types (e.g., `event_id?: string`). - # Since stored events always have these fields, we override the schema to mark them required. - # See: https://github.com/pydantic/pydantic/issues/7209 - # See: https://github.com/pydantic/pydantic/discussions/6073 model_config = ConfigDict( from_attributes=True, - json_schema_extra={"required": ["event_id", "event_type", "event_version", "timestamp", "metadata"]}, + json_schema_extra={"required": ["event_id", "event_version", "timestamp", "metadata"]}, ) event_id: str = Field(default_factory=lambda: str(uuid4())) - event_type: EventType event_version: str = "1.0" timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) aggregate_id: str | None = None metadata: EventMetadata + @classmethod + def topic(cls, prefix: str = "") -> str: + """Get Kafka topic name for this event class.""" + return f"{prefix}{_to_snake_case(cls.__name__)}" + # --- Execution Events --- class ExecutionRequestedEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_REQUESTED] = EventType.EXECUTION_REQUESTED execution_id: str script: str language: str @@ -80,7 +91,6 @@ class ExecutionRequestedEvent(BaseEvent): class ExecutionAcceptedEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_ACCEPTED] = EventType.EXECUTION_ACCEPTED execution_id: str queue_position: int estimated_wait_seconds: float | None = None @@ -88,14 +98,12 @@ class ExecutionAcceptedEvent(BaseEvent): class ExecutionQueuedEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_QUEUED] = EventType.EXECUTION_QUEUED execution_id: str position_in_queue: int | None = None estimated_start_time: datetime | None = None class ExecutionStartedEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_STARTED] = EventType.EXECUTION_STARTED execution_id: str pod_name: str node_name: str | None = None @@ -103,14 +111,12 @@ class ExecutionStartedEvent(BaseEvent): class ExecutionRunningEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_RUNNING] = EventType.EXECUTION_RUNNING execution_id: str pod_name: str progress_percentage: int | None = None class ExecutionCompletedEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_COMPLETED] = EventType.EXECUTION_COMPLETED execution_id: str exit_code: int resource_usage: ResourceUsageDomain | None = None @@ -119,7 +125,6 @@ class ExecutionCompletedEvent(BaseEvent): class ExecutionFailedEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_FAILED] = EventType.EXECUTION_FAILED execution_id: str exit_code: int error_type: ExecutionErrorType @@ -130,7 +135,6 @@ class ExecutionFailedEvent(BaseEvent): class ExecutionTimeoutEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_TIMEOUT] = EventType.EXECUTION_TIMEOUT execution_id: str timeout_seconds: int resource_usage: ResourceUsageDomain | None = None @@ -139,7 +143,6 @@ class ExecutionTimeoutEvent(BaseEvent): class ExecutionCancelledEvent(BaseEvent): - event_type: Literal[EventType.EXECUTION_CANCELLED] = EventType.EXECUTION_CANCELLED execution_id: str reason: str cancelled_by: str | None = None @@ -150,20 +153,18 @@ class ExecutionCancelledEvent(BaseEvent): class PodCreatedEvent(BaseEvent): - event_type: Literal[EventType.POD_CREATED] = EventType.POD_CREATED execution_id: str 
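For reference, the class-name-to-topic derivation that `_to_snake_case()` and `BaseEvent.topic()` implement can be sketched standalone as below. The regex shown is the conventional camel-to-snake lookbehind/lookahead pattern and is an assumption rather than a copy of the repo's helper, so acronym-heavy class names may split differently; the two expected outputs come from the docstring in this patch.

```python
# Standalone illustration of deriving a Kafka topic from an event class name.
# The regex is the conventional camel-to-snake pattern and is an assumption;
# the expected outputs below come from the docstring in this patch.
import re


def to_topic(class_name: str, prefix: str = "") -> str:
    if class_name.endswith("Event"):
        class_name = class_name[: -len("Event")]
    snake = re.sub(r"(?<!^)(?=[A-Z])", "_", class_name).lower()
    return f"{prefix}{snake}"


assert to_topic("ExecutionRequestedEvent") == "execution_requested"
assert to_topic("PodCreatedEvent", prefix="dev.") == "dev.pod_created"
```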
pod_name: str namespace: str = "default" class PodScheduledEvent(BaseEvent): - event_type: Literal[EventType.POD_SCHEDULED] = EventType.POD_SCHEDULED execution_id: str pod_name: str node_name: str = "" -class ContainerStatusInfo(AvroBase): +class ContainerStatusInfo(BaseModel): """Container status information from Kubernetes pod.""" model_config = ConfigDict(from_attributes=True) @@ -175,14 +176,12 @@ class ContainerStatusInfo(AvroBase): class PodRunningEvent(BaseEvent): - event_type: Literal[EventType.POD_RUNNING] = EventType.POD_RUNNING execution_id: str pod_name: str container_statuses: list[ContainerStatusInfo] = Field(default_factory=list) class PodSucceededEvent(BaseEvent): - event_type: Literal[EventType.POD_SUCCEEDED] = EventType.POD_SUCCEEDED execution_id: str pod_name: str exit_code: int = 0 @@ -191,7 +190,6 @@ class PodSucceededEvent(BaseEvent): class PodFailedEvent(BaseEvent): - event_type: Literal[EventType.POD_FAILED] = EventType.POD_FAILED execution_id: str pod_name: str exit_code: int = 1 @@ -202,7 +200,6 @@ class PodFailedEvent(BaseEvent): class PodTerminatedEvent(BaseEvent): - event_type: Literal[EventType.POD_TERMINATED] = EventType.POD_TERMINATED execution_id: str pod_name: str exit_code: int = 0 @@ -211,7 +208,6 @@ class PodTerminatedEvent(BaseEvent): class PodDeletedEvent(BaseEvent): - event_type: Literal[EventType.POD_DELETED] = EventType.POD_DELETED execution_id: str pod_name: str reason: str | None = None @@ -221,7 +217,6 @@ class PodDeletedEvent(BaseEvent): class ResultStoredEvent(BaseEvent): - event_type: Literal[EventType.RESULT_STORED] = EventType.RESULT_STORED execution_id: str storage_type: StorageType | None = None storage_path: str = "" @@ -229,7 +224,6 @@ class ResultStoredEvent(BaseEvent): class ResultFailedEvent(BaseEvent): - event_type: Literal[EventType.RESULT_FAILED] = EventType.RESULT_FAILED execution_id: str error: str = "" storage_type: StorageType | None = None @@ -241,21 +235,18 @@ class ResultFailedEvent(BaseEvent): class UserSettingsUpdatedEvent(BaseEvent): model_config = ConfigDict(extra="allow") - event_type: Literal[EventType.USER_SETTINGS_UPDATED] = EventType.USER_SETTINGS_UPDATED user_id: str changed_fields: list[str] = Field(default_factory=list) reason: str | None = None class UserRegisteredEvent(BaseEvent): - event_type: Literal[EventType.USER_REGISTERED] = EventType.USER_REGISTERED user_id: str username: str email: str class UserLoginEvent(BaseEvent): - event_type: Literal[EventType.USER_LOGIN] = EventType.USER_LOGIN user_id: str login_method: LoginMethod ip_address: str | None = None @@ -263,7 +254,6 @@ class UserLoginEvent(BaseEvent): class UserLoggedInEvent(BaseEvent): - event_type: Literal[EventType.USER_LOGGED_IN] = EventType.USER_LOGGED_IN user_id: str login_method: LoginMethod ip_address: str | None = None @@ -271,20 +261,17 @@ class UserLoggedInEvent(BaseEvent): class UserLoggedOutEvent(BaseEvent): - event_type: Literal[EventType.USER_LOGGED_OUT] = EventType.USER_LOGGED_OUT user_id: str logout_reason: str | None = None class UserUpdatedEvent(BaseEvent): - event_type: Literal[EventType.USER_UPDATED] = EventType.USER_UPDATED user_id: str updated_fields: list[str] = Field(default_factory=list) updated_by: str | None = None class UserDeletedEvent(BaseEvent): - event_type: Literal[EventType.USER_DELETED] = EventType.USER_DELETED user_id: str deleted_by: str | None = None reason: str | None = None @@ -294,7 +281,6 @@ class UserDeletedEvent(BaseEvent): class NotificationCreatedEvent(BaseEvent): - event_type: 
Literal[EventType.NOTIFICATION_CREATED] = EventType.NOTIFICATION_CREATED notification_id: str user_id: str subject: str @@ -305,7 +291,6 @@ class NotificationCreatedEvent(BaseEvent): class NotificationSentEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_SENT] = EventType.NOTIFICATION_SENT notification_id: str user_id: str channel: NotificationChannel @@ -313,7 +298,6 @@ class NotificationSentEvent(BaseEvent): class NotificationDeliveredEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_DELIVERED] = EventType.NOTIFICATION_DELIVERED notification_id: str user_id: str channel: NotificationChannel @@ -321,7 +305,6 @@ class NotificationDeliveredEvent(BaseEvent): class NotificationFailedEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_FAILED] = EventType.NOTIFICATION_FAILED notification_id: str user_id: str channel: NotificationChannel @@ -330,21 +313,18 @@ class NotificationFailedEvent(BaseEvent): class NotificationReadEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_READ] = EventType.NOTIFICATION_READ notification_id: str user_id: str read_at: datetime class NotificationAllReadEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_ALL_READ] = EventType.NOTIFICATION_ALL_READ user_id: str count: int read_at: datetime class NotificationClickedEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_CLICKED] = EventType.NOTIFICATION_CLICKED notification_id: str user_id: str clicked_at: datetime @@ -352,7 +332,6 @@ class NotificationClickedEvent(BaseEvent): class NotificationPreferencesUpdatedEvent(BaseEvent): - event_type: Literal[EventType.NOTIFICATION_PREFERENCES_UPDATED] = EventType.NOTIFICATION_PREFERENCES_UPDATED user_id: str changed_fields: list[str] = Field(default_factory=list) @@ -361,7 +340,6 @@ class NotificationPreferencesUpdatedEvent(BaseEvent): class SagaStartedEvent(BaseEvent): - event_type: Literal[EventType.SAGA_STARTED] = EventType.SAGA_STARTED saga_id: str saga_name: str execution_id: str @@ -369,7 +347,6 @@ class SagaStartedEvent(BaseEvent): class SagaCompletedEvent(BaseEvent): - event_type: Literal[EventType.SAGA_COMPLETED] = EventType.SAGA_COMPLETED saga_id: str saga_name: str execution_id: str @@ -377,7 +354,6 @@ class SagaCompletedEvent(BaseEvent): class SagaFailedEvent(BaseEvent): - event_type: Literal[EventType.SAGA_FAILED] = EventType.SAGA_FAILED saga_id: str saga_name: str execution_id: str @@ -386,7 +362,6 @@ class SagaFailedEvent(BaseEvent): class SagaCancelledEvent(BaseEvent): - event_type: Literal[EventType.SAGA_CANCELLED] = EventType.SAGA_CANCELLED saga_id: str saga_name: str execution_id: str @@ -398,7 +373,6 @@ class SagaCancelledEvent(BaseEvent): class SagaCompensatingEvent(BaseEvent): - event_type: Literal[EventType.SAGA_COMPENSATING] = EventType.SAGA_COMPENSATING saga_id: str saga_name: str execution_id: str @@ -406,7 +380,6 @@ class SagaCompensatingEvent(BaseEvent): class SagaCompensatedEvent(BaseEvent): - event_type: Literal[EventType.SAGA_COMPENSATED] = EventType.SAGA_COMPENSATED saga_id: str saga_name: str execution_id: str @@ -417,7 +390,6 @@ class SagaCompensatedEvent(BaseEvent): class CreatePodCommandEvent(BaseEvent): - event_type: Literal[EventType.CREATE_POD_COMMAND] = EventType.CREATE_POD_COMMAND saga_id: str execution_id: str script: str @@ -435,7 +407,6 @@ class CreatePodCommandEvent(BaseEvent): class DeletePodCommandEvent(BaseEvent): - event_type: Literal[EventType.DELETE_POD_COMMAND] = EventType.DELETE_POD_COMMAND saga_id: str execution_id: str reason: str @@ -444,14 +415,12 @@ 
class DeletePodCommandEvent(BaseEvent): class AllocateResourcesCommandEvent(BaseEvent): - event_type: Literal[EventType.ALLOCATE_RESOURCES_COMMAND] = EventType.ALLOCATE_RESOURCES_COMMAND execution_id: str cpu_request: str memory_request: str class ReleaseResourcesCommandEvent(BaseEvent): - event_type: Literal[EventType.RELEASE_RESOURCES_COMMAND] = EventType.RELEASE_RESOURCES_COMMAND execution_id: str cpu_request: str memory_request: str @@ -461,7 +430,6 @@ class ReleaseResourcesCommandEvent(BaseEvent): class ScriptSavedEvent(BaseEvent): - event_type: Literal[EventType.SCRIPT_SAVED] = EventType.SCRIPT_SAVED script_id: str user_id: str title: str @@ -469,14 +437,12 @@ class ScriptSavedEvent(BaseEvent): class ScriptDeletedEvent(BaseEvent): - event_type: Literal[EventType.SCRIPT_DELETED] = EventType.SCRIPT_DELETED script_id: str user_id: str deleted_by: str | None = None class ScriptSharedEvent(BaseEvent): - event_type: Literal[EventType.SCRIPT_SHARED] = EventType.SCRIPT_SHARED script_id: str shared_by: str shared_with: list[str] = Field(default_factory=list) @@ -487,7 +453,6 @@ class ScriptSharedEvent(BaseEvent): class SecurityViolationEvent(BaseEvent): - event_type: Literal[EventType.SECURITY_VIOLATION] = EventType.SECURITY_VIOLATION user_id: str | None = None violation_type: str details: str @@ -495,7 +460,6 @@ class SecurityViolationEvent(BaseEvent): class RateLimitExceededEvent(BaseEvent): - event_type: Literal[EventType.RATE_LIMIT_EXCEEDED] = EventType.RATE_LIMIT_EXCEEDED user_id: str | None = None endpoint: str limit: int @@ -503,7 +467,6 @@ class RateLimitExceededEvent(BaseEvent): class AuthFailedEvent(BaseEvent): - event_type: Literal[EventType.AUTH_FAILED] = EventType.AUTH_FAILED username: str | None = None reason: str ip_address: str | None = None @@ -513,7 +476,6 @@ class AuthFailedEvent(BaseEvent): class ResourceLimitExceededEvent(BaseEvent): - event_type: Literal[EventType.RESOURCE_LIMIT_EXCEEDED] = EventType.RESOURCE_LIMIT_EXCEEDED resource_type: str limit: int requested: int @@ -521,7 +483,6 @@ class ResourceLimitExceededEvent(BaseEvent): class QuotaExceededEvent(BaseEvent): - event_type: Literal[EventType.QUOTA_EXCEEDED] = EventType.QUOTA_EXCEEDED quota_type: str limit: int current_usage: int @@ -532,7 +493,6 @@ class QuotaExceededEvent(BaseEvent): class SystemErrorEvent(BaseEvent): - event_type: Literal[EventType.SYSTEM_ERROR] = EventType.SYSTEM_ERROR error_type: str message: str service_name: str @@ -540,14 +500,12 @@ class SystemErrorEvent(BaseEvent): class ServiceUnhealthyEvent(BaseEvent): - event_type: Literal[EventType.SERVICE_UNHEALTHY] = EventType.SERVICE_UNHEALTHY service_name: str health_check: str reason: str class ServiceRecoveredEvent(BaseEvent): - event_type: Literal[EventType.SERVICE_RECOVERED] = EventType.SERVICE_RECOVERED service_name: str health_check: str downtime_seconds: int @@ -559,10 +517,8 @@ class ServiceRecoveredEvent(BaseEvent): class DLQMessageReceivedEvent(BaseEvent): """Emitted when a message is received and persisted in the DLQ.""" - event_type: Literal[EventType.DLQ_MESSAGE_RECEIVED] = EventType.DLQ_MESSAGE_RECEIVED - dlq_event_id: str # The event_id of the failed message + dlq_event_id: str original_topic: str - original_event_type: str error: str retry_count: int producer_id: str @@ -572,135 +528,37 @@ class DLQMessageReceivedEvent(BaseEvent): class DLQMessageRetriedEvent(BaseEvent): """Emitted when a DLQ message is retried.""" - event_type: Literal[EventType.DLQ_MESSAGE_RETRIED] = EventType.DLQ_MESSAGE_RETRIED - dlq_event_id: str # The 
event_id of the retried message + dlq_event_id: str original_topic: str - original_event_type: str - retry_count: int # New retry count after this retry - retry_topic: str # Topic the message was retried to + retry_count: int + retry_topic: str class DLQMessageDiscardedEvent(BaseEvent): """Emitted when a DLQ message is discarded (max retries exceeded or manual discard).""" - event_type: Literal[EventType.DLQ_MESSAGE_DISCARDED] = EventType.DLQ_MESSAGE_DISCARDED - dlq_event_id: str # The event_id of the discarded message + dlq_event_id: str original_topic: str - original_event_type: str reason: str - retry_count: int # Final retry count when discarded + retry_count: int # --- Archived Event (for deleted events) --- -class ArchivedEvent(AvroBase): +class ArchivedEvent(BaseModel): """Archived event with deletion metadata. Wraps the original event data.""" model_config = ConfigDict(from_attributes=True) event_id: str - event_type: EventType + topic: str event_version: str = "1.0" timestamp: datetime aggregate_id: str | None = None metadata: EventMetadata stored_at: datetime | None = None ttl_expires_at: datetime | None = None - # Archive-specific fields deleted_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) deleted_by: str | None = None deletion_reason: str | None = None - - -# --- Discriminated Union: TYPE SYSTEM handles dispatch --- - -DomainEvent = Annotated[ - # Execution Events - ExecutionRequestedEvent - | ExecutionAcceptedEvent - | ExecutionQueuedEvent - | ExecutionStartedEvent - | ExecutionRunningEvent - | ExecutionCompletedEvent - | ExecutionFailedEvent - | ExecutionTimeoutEvent - | ExecutionCancelledEvent - # Pod Events - | PodCreatedEvent - | PodScheduledEvent - | PodRunningEvent - | PodSucceededEvent - | PodFailedEvent - | PodTerminatedEvent - | PodDeletedEvent - # Result Events - | ResultStoredEvent - | ResultFailedEvent - # User Events - | UserSettingsUpdatedEvent - | UserRegisteredEvent - | UserLoginEvent - | UserLoggedInEvent - | UserLoggedOutEvent - | UserUpdatedEvent - | UserDeletedEvent - # Notification Events - | NotificationCreatedEvent - | NotificationSentEvent - | NotificationDeliveredEvent - | NotificationFailedEvent - | NotificationReadEvent - | NotificationAllReadEvent - | NotificationClickedEvent - | NotificationPreferencesUpdatedEvent - # Saga Events - | SagaStartedEvent - | SagaCompletedEvent - | SagaFailedEvent - | SagaCancelledEvent - | SagaCompensatingEvent - | SagaCompensatedEvent - # Saga Command Events - | CreatePodCommandEvent - | DeletePodCommandEvent - | AllocateResourcesCommandEvent - | ReleaseResourcesCommandEvent - # Script Events - | ScriptSavedEvent - | ScriptDeletedEvent - | ScriptSharedEvent - # Security Events - | SecurityViolationEvent - | RateLimitExceededEvent - | AuthFailedEvent - # Resource Events - | ResourceLimitExceededEvent - | QuotaExceededEvent - # System Events - | SystemErrorEvent - | ServiceUnhealthyEvent - | ServiceRecoveredEvent - # DLQ Events - | DLQMessageReceivedEvent - | DLQMessageRetriedEvent - | DLQMessageDiscardedEvent, - Discriminator("event_type"), -] - -# Focused union for execution-related events only (for API response typing) -ExecutionDomainEvent = Annotated[ - ExecutionRequestedEvent - | ExecutionAcceptedEvent - | ExecutionQueuedEvent - | ExecutionStartedEvent - | ExecutionRunningEvent - | ExecutionCompletedEvent - | ExecutionFailedEvent - | ExecutionTimeoutEvent - | ExecutionCancelledEvent, - Discriminator("event_type"), -] - -# TypeAdapter for polymorphic loading - validates raw data to 
correct typed event -DomainEventAdapter: TypeAdapter[DomainEvent] = TypeAdapter(DomainEvent) diff --git a/backend/app/domain/user/settings_models.py b/backend/app/domain/user/settings_models.py index 30af0354..03f66e40 100644 --- a/backend/app/domain/user/settings_models.py +++ b/backend/app/domain/user/settings_models.py @@ -6,7 +6,6 @@ from pydantic import BaseModel, ConfigDict, Field from app.domain.enums.common import Theme -from app.domain.enums.events import EventType from app.domain.enums.notification import NotificationChannel @@ -75,7 +74,7 @@ class DomainUserSettingsChangedEvent(BaseModel): model_config = ConfigDict(from_attributes=True, extra="ignore") event_id: str - event_type: EventType + topic: str timestamp: datetime user_id: str changed_fields: list[str] @@ -94,7 +93,7 @@ class DomainSettingsHistoryEntry(BaseModel): model_config = ConfigDict(from_attributes=True) timestamp: datetime - event_type: EventType + topic: str field: str old_value: Any new_value: Any diff --git a/backend/app/events/broker.py b/backend/app/events/broker.py deleted file mode 100644 index 01e70da4..00000000 --- a/backend/app/events/broker.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging -from typing import Any - -from faststream import StreamMessage -from faststream.kafka import KafkaBroker - -from app.domain.events.typed import DomainEvent, DomainEventAdapter -from app.events.schema.schema_registry import SchemaRegistryManager -from app.settings import Settings - - -def create_broker( - settings: Settings, - schema_registry: SchemaRegistryManager, - logger: logging.Logger, -) -> KafkaBroker: - """Create a KafkaBroker with Avro decoder for standalone workers.""" - - async def avro_decoder(msg: StreamMessage[Any]) -> DomainEvent: - payload = await schema_registry.serializer.decode_message(msg.body) - return DomainEventAdapter.validate_python(payload) - - return KafkaBroker( - settings.KAFKA_BOOTSTRAP_SERVERS, - decoder=avro_decoder, - logger=logger, - ) diff --git a/backend/app/events/core/__init__.py b/backend/app/events/core/__init__.py index 555a3e77..fa26c218 100644 --- a/backend/app/events/core/__init__.py +++ b/backend/app/events/core/__init__.py @@ -1,5 +1,3 @@ -from .producer import UnifiedProducer +from .producer import EventPublisher -__all__ = [ - "UnifiedProducer", -] +__all__ = ["EventPublisher"] diff --git a/backend/app/events/core/producer.py b/backend/app/events/core/producer.py index daad87e2..cbd8daf0 100644 --- a/backend/app/events/core/producer.py +++ b/backend/app/events/core/producer.py @@ -5,86 +5,74 @@ from faststream.kafka import KafkaBroker -from app.core.metrics import EventMetrics -from app.core.tracing.utils import inject_trace_context from app.db.repositories.event_repository import EventRepository from app.dlq.models import DLQMessageStatus -from app.domain.enums.kafka import KafkaTopic -from app.domain.events.typed import DomainEvent -from app.events.schema.schema_registry import SchemaRegistryManager -from app.infrastructure.kafka.mappings import EVENT_TYPE_TO_TOPIC +from app.domain.events.typed import BaseEvent from app.settings import Settings -class UnifiedProducer: - """Fully async Kafka producer backed by FastStream KafkaBroker. +class EventPublisher: + """Minimal event publisher: persist to MongoDB, then publish to Kafka. - The broker's lifecycle (start/stop) is managed externally — either by - the FastStream app (worker entry points) or by the FastAPI lifespan. 
+ Metrics and tracing are handled by FastStream middleware (KafkaPrometheusMiddleware, + KafkaTelemetryMiddleware) - no manual instrumentation needed here. + + Topic routing: 1 event type = 1 topic. + Topic name derived from class: ExecutionRequestedEvent -> execution_requested """ def __init__( self, broker: KafkaBroker, - schema_registry_manager: SchemaRegistryManager, event_repository: EventRepository, logger: logging.Logger, settings: Settings, - event_metrics: EventMetrics, ): self._broker = broker - self._schema_registry = schema_registry_manager - self._event_repository = event_repository - self.logger = logger - self._event_metrics = event_metrics - self._topic_prefix = settings.KAFKA_TOPIC_PREFIX - - async def produce(self, event_to_produce: DomainEvent, key: str) -> None: - """Persist event to MongoDB, then publish to Kafka.""" - await self._event_repository.store_event(event_to_produce) - topic = f"{self._topic_prefix}{EVENT_TYPE_TO_TOPIC[event_to_produce.event_type]}" - try: - serialized_value = await self._schema_registry.serialize_event(event_to_produce) - - headers = inject_trace_context({ - "event_type": event_to_produce.event_type, - "correlation_id": event_to_produce.metadata.correlation_id or "", - "service": event_to_produce.metadata.service_name, - }) - - await self._broker.publish( - message=serialized_value, - topic=topic, - key=key.encode(), - headers=headers, - ) - - self._event_metrics.record_kafka_message_produced(topic) - self.logger.debug(f"Message [{event_to_produce}] sent to topic: {topic}") - - except Exception as e: - self._event_metrics.record_kafka_production_error(topic=topic, error_type=type(e).__name__) - self.logger.error(f"Failed to produce message: {e}") - raise + self._repo = event_repository + self._logger = logger + self._prefix = settings.KAFKA_TOPIC_PREFIX - async def send_to_dlq( - self, original_event: DomainEvent, original_topic: str, error: Exception, retry_count: int = 0 - ) -> None: - """Send a failed event to the Dead Letter Queue. + async def publish(self, event: BaseEvent, key: str | None = None) -> str: + """Persist event to MongoDB, then publish to Kafka. - The event body is Avro-encoded (same as every other topic). - DLQ metadata is carried in Kafka headers. 
+ Args: + event: The domain event to publish + key: Optional Kafka partition key (defaults to aggregate_id or event_id) + + Returns: + The event_id of the published event """ - try: - current_task = asyncio.current_task() - task_name = current_task.get_name() if current_task else "main" - producer_id = f"{socket.gethostname()}-{task_name}" + await self._repo.store_event(event) + + topic = type(event).topic(self._prefix) + effective_key = key or event.aggregate_id or event.event_id - serialized_value = await self._schema_registry.serialize_event(original_event) - dlq_topic = f"{self._topic_prefix}{KafkaTopic.DEAD_LETTER_QUEUE}" + await self._broker.publish( + message=event, + topic=topic, + key=effective_key.encode() if effective_key else None, + ) - headers = inject_trace_context({ - "event_type": original_event.event_type, + return event.event_id + + async def send_to_dlq( + self, + event: BaseEvent, + original_topic: str, + error: Exception, + retry_count: int = 0, + ) -> None: + """Send a failed event to the Dead Letter Queue.""" + current_task = asyncio.current_task() + task_name = current_task.get_name() if current_task else "main" + producer_id = f"{socket.gethostname()}-{task_name}" + + await self._broker.publish( + message=event, + topic=f"{self._prefix}dead_letter_queue", + key=event.event_id.encode() if event.event_id else None, + headers={ "original_topic": original_topic, "error_type": type(error).__name__, "error": str(error), @@ -92,23 +80,10 @@ async def send_to_dlq( "failed_at": datetime.now(timezone.utc).isoformat(), "status": DLQMessageStatus.PENDING, "producer_id": producer_id, - }) - - await self._broker.publish( - message=serialized_value, - topic=dlq_topic, - key=original_event.event_id.encode() if original_event.event_id else None, - headers=headers, - ) - - self._event_metrics.record_kafka_message_produced(dlq_topic) - self.logger.warning( - f"Event {original_event.event_id} sent to DLQ. " - f"Original topic: {original_topic}, Error: {error}, " - f"Retry count: {retry_count}" - ) - - except Exception as e: - self.logger.critical( - f"Failed to send event {original_event.event_id} to DLQ: {e}. Original error: {error}", exc_info=True - ) + }, + ) + + self._logger.warning( + f"Event {event.event_id} sent to DLQ. " + f"Original topic: {original_topic}, Error: {error}" + ) diff --git a/backend/app/events/handlers.py b/backend/app/events/handlers.py index d067cebf..9d601f27 100644 --- a/backend/app/events/handlers.py +++ b/backend/app/events/handlers.py @@ -1,33 +1,38 @@ +"""Kafka event handlers. + +Architecture: 1 topic = 1 event type. 
+- Topic name derived from class: ExecutionRequestedEvent -> execution_requested +- No filters needed - topic IS the router +- Type hint = deserialization contract +""" + import asyncio import logging -from collections.abc import Awaitable, Callable from datetime import datetime, timezone from typing import Any from dishka.integrations.faststream import FromDishka -from faststream import AckPolicy, StreamMessage +from faststream import AckPolicy, Context from faststream.kafka import KafkaBroker +from faststream.message import StreamMessage from opentelemetry.trace import SpanKind from app.core.tracing import EventAttributes from app.core.tracing.utils import extract_trace_context, get_tracer from app.dlq.manager import DLQManager from app.dlq.models import DLQMessage, DLQMessageStatus -from app.domain.enums.events import EventType -from app.domain.enums.kafka import CONSUMER_GROUP_SUBSCRIPTIONS, GroupId, KafkaTopic +from app.domain.enums.kafka import GroupId from app.domain.events.typed import ( + BaseEvent, CreatePodCommandEvent, DeletePodCommandEvent, - DomainEvent, ExecutionCancelledEvent, ExecutionCompletedEvent, ExecutionFailedEvent, ExecutionRequestedEvent, ExecutionTimeoutEvent, ) -from app.domain.idempotency import KeyStrategy from app.services.coordinator.coordinator import ExecutionCoordinator -from app.services.idempotency import IdempotencyManager from app.services.k8s_worker import KubernetesWorker from app.services.notification_service import NotificationService from app.services.result_processor.processor import ResultProcessor @@ -36,276 +41,277 @@ from app.settings import Settings -async def with_idempotency( - event: DomainEvent, - handler: Callable[..., Awaitable[None]], - idem: IdempotencyManager, - key_strategy: KeyStrategy, - ttl_seconds: int, - logger: logging.Logger, -) -> None: - """Run *handler* inside an idempotency guard (check → execute → mark).""" - result = await idem.check_and_reserve( - event=event, key_strategy=key_strategy, ttl_seconds=ttl_seconds, - ) - if result.is_duplicate: - logger.info(f"Duplicate event: {event.event_type} ({event.event_id})") - return - try: - await handler(event) - await idem.mark_completed(event=event, key_strategy=key_strategy) - except Exception as e: - await idem.mark_failed( - event=event, error=str(e), key_strategy=key_strategy, - ) - raise - - -def _topics(settings: Settings, group_id: GroupId) -> list[str]: - return [ - f"{settings.KAFKA_TOPIC_PREFIX}{t}" - for t in CONSUMER_GROUP_SUBSCRIPTIONS[group_id] - ] - - def register_coordinator_subscriber(broker: KafkaBroker, settings: Settings) -> None: - sub = broker.subscriber( - *_topics(settings, GroupId.EXECUTION_COORDINATOR), + prefix = settings.KAFKA_TOPIC_PREFIX + + @broker.subscriber( + ExecutionRequestedEvent.topic(prefix), group_id=GroupId.EXECUTION_COORDINATOR, ack_policy=AckPolicy.ACK, ) - - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_REQUESTED) async def on_execution_requested( - body: ExecutionRequestedEvent, - coordinator: FromDishka[ExecutionCoordinator], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionRequestedEvent, + coordinator: FromDishka[ExecutionCoordinator], ) -> None: - await with_idempotency( - body, coordinator.handle_execution_requested, idem, KeyStrategy.EVENT_BASED, 7200, logger, - ) + await coordinator.handle_execution_requested(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_COMPLETED) + @broker.subscriber( + 
ExecutionCompletedEvent.topic(prefix), + group_id=GroupId.EXECUTION_COORDINATOR, + ack_policy=AckPolicy.ACK, + ) async def on_execution_completed( - body: ExecutionCompletedEvent, - coordinator: FromDishka[ExecutionCoordinator], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionCompletedEvent, + coordinator: FromDishka[ExecutionCoordinator], ) -> None: - await with_idempotency( - body, coordinator.handle_execution_completed, idem, KeyStrategy.EVENT_BASED, 7200, logger, - ) + await coordinator.handle_execution_completed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_FAILED) + @broker.subscriber( + ExecutionFailedEvent.topic(prefix), + group_id=GroupId.EXECUTION_COORDINATOR, + ack_policy=AckPolicy.ACK, + ) async def on_execution_failed( - body: ExecutionFailedEvent, - coordinator: FromDishka[ExecutionCoordinator], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionFailedEvent, + coordinator: FromDishka[ExecutionCoordinator], ) -> None: - await with_idempotency( - body, coordinator.handle_execution_failed, idem, KeyStrategy.EVENT_BASED, 7200, logger, - ) + await coordinator.handle_execution_failed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_CANCELLED) + @broker.subscriber( + ExecutionCancelledEvent.topic(prefix), + group_id=GroupId.EXECUTION_COORDINATOR, + ack_policy=AckPolicy.ACK, + ) async def on_execution_cancelled( - body: ExecutionCancelledEvent, - coordinator: FromDishka[ExecutionCoordinator], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionCancelledEvent, + coordinator: FromDishka[ExecutionCoordinator], ) -> None: - await with_idempotency( - body, coordinator.handle_execution_cancelled, idem, KeyStrategy.EVENT_BASED, 7200, logger, - ) - - @sub - async def on_unhandled(body: DomainEvent) -> None: - pass + await coordinator.handle_execution_cancelled(body) def register_k8s_worker_subscriber(broker: KafkaBroker, settings: Settings) -> None: - sub = broker.subscriber( - *_topics(settings, GroupId.K8S_WORKER), + prefix = settings.KAFKA_TOPIC_PREFIX + + @broker.subscriber( + CreatePodCommandEvent.topic(prefix), group_id=GroupId.K8S_WORKER, ack_policy=AckPolicy.ACK, ) - - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.CREATE_POD_COMMAND) async def on_create_pod( - body: CreatePodCommandEvent, - worker: FromDishka[KubernetesWorker], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: CreatePodCommandEvent, + worker: FromDishka[KubernetesWorker], ) -> None: - await with_idempotency(body, worker.handle_create_pod_command, idem, KeyStrategy.CONTENT_HASH, 3600, logger) + await worker.handle_create_pod_command(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.DELETE_POD_COMMAND) + @broker.subscriber( + DeletePodCommandEvent.topic(prefix), + group_id=GroupId.K8S_WORKER, + ack_policy=AckPolicy.ACK, + ) async def on_delete_pod( - body: DeletePodCommandEvent, - worker: FromDishka[KubernetesWorker], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: DeletePodCommandEvent, + worker: FromDishka[KubernetesWorker], ) -> None: - await with_idempotency(body, worker.handle_delete_pod_command, idem, KeyStrategy.CONTENT_HASH, 3600, logger) - - @sub - async def on_unhandled(body: DomainEvent) -> None: - pass + await worker.handle_delete_pod_command(body) def register_result_processor_subscriber(broker: 
KafkaBroker, settings: Settings) -> None: - sub = broker.subscriber( - *_topics(settings, GroupId.RESULT_PROCESSOR), + prefix = settings.KAFKA_TOPIC_PREFIX + + @broker.subscriber( + ExecutionCompletedEvent.topic(prefix), group_id=GroupId.RESULT_PROCESSOR, ack_policy=AckPolicy.ACK, max_poll_records=1, auto_offset_reset="earliest", ) - - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_COMPLETED) async def on_execution_completed( - body: ExecutionCompletedEvent, - processor: FromDishka[ResultProcessor], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionCompletedEvent, + processor: FromDishka[ResultProcessor], ) -> None: - await with_idempotency(body, processor.handle_execution_completed, idem, KeyStrategy.CONTENT_HASH, 7200, logger) + await processor.handle_execution_completed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_FAILED) + @broker.subscriber( + ExecutionFailedEvent.topic(prefix), + group_id=GroupId.RESULT_PROCESSOR, + ack_policy=AckPolicy.ACK, + max_poll_records=1, + auto_offset_reset="earliest", + ) async def on_execution_failed( - body: ExecutionFailedEvent, - processor: FromDishka[ResultProcessor], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionFailedEvent, + processor: FromDishka[ResultProcessor], ) -> None: - await with_idempotency(body, processor.handle_execution_failed, idem, KeyStrategy.CONTENT_HASH, 7200, logger) + await processor.handle_execution_failed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_TIMEOUT) + @broker.subscriber( + ExecutionTimeoutEvent.topic(prefix), + group_id=GroupId.RESULT_PROCESSOR, + ack_policy=AckPolicy.ACK, + max_poll_records=1, + auto_offset_reset="earliest", + ) async def on_execution_timeout( - body: ExecutionTimeoutEvent, - processor: FromDishka[ResultProcessor], - idem: FromDishka[IdempotencyManager], - logger: FromDishka[logging.Logger], + body: ExecutionTimeoutEvent, + processor: FromDishka[ResultProcessor], ) -> None: - await with_idempotency(body, processor.handle_execution_timeout, idem, KeyStrategy.CONTENT_HASH, 7200, logger) - - @sub - async def on_unhandled(body: DomainEvent) -> None: - pass + await processor.handle_execution_timeout(body) def register_saga_subscriber(broker: KafkaBroker, settings: Settings) -> None: - sub = broker.subscriber( - *_topics(settings, GroupId.SAGA_ORCHESTRATOR), + prefix = settings.KAFKA_TOPIC_PREFIX + + @broker.subscriber( + ExecutionRequestedEvent.topic(prefix), group_id=GroupId.SAGA_ORCHESTRATOR, ack_policy=AckPolicy.ACK, ) - - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_REQUESTED) async def on_execution_requested( - body: ExecutionRequestedEvent, - orchestrator: FromDishka[SagaOrchestrator], + body: ExecutionRequestedEvent, + orchestrator: FromDishka[SagaOrchestrator], ) -> None: await orchestrator.handle_execution_requested(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_COMPLETED) + @broker.subscriber( + ExecutionCompletedEvent.topic(prefix), + group_id=GroupId.SAGA_ORCHESTRATOR, + ack_policy=AckPolicy.ACK, + ) async def on_execution_completed( - body: ExecutionCompletedEvent, - orchestrator: FromDishka[SagaOrchestrator], + body: ExecutionCompletedEvent, + orchestrator: FromDishka[SagaOrchestrator], ) -> None: await orchestrator.handle_execution_completed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_FAILED) + 
@broker.subscriber( + ExecutionFailedEvent.topic(prefix), + group_id=GroupId.SAGA_ORCHESTRATOR, + ack_policy=AckPolicy.ACK, + ) async def on_execution_failed( - body: ExecutionFailedEvent, - orchestrator: FromDishka[SagaOrchestrator], + body: ExecutionFailedEvent, + orchestrator: FromDishka[SagaOrchestrator], ) -> None: await orchestrator.handle_execution_failed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_TIMEOUT) + @broker.subscriber( + ExecutionTimeoutEvent.topic(prefix), + group_id=GroupId.SAGA_ORCHESTRATOR, + ack_policy=AckPolicy.ACK, + ) async def on_execution_timeout( - body: ExecutionTimeoutEvent, - orchestrator: FromDishka[SagaOrchestrator], + body: ExecutionTimeoutEvent, + orchestrator: FromDishka[SagaOrchestrator], ) -> None: await orchestrator.handle_execution_timeout(body) - @sub - async def on_unhandled(body: DomainEvent) -> None: - pass +def register_sse_subscriber(broker: KafkaBroker, settings: Settings) -> None: + """SSE subscriber - listens to multiple event types for real-time updates.""" + prefix = settings.KAFKA_TOPIC_PREFIX + @broker.subscriber( + ExecutionCompletedEvent.topic(prefix), + group_id="sse-bridge-pool", + ack_policy=AckPolicy.ACK_FIRST, + auto_offset_reset="latest", + max_workers=settings.SSE_CONSUMER_POOL_SIZE, + ) + async def on_completed(body: ExecutionCompletedEvent, sse_bus: FromDishka[SSERedisBus]) -> None: + await sse_bus.route_domain_event(body) -def register_sse_subscriber(broker: KafkaBroker, settings: Settings) -> None: @broker.subscriber( - *_topics(settings, GroupId.WEBSOCKET_GATEWAY), + ExecutionFailedEvent.topic(prefix), group_id="sse-bridge-pool", ack_policy=AckPolicy.ACK_FIRST, auto_offset_reset="latest", max_workers=settings.SSE_CONSUMER_POOL_SIZE, ) - async def on_sse_event( - body: DomainEvent, - sse_bus: FromDishka[SSERedisBus], - ) -> None: - if body.event_type in SSERedisBus.SSE_ROUTED_EVENTS: - await sse_bus.route_domain_event(body) + async def on_failed(body: ExecutionFailedEvent, sse_bus: FromDishka[SSERedisBus]) -> None: + await sse_bus.route_domain_event(body) + + @broker.subscriber( + ExecutionTimeoutEvent.topic(prefix), + group_id="sse-bridge-pool", + ack_policy=AckPolicy.ACK_FIRST, + auto_offset_reset="latest", + max_workers=settings.SSE_CONSUMER_POOL_SIZE, + ) + async def on_timeout(body: ExecutionTimeoutEvent, sse_bus: FromDishka[SSERedisBus]) -> None: + await sse_bus.route_domain_event(body) + + @broker.subscriber( + ExecutionCancelledEvent.topic(prefix), + group_id="sse-bridge-pool", + ack_policy=AckPolicy.ACK_FIRST, + auto_offset_reset="latest", + max_workers=settings.SSE_CONSUMER_POOL_SIZE, + ) + async def on_cancelled(body: ExecutionCancelledEvent, sse_bus: FromDishka[SSERedisBus]) -> None: + await sse_bus.route_domain_event(body) def register_notification_subscriber(broker: KafkaBroker, settings: Settings) -> None: - sub = broker.subscriber( - *_topics(settings, GroupId.NOTIFICATION_SERVICE), + prefix = settings.KAFKA_TOPIC_PREFIX + + @broker.subscriber( + ExecutionCompletedEvent.topic(prefix), group_id=GroupId.NOTIFICATION_SERVICE, ack_policy=AckPolicy.ACK, max_poll_records=10, auto_offset_reset="latest", ) - - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_COMPLETED) async def on_execution_completed( - body: ExecutionCompletedEvent, - service: FromDishka[NotificationService], + body: ExecutionCompletedEvent, + service: FromDishka[NotificationService], ) -> None: await service.handle_execution_completed(body) - @sub(filter=lambda msg: 
msg.headers["event_type"] == EventType.EXECUTION_FAILED) + @broker.subscriber( + ExecutionFailedEvent.topic(prefix), + group_id=GroupId.NOTIFICATION_SERVICE, + ack_policy=AckPolicy.ACK, + max_poll_records=10, + auto_offset_reset="latest", + ) async def on_execution_failed( - body: ExecutionFailedEvent, - service: FromDishka[NotificationService], + body: ExecutionFailedEvent, + service: FromDishka[NotificationService], ) -> None: await service.handle_execution_failed(body) - @sub(filter=lambda msg: msg.headers["event_type"] == EventType.EXECUTION_TIMEOUT) + @broker.subscriber( + ExecutionTimeoutEvent.topic(prefix), + group_id=GroupId.NOTIFICATION_SERVICE, + ack_policy=AckPolicy.ACK, + max_poll_records=10, + auto_offset_reset="latest", + ) async def on_execution_timeout( - body: ExecutionTimeoutEvent, - service: FromDishka[NotificationService], + body: ExecutionTimeoutEvent, + service: FromDishka[NotificationService], ) -> None: await service.handle_execution_timeout(body) - @sub - async def on_unhandled(body: DomainEvent) -> None: - pass - def register_dlq_subscriber(broker: KafkaBroker, settings: Settings) -> None: """Register a DLQ subscriber that consumes dead-letter messages. - DLQ messages are Avro-encoded DomainEvents (same as every other topic). - DLQ metadata (original_topic, error, retry_count, etc.) lives in Kafka headers. + DLQ messages are stored with topic metadata in headers for replay routing. """ - topic_name = f"{settings.KAFKA_TOPIC_PREFIX}{KafkaTopic.DEAD_LETTER_QUEUE}" + dlq_topic = f"{settings.KAFKA_TOPIC_PREFIX}dead_letter_queue" @broker.subscriber( - topic_name, + dlq_topic, group_id=GroupId.DLQ_MANAGER, ack_policy=AckPolicy.ACK, auto_offset_reset="earliest", ) async def on_dlq_message( - body: DomainEvent, - msg: StreamMessage[Any], - manager: FromDishka[DLQManager], - logger: FromDishka[logging.Logger], + body: BaseEvent, + manager: FromDishka[DLQManager], + logger: FromDishka[logging.Logger], + msg: StreamMessage[Any] = Context("message"), ) -> None: start = asyncio.get_running_loop().time() raw = msg.raw_message @@ -331,13 +337,12 @@ async def on_dlq_message( kind=SpanKind.CONSUMER, attributes={ EventAttributes.KAFKA_TOPIC: str(manager.dlq_topic), - EventAttributes.EVENT_TYPE: body.event_type, EventAttributes.EVENT_ID: body.event_id, }, ): await manager.handle_message(dlq_msg) - manager.metrics.record_dlq_message_received(dlq_msg.original_topic, body.event_type) + manager.metrics.record_dlq_message_received(dlq_msg.original_topic, type(body).__name__) manager.metrics.record_dlq_message_age( (datetime.now(timezone.utc) - dlq_msg.failed_at).total_seconds() ) diff --git a/backend/app/events/schema/schema_registry.py b/backend/app/events/schema/schema_registry.py deleted file mode 100644 index af67d943..00000000 --- a/backend/app/events/schema/schema_registry.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging - -from schema_registry.client import AsyncSchemaRegistryClient, schema -from schema_registry.serializers import AsyncAvroMessageSerializer # type: ignore[attr-defined] - -from app.domain.events.typed import DomainEvent -from app.settings import Settings - - -class SchemaRegistryManager: - """Avro serialization via Confluent Schema Registry. - - Schemas are registered lazily by the underlying serializer on first - produce — no eager bootstrap needed. 
- """ - - def __init__(self, settings: Settings, logger: logging.Logger): - self.logger = logger - self.namespace = "com.integr8scode.events" - self.subject_prefix = settings.SCHEMA_SUBJECT_PREFIX - self._client = AsyncSchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL) - self.serializer = AsyncAvroMessageSerializer(self._client) - - async def serialize_event(self, event: DomainEvent) -> bytes: - """Serialize event to Confluent wire format: [0x00][4-byte schema id][Avro binary].""" - avro = schema.AvroSchema(event.avro_schema(namespace=self.namespace)) - subject = f"{self.subject_prefix}{avro.name}-value" - return await self.serializer.encode_record_with_schema(subject, avro, event.model_dump()) diff --git a/backend/app/infrastructure/kafka/__init__.py b/backend/app/infrastructure/kafka/__init__.py index 97295a56..97cbf756 100644 --- a/backend/app/infrastructure/kafka/__init__.py +++ b/backend/app/infrastructure/kafka/__init__.py @@ -1,12 +1,15 @@ -from app.domain.events.typed import DomainEvent, EventMetadata -from app.infrastructure.kafka.mappings import get_event_class_for_type, get_topic_for_event -from app.infrastructure.kafka.topics import get_all_topics, get_topic_configs +"""Kafka infrastructure. + +Topic routing is handled by FastStream via BaseEvent.topic() method. +This package provides infrastructure-level Kafka utilities. +""" + +from app.domain.enums.kafka import GroupId, KafkaTopic +from app.domain.events.typed import BaseEvent, EventMetadata __all__ = [ - "DomainEvent", + "BaseEvent", "EventMetadata", - "get_all_topics", - "get_topic_configs", - "get_event_class_for_type", - "get_topic_for_event", + "GroupId", + "KafkaTopic", ] diff --git a/backend/app/infrastructure/kafka/mappings.py b/backend/app/infrastructure/kafka/mappings.py deleted file mode 100644 index e0a41100..00000000 --- a/backend/app/infrastructure/kafka/mappings.py +++ /dev/null @@ -1,104 +0,0 @@ -from functools import lru_cache -from typing import get_args, get_origin - -from app.domain.enums.events import EventType -from app.domain.enums.kafka import KafkaTopic - -# EventType -> KafkaTopic routing -EVENT_TYPE_TO_TOPIC: dict[EventType, KafkaTopic] = { - # Execution events - EventType.EXECUTION_REQUESTED: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_ACCEPTED: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_QUEUED: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_STARTED: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_RUNNING: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_COMPLETED: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_FAILED: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_TIMEOUT: KafkaTopic.EXECUTION_EVENTS, - EventType.EXECUTION_CANCELLED: KafkaTopic.EXECUTION_EVENTS, - # Pod events - EventType.POD_CREATED: KafkaTopic.POD_EVENTS, - EventType.POD_SCHEDULED: KafkaTopic.POD_EVENTS, - EventType.POD_RUNNING: KafkaTopic.POD_EVENTS, - EventType.POD_SUCCEEDED: KafkaTopic.POD_EVENTS, - EventType.POD_FAILED: KafkaTopic.POD_EVENTS, - EventType.POD_TERMINATED: KafkaTopic.POD_EVENTS, - EventType.POD_DELETED: KafkaTopic.POD_EVENTS, - # Result events - EventType.RESULT_STORED: KafkaTopic.EXECUTION_RESULTS, - EventType.RESULT_FAILED: KafkaTopic.EXECUTION_RESULTS, - # User events - EventType.USER_REGISTERED: KafkaTopic.USER_EVENTS, - EventType.USER_LOGIN: KafkaTopic.USER_EVENTS, - EventType.USER_LOGGED_IN: KafkaTopic.USER_EVENTS, - EventType.USER_LOGGED_OUT: KafkaTopic.USER_EVENTS, - EventType.USER_UPDATED: KafkaTopic.USER_EVENTS, - EventType.USER_DELETED: KafkaTopic.USER_EVENTS, - 
EventType.USER_SETTINGS_UPDATED: KafkaTopic.USER_SETTINGS_EVENTS, - # Notification events - EventType.NOTIFICATION_CREATED: KafkaTopic.NOTIFICATION_EVENTS, - EventType.NOTIFICATION_SENT: KafkaTopic.NOTIFICATION_EVENTS, - EventType.NOTIFICATION_DELIVERED: KafkaTopic.NOTIFICATION_EVENTS, - EventType.NOTIFICATION_FAILED: KafkaTopic.NOTIFICATION_EVENTS, - EventType.NOTIFICATION_READ: KafkaTopic.NOTIFICATION_EVENTS, - EventType.NOTIFICATION_CLICKED: KafkaTopic.NOTIFICATION_EVENTS, - EventType.NOTIFICATION_PREFERENCES_UPDATED: KafkaTopic.NOTIFICATION_EVENTS, - # Script events - EventType.SCRIPT_SAVED: KafkaTopic.SCRIPT_EVENTS, - EventType.SCRIPT_DELETED: KafkaTopic.SCRIPT_EVENTS, - EventType.SCRIPT_SHARED: KafkaTopic.SCRIPT_EVENTS, - # Security events - EventType.SECURITY_VIOLATION: KafkaTopic.SECURITY_EVENTS, - EventType.RATE_LIMIT_EXCEEDED: KafkaTopic.SECURITY_EVENTS, - EventType.AUTH_FAILED: KafkaTopic.SECURITY_EVENTS, - # Resource events - EventType.RESOURCE_LIMIT_EXCEEDED: KafkaTopic.RESOURCE_EVENTS, - EventType.QUOTA_EXCEEDED: KafkaTopic.RESOURCE_EVENTS, - # System events - EventType.SYSTEM_ERROR: KafkaTopic.SYSTEM_EVENTS, - EventType.SERVICE_UNHEALTHY: KafkaTopic.SYSTEM_EVENTS, - EventType.SERVICE_RECOVERED: KafkaTopic.SYSTEM_EVENTS, - # Saga events - EventType.SAGA_STARTED: KafkaTopic.SAGA_EVENTS, - EventType.SAGA_COMPLETED: KafkaTopic.SAGA_EVENTS, - EventType.SAGA_FAILED: KafkaTopic.SAGA_EVENTS, - EventType.SAGA_CANCELLED: KafkaTopic.SAGA_EVENTS, - EventType.SAGA_COMPENSATING: KafkaTopic.SAGA_EVENTS, - EventType.SAGA_COMPENSATED: KafkaTopic.SAGA_EVENTS, - # Saga command events - EventType.CREATE_POD_COMMAND: KafkaTopic.SAGA_COMMANDS, - EventType.DELETE_POD_COMMAND: KafkaTopic.SAGA_COMMANDS, - EventType.ALLOCATE_RESOURCES_COMMAND: KafkaTopic.SAGA_COMMANDS, - EventType.RELEASE_RESOURCES_COMMAND: KafkaTopic.SAGA_COMMANDS, - # DLQ events - EventType.DLQ_MESSAGE_RECEIVED: KafkaTopic.DLQ_EVENTS, - EventType.DLQ_MESSAGE_RETRIED: KafkaTopic.DLQ_EVENTS, - EventType.DLQ_MESSAGE_DISCARDED: KafkaTopic.DLQ_EVENTS, -} - - -@lru_cache(maxsize=1) -def _get_event_type_to_class() -> dict[EventType, type]: - """Build mapping from EventType to event class using DomainEvent union.""" - from app.domain.events.typed import DomainEvent - - union_type = get_args(DomainEvent)[0] - classes = list(get_args(union_type)) if get_origin(union_type) is not None else [union_type] - return {cls.model_fields["event_type"].default: cls for cls in classes} - - -@lru_cache(maxsize=128) -def get_event_class_for_type(event_type: EventType) -> type | None: - """Get the event class for a given event type.""" - return _get_event_type_to_class().get(event_type) - - -@lru_cache(maxsize=128) -def get_topic_for_event(event_type: EventType) -> KafkaTopic: - """Get the Kafka topic for a given event type.""" - return EVENT_TYPE_TO_TOPIC.get(event_type, KafkaTopic.SYSTEM_EVENTS) - - -def get_event_types_for_topic(topic: KafkaTopic) -> list[EventType]: - """Get all event types that publish to a given topic.""" - return [et for et, t in EVENT_TYPE_TO_TOPIC.items() if t == topic] diff --git a/backend/app/infrastructure/kafka/topics.py b/backend/app/infrastructure/kafka/topics.py deleted file mode 100644 index be5ae6d8..00000000 --- a/backend/app/infrastructure/kafka/topics.py +++ /dev/null @@ -1,201 +0,0 @@ -from typing import Any - -from app.domain.enums.kafka import KafkaTopic - - -def get_all_topics() -> set[KafkaTopic]: - """Get all Kafka topics.""" - return set(KafkaTopic) - - -def get_topic_configs() -> dict[KafkaTopic, dict[str, 
Any]]: - """Get configuration for all Kafka topics.""" - return { - # High-volume execution topics - KafkaTopic.EXECUTION_EVENTS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - KafkaTopic.EXECUTION_COMPLETED: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - KafkaTopic.EXECUTION_FAILED: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - KafkaTopic.EXECUTION_TIMEOUT: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - KafkaTopic.EXECUTION_REQUESTS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - KafkaTopic.EXECUTION_COMMANDS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "86400000", # 1 day - "compression.type": "gzip", - }, - }, - KafkaTopic.EXECUTION_TASKS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "86400000", # 1 day - "compression.type": "gzip", - }, - }, - # Pod lifecycle topics - KafkaTopic.POD_EVENTS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "86400000", # 1 day - "compression.type": "gzip", - }, - }, - KafkaTopic.POD_STATUS_UPDATES: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "86400000", # 1 day - "compression.type": "gzip", - }, - }, - KafkaTopic.POD_RESULTS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - # Result topics - KafkaTopic.EXECUTION_RESULTS: { - "num_partitions": 10, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - # User topics - KafkaTopic.USER_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "2592000000", # 30 days - "compression.type": "gzip", - }, - }, - KafkaTopic.USER_NOTIFICATIONS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - KafkaTopic.USER_SETTINGS_EVENTS: { - "num_partitions": 3, - "replication_factor": 1, - "config": { - "retention.ms": "2592000000", # 30 days - "compression.type": "gzip", - }, - }, - # Script topics - KafkaTopic.SCRIPT_EVENTS: { - "num_partitions": 3, - "replication_factor": 1, - "config": { - "retention.ms": "2592000000", # 30 days - "compression.type": "gzip", - }, - }, - # Security topics - KafkaTopic.SECURITY_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "2592000000", # 30 days - "compression.type": "gzip", - }, - }, - # Resource topics - KafkaTopic.RESOURCE_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - # Notification topics - KafkaTopic.NOTIFICATION_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - # System topics - KafkaTopic.SYSTEM_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - 
"retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - # Saga topics - KafkaTopic.SAGA_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - # Infrastructure topics - KafkaTopic.DEAD_LETTER_QUEUE: { - "num_partitions": 3, - "replication_factor": 1, - "config": { - "retention.ms": "1209600000", # 14 days - "compression.type": "gzip", - }, - }, - KafkaTopic.WEBSOCKET_EVENTS: { - "num_partitions": 5, - "replication_factor": 1, - "config": { - "retention.ms": "86400000", # 1 day - "compression.type": "gzip", - }, - }, - } diff --git a/backend/app/main.py b/backend/app/main.py index 607ba1b7..8f67b2a4 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -3,6 +3,7 @@ from dishka.integrations.faststream import setup_dishka as setup_dishka_faststream from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from faststream.kafka import KafkaBroker from app.api.routes import ( auth, @@ -40,12 +41,10 @@ RequestSizeLimitMiddleware, setup_metrics, ) -from app.events.broker import create_broker from app.events.handlers import ( register_notification_subscriber, register_sse_subscriber, ) -from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings @@ -61,8 +60,7 @@ def create_app(settings: Settings | None = None) -> FastAPI: logger = setup_logger(settings.LOG_LEVEL) # Create Kafka broker and register in-app subscribers - schema_registry = SchemaRegistryManager(settings, logger) - broker = create_broker(settings, schema_registry, logger) + broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger) register_sse_subscriber(broker, settings) register_notification_subscriber(broker, settings) diff --git a/backend/app/schemas_pydantic/admin_events.py b/backend/app/schemas_pydantic/admin_events.py index 4c8c7e61..d8ad47f0 100644 --- a/backend/app/schemas_pydantic/admin_events.py +++ b/backend/app/schemas_pydantic/admin_events.py @@ -2,18 +2,17 @@ from pydantic import BaseModel, ConfigDict, Field, computed_field -from app.domain.enums.events import EventType from app.domain.events.event_models import EventSummary -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent from app.domain.replay import ReplayError -from app.schemas_pydantic.events import EventTypeCountSchema, HourlyEventCountSchema +from app.schemas_pydantic.events import HourlyEventCountSchema, TopicCountSchema from app.schemas_pydantic.execution import ExecutionResult class EventFilter(BaseModel): """Filter criteria for browsing events""" - event_types: list[EventType] | None = None + topics: list[str] | None = None aggregate_id: str | None = None correlation_id: str | None = None user_id: str | None = None @@ -48,7 +47,7 @@ class EventReplayRequest(BaseModel): class EventBrowseResponse(BaseModel): """Response model for browsing events""" - events: list[DomainEvent] + events: list[BaseEvent] total: int skip: int limit: int @@ -57,7 +56,7 @@ class EventBrowseResponse(BaseModel): class EventDetailResponse(BaseModel): """Response model for event detail""" - event: DomainEvent + event: BaseEvent related_events: list[EventSummary] timeline: list[EventSummary] @@ -120,7 +119,7 @@ class EventStatsResponse(BaseModel): model_config = ConfigDict(from_attributes=True) total_events: int - events_by_type: list[EventTypeCountSchema] + events_by_topic: list[TopicCountSchema] events_by_hour: 
list[HourlyEventCountSchema] top_users: list[UserEventCountSchema] error_rate: float diff --git a/backend/app/schemas_pydantic/admin_user_overview.py b/backend/app/schemas_pydantic/admin_user_overview.py index 52301677..c268986d 100644 --- a/backend/app/schemas_pydantic/admin_user_overview.py +++ b/backend/app/schemas_pydantic/admin_user_overview.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, ConfigDict -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent from app.schemas_pydantic.events import EventStatistics from app.schemas_pydantic.user import UserResponse @@ -30,6 +30,6 @@ class AdminUserOverview(BaseModel): stats: EventStatistics derived_counts: DerivedCounts rate_limit_summary: RateLimitSummary - recent_events: list[DomainEvent] = [] + recent_events: list[BaseEvent] = [] model_config = ConfigDict(from_attributes=True) diff --git a/backend/app/schemas_pydantic/dlq.py b/backend/app/schemas_pydantic/dlq.py index 4093d03f..7876f1e1 100644 --- a/backend/app/schemas_pydantic/dlq.py +++ b/backend/app/schemas_pydantic/dlq.py @@ -10,7 +10,7 @@ RetryStrategy, TopicStatistic, ) -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent class DLQStats(BaseModel): @@ -30,7 +30,7 @@ class DLQMessageResponse(BaseModel): model_config = ConfigDict(from_attributes=True) - event: DomainEvent + event: BaseEvent original_topic: str error: str retry_count: int @@ -101,7 +101,7 @@ class DLQMessageDetail(BaseModel): model_config = ConfigDict(from_attributes=True) - event: DomainEvent + event: BaseEvent original_topic: str error: str retry_count: int diff --git a/backend/app/schemas_pydantic/events.py b/backend/app/schemas_pydantic/events.py index 93d560b4..cef02ac5 100644 --- a/backend/app/schemas_pydantic/events.py +++ b/backend/app/schemas_pydantic/events.py @@ -5,16 +5,15 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator from app.domain.enums.common import Environment, SortOrder -from app.domain.enums.events import EventType -from app.domain.events.typed import ContainerStatusInfo, DomainEvent +from app.domain.events.typed import BaseEvent, ContainerStatusInfo -class EventTypeCountSchema(BaseModel): - """Event count by type.""" +class TopicCountSchema(BaseModel): + """Event count by topic.""" model_config = ConfigDict(from_attributes=True) - event_type: EventType + topic: str count: int @@ -56,7 +55,7 @@ class EventSummaryResponse(BaseModel): model_config = ConfigDict(from_attributes=True) event_id: str - event_type: EventType + topic: str timestamp: datetime aggregate_id: str | None = None @@ -64,7 +63,7 @@ class EventSummaryResponse(BaseModel): class EventListResponse(BaseModel): model_config = ConfigDict(from_attributes=True) - events: list[DomainEvent] + events: list[BaseEvent] total: int limit: int skip: int @@ -74,7 +73,7 @@ class EventListResponse(BaseModel): class EventFilterRequest(BaseModel): """Request model for filtering events.""" - event_types: list[EventType] | None = Field(None, description="Filter by event types") + topics: list[str] | None = Field(None, description="Filter by event topics") aggregate_id: str | None = Field(None, description="Filter by aggregate ID") correlation_id: str | None = Field(None, description="Filter by correlation ID") user_id: str | None = Field(None, description="Filter by user ID (admin only)") @@ -90,7 +89,7 @@ class EventFilterRequest(BaseModel): @field_validator("sort_by") @classmethod def validate_sort_field(cls, v: str) -> str: - allowed_fields = 
{"timestamp", "event_type", "aggregate_id", "correlation_id", "stored_at"} + allowed_fields = {"timestamp", "topic", "aggregate_id", "correlation_id", "stored_at"} if v not in allowed_fields: raise ValueError(f"Sort field must be one of {allowed_fields}") return v @@ -106,7 +105,7 @@ class EventAggregationRequest(BaseModel): class PublishEventRequest(BaseModel): """Request model for publishing events.""" - event_type: EventType = Field(..., description="Type of event to publish") + topic: str = Field(..., description="Topic name for the event") payload: dict[str, Any] = Field(..., description="Event payload data") aggregate_id: str | None = Field(None, description="Aggregate root ID") correlation_id: str | None = Field(None, description="Correlation ID") @@ -118,7 +117,7 @@ class EventBase(BaseModel): """Base event model for API responses.""" event_id: str = Field(default_factory=lambda: str(uuid4())) - event_type: EventType + topic: str event_version: str = "1.0" timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) aggregate_id: str | None = None @@ -131,7 +130,7 @@ class EventBase(BaseModel): json_schema_extra={ "example": { "event_id": "550e8400-e29b-41d4-a716-446655440000", - "event_type": EventType.EXECUTION_REQUESTED, + "topic": "execution_requested", "event_version": "1.0", "timestamp": "2024-01-20T10:30:00Z", "aggregate_id": "execution-123", @@ -192,7 +191,7 @@ class EventInDB(EventBase): class EventQuery(BaseModel): """Query parameters for event search.""" - event_types: list[EventType] | None = None + topics: list[str] | None = None aggregate_id: str | None = None correlation_id: str | None = None user_id: str | None = None @@ -206,7 +205,7 @@ class EventQuery(BaseModel): model_config = ConfigDict( json_schema_extra={ "example": { - "event_types": [EventType.EXECUTION_REQUESTED, EventType.EXECUTION_COMPLETED], + "topics": ["execution_requested", "execution_completed"], "user_id": "user-123", "start_time": "2024-01-20T00:00:00Z", "end_time": "2024-01-20T23:59:59Z", @@ -221,7 +220,7 @@ class EventStatistics(BaseModel): """Event statistics response.""" total_events: int - events_by_type: list[EventTypeCountSchema] + events_by_topic: list[TopicCountSchema] events_by_service: list[ServiceEventCountSchema] events_by_hour: list[HourlyEventCountSchema] start_time: datetime | None = None @@ -232,10 +231,10 @@ class EventStatistics(BaseModel): json_schema_extra={ "example": { "total_events": 1543, - "events_by_type": [ - {"event_type": "EXECUTION_REQUESTED", "count": 523}, - {"event_type": "EXECUTION_COMPLETED", "count": 498}, - {"event_type": "POD_CREATED", "count": 522}, + "events_by_topic": [ + {"topic": "execution_requested", "count": 523}, + {"topic": "execution_completed", "count": 498}, + {"topic": "pod_created", "count": 522}, ], "events_by_service": [ {"service_name": "api-gateway", "count": 523}, @@ -255,7 +254,7 @@ class EventProjection(BaseModel): name: str description: str | None = None - source_events: list[EventType] # Event types to include + source_topics: list[str] # Event topics to include aggregation_pipeline: list[dict[str, Any]] output_collection: str refresh_interval_seconds: int = 300 # 5 minutes default @@ -266,9 +265,9 @@ class EventProjection(BaseModel): "example": { "name": "execution_summary", "description": "Summary of executions by user and status", - "source_events": [EventType.EXECUTION_REQUESTED, EventType.EXECUTION_COMPLETED], + "source_topics": ["execution_requested", "execution_completed"], "aggregation_pipeline": [ - 
{"$match": {"event_type": {"$in": [EventType.EXECUTION_REQUESTED, EventType.EXECUTION_COMPLETED]}}}, + {"$match": {"topic": {"$in": ["execution_requested", "execution_completed"]}}}, { "$group": { "_id": {"user_id": "$metadata.user_id", "status": "$payload.status"}, @@ -314,7 +313,7 @@ class ReplayAggregateResponse(BaseModel): dry_run: bool aggregate_id: str event_count: int | None = None - event_types: list[EventType] | None = None + topics: list[str] | None = None start_time: datetime | None = None end_time: datetime | None = None replayed_count: int | None = None diff --git a/backend/app/schemas_pydantic/sse.py b/backend/app/schemas_pydantic/sse.py index b6d9874d..81cf959f 100644 --- a/backend/app/schemas_pydantic/sse.py +++ b/backend/app/schemas_pydantic/sse.py @@ -56,7 +56,7 @@ class SSEExecutionEventData(BaseModel): class RedisSSEMessage(BaseModel): """Message structure published to Redis for execution SSE delivery.""" - event_type: EventType = Field(description="Event type from Kafka") + event_type: str = Field(description="Event topic name from Kafka") execution_id: str | None = Field(None, description="Execution ID") data: dict[str, Any] = Field(description="Full event data from BaseEvent.model_dump()") diff --git a/backend/app/services/admin/admin_events_service.py b/backend/app/services/admin/admin_events_service.py index f3380578..f167f6e6 100644 --- a/backend/app/services/admin/admin_events_service.py +++ b/backend/app/services/admin/admin_events_service.py @@ -264,7 +264,6 @@ async def delete_event(self, *, event_id: str, deleted_by: str) -> bool: "Event deleted", extra={ "event_id": event_id, - "event_type": detail.event.event_type, "correlation_id": correlation_id, "deleted_by": deleted_by, }, diff --git a/backend/app/services/admin/admin_user_service.py b/backend/app/services/admin/admin_user_service.py index 619b2208..76c9a17a 100644 --- a/backend/app/services/admin/admin_user_service.py +++ b/backend/app/services/admin/admin_user_service.py @@ -5,7 +5,6 @@ from app.core.security import SecurityService from app.db.repositories.admin.admin_user_repository import AdminUserRepository from app.domain.admin import AdminUserOverviewDomain, DerivedCountsDomain, RateLimitSummaryDomain -from app.domain.enums.events import EventType from app.domain.enums.execution import ExecutionStatus from app.domain.enums.user import UserRole from app.domain.rate_limit import RateLimitUpdateResult, UserRateLimit, UserRateLimitsResult @@ -74,17 +73,17 @@ def _count(status: ExecutionStatus) -> int: ) # Recent execution-related events (last 10) - event_types: list[EventType] = [ - EventType.EXECUTION_REQUESTED, - EventType.EXECUTION_STARTED, - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.EXECUTION_TIMEOUT, - EventType.EXECUTION_CANCELLED, + execution_topics = [ + "execution_requested", + "execution_started", + "execution_completed", + "execution_failed", + "execution_timeout", + "execution_cancelled", ] recent_result = await self._events.get_user_events_paginated( user_id=user_id, - event_types=event_types, + topics=execution_topics, start_time=start, end_time=now, limit=10, diff --git a/backend/app/services/coordinator/coordinator.py b/backend/app/services/coordinator/coordinator.py index d0bcffbc..d68ffe35 100644 --- a/backend/app/services/coordinator/coordinator.py +++ b/backend/app/services/coordinator/coordinator.py @@ -18,7 +18,7 @@ ExecutionFailedEvent, ExecutionRequestedEvent, ) -from app.events.core import UnifiedProducer +from app.events.core import 
EventPublisher class QueueRejectError(Exception): @@ -29,19 +29,20 @@ class QueueRejectError(Exception): class ExecutionCoordinator: - """ - Coordinates execution scheduling across the system. + """Coordinates execution scheduling across the system. This service: 1. Consumes ExecutionRequested events 2. Manages execution queue with priority 3. Enforces per-user rate limits 4. Publishes CreatePodCommand events for workers + + Idempotency is handled by FastStream middleware (IdempotencyMiddleware). """ def __init__( self, - producer: UnifiedProducer, + producer: EventPublisher, execution_repository: ExecutionRepository, logger: logging.Logger, coordinator_metrics: CoordinatorMetrics, @@ -71,7 +72,7 @@ def __init__( async def handle_execution_requested(self, event: ExecutionRequestedEvent) -> None: """Handle execution requested event - add to queue for processing.""" - self.logger.info(f"HANDLER CALLED: handle_execution_requested for event {event.event_id}") + self.logger.info(f"Handling execution_requested for event {event.event_id}") start_time = time.time() try: @@ -301,7 +302,7 @@ async def _publish_create_pod_command(self, request: ExecutionRequestedEvent) -> metadata=metadata, ) - await self.producer.produce(event_to_produce=create_pod_cmd, key=request.execution_id) + await self.producer.publish(event=create_pod_cmd, key=request.execution_id) async def _publish_execution_accepted(self, request: ExecutionRequestedEvent, position: int) -> None: """Publish execution accepted event to notify that request was valid and queued.""" @@ -315,7 +316,7 @@ async def _publish_execution_accepted(self, request: ExecutionRequestedEvent, po metadata=request.metadata, ) - await self.producer.produce(event_to_produce=event, key=request.execution_id) + await self.producer.publish(event=event, key=request.execution_id) async def _publish_queue_full(self, request: ExecutionRequestedEvent, error: str) -> None: """Publish queue full event.""" @@ -329,7 +330,7 @@ async def _publish_queue_full(self, request: ExecutionRequestedEvent, error: str error_message=error, ) - await self.producer.produce(event_to_produce=event, key=request.execution_id) + await self.producer.publish(event=event, key=request.execution_id) async def _publish_scheduling_failed(self, request: ExecutionRequestedEvent, error: str) -> None: """Publish scheduling failed event.""" @@ -343,4 +344,4 @@ async def _publish_scheduling_failed(self, request: ExecutionRequestedEvent, err error_message=error, ) - await self.producer.produce(event_to_produce=event, key=request.execution_id) + await self.producer.publish(event=event, key=request.execution_id) diff --git a/backend/app/services/event_replay/replay_service.py b/backend/app/services/event_replay/replay_service.py index 0ff77b11..ba8ae72a 100644 --- a/backend/app/services/event_replay/replay_service.py +++ b/backend/app/services/event_replay/replay_service.py @@ -14,7 +14,7 @@ from app.db.repositories.replay_repository import ReplayRepository from app.domain.admin.replay_updates import ReplaySessionUpdate from app.domain.enums.replay import ReplayStatus, ReplayTarget -from app.domain.events.typed import DomainEvent, DomainEventAdapter +from app.domain.events.typed import BaseEvent from app.domain.replay import ( CleanupResult, ReplayConfig, @@ -24,14 +24,14 @@ ReplaySessionNotFoundError, ReplaySessionState, ) -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher class EventReplayService: def __init__( self, repository: ReplayRepository, - producer: 
UnifiedProducer, + producer: EventPublisher, replay_metrics: ReplayMetrics, logger: logging.Logger, ) -> None: @@ -214,20 +214,20 @@ async def _await_if_paused(self, session: ReplaySessionState) -> None: if resume_event: await resume_event.wait() - async def _fetch_event_batches(self, session: ReplaySessionState) -> AsyncIterator[list[DomainEvent]]: + async def _fetch_event_batches(self, session: ReplaySessionState) -> AsyncIterator[list[BaseEvent]]: events_processed = 0 max_events = session.config.max_events async for batch_docs in self._repository.fetch_events( replay_filter=session.config.filter, batch_size=session.config.batch_size ): - batch: list[DomainEvent] = [] + batch: list[BaseEvent] = [] for doc in batch_docs: if max_events and events_processed >= max_events: break try: - event = DomainEventAdapter.validate_python(doc) + event = BaseEvent.model_validate(doc) except ValidationError as e: session.failed_events += 1 self.logger.warning( @@ -245,7 +245,7 @@ async def _fetch_event_batches(self, session: ReplaySessionState) -> AsyncIterat if max_events and events_processed >= max_events: break - async def _process_batch(self, session: ReplaySessionState, batch: list[DomainEvent]) -> None: + async def _process_batch(self, session: ReplaySessionState, batch: list[BaseEvent]) -> None: with trace_span( name="event_replay.process_batch", kind=SpanKind.INTERNAL, @@ -283,12 +283,12 @@ async def _process_batch(self, session: ReplaySessionState, batch: list[DomainEv else: session.failed_events += 1 self._metrics.record_event_replayed( - session.config.replay_type, event.event_type, "success" if success else "failed" + session.config.replay_type, type(event).topic(), "success" if success else "failed" ) session.last_event_at = event.timestamp await self._update_session_in_db(session) - async def _replay_event(self, session: ReplaySessionState, event: DomainEvent) -> bool: + async def _replay_event(self, session: ReplaySessionState, event: BaseEvent) -> bool: config = session.config attempts = config.retry_attempts if config.retry_failed else 1 @@ -298,7 +298,7 @@ async def _replay_event(self, session: ReplaySessionState, event: DomainEvent) - case ReplayTarget.KAFKA: if not config.preserve_timestamps: event.timestamp = datetime.now(timezone.utc) - await self._producer.produce(event_to_produce=event, key=event.aggregate_id or event.event_id) + await self._producer.publish(event=event, key=event.aggregate_id or event.event_id) case ReplayTarget.FILE: if not config.target_file_path: self.logger.error("No target file path specified") @@ -320,7 +320,7 @@ async def _replay_event(self, session: ReplaySessionState, event: DomainEvent) - return False - async def _write_event_to_file(self, event: DomainEvent, file_path: str) -> None: + async def _write_event_to_file(self, event: BaseEvent, file_path: str) -> None: if file_path not in self._file_locks: self._file_locks[file_path] = asyncio.Lock() diff --git a/backend/app/services/event_service.py b/backend/app/services/event_service.py index 1418df26..703a5b5a 100644 --- a/backend/app/services/event_service.py +++ b/backend/app/services/event_service.py @@ -2,11 +2,10 @@ from typing import Any from app.db.repositories.event_repository import EventRepository -from app.domain.enums.events import EventType from app.domain.enums.user import UserRole from app.domain.events import ( ArchivedEvent, - DomainEvent, + BaseEvent, EventAggregationResult, EventFilter, EventListResult, @@ -19,8 +18,8 @@ def _filter_to_mongo_query(flt: EventFilter) -> dict[str, Any]: 
"""Convert EventFilter to MongoDB query dict.""" query: dict[str, Any] = {} - if flt.event_types: - query["event_type"] = {"$in": flt.event_types} + if flt.topics: + query["topic"] = {"$in": flt.topics} if flt.aggregate_id: query["aggregate_id"] = flt.aggregate_id if flt.correlation_id: @@ -89,7 +88,7 @@ async def get_execution_events( async def get_user_events_paginated( self, user_id: str, - event_types: list[EventType] | None = None, + topics: list[str] | None = None, start_time: datetime | None = None, end_time: datetime | None = None, limit: int = 100, @@ -98,7 +97,7 @@ async def get_user_events_paginated( ) -> EventListResult: return await self.repository.get_user_events_paginated( user_id=user_id, - event_types=event_types, + topics=topics, start_time=start_time, end_time=end_time, limit=limit, @@ -174,7 +173,7 @@ async def get_event( event_id: str, user_id: str, user_role: UserRole, - ) -> DomainEvent | None: + ) -> BaseEvent | None: event = await self.repository.get_event(event_id) if not event: return None @@ -199,13 +198,13 @@ async def aggregate_events( new_pipeline.insert(0, {"$match": user_filter}) return await self.repository.aggregate_events(new_pipeline, limit=limit) - async def list_event_types( + async def list_topics( self, user_id: str, user_role: UserRole, ) -> list[str]: match = self._build_user_filter(user_id, user_role) - return await self.repository.list_event_types(match=match) + return await self.repository.list_topics(match=match) async def delete_event_with_archival( self, @@ -225,11 +224,11 @@ async def get_aggregate_replay_info(self, aggregate_id: str) -> EventReplayInfo async def get_events_by_aggregate( self, aggregate_id: str, - event_types: list[EventType] | None = None, + topics: list[str] | None = None, limit: int = 100, - ) -> list[DomainEvent]: + ) -> list[BaseEvent]: return await self.repository.get_events_by_aggregate( aggregate_id=aggregate_id, - event_types=event_types, + topics=topics, limit=limit, ) diff --git a/backend/app/services/execution_service.py b/backend/app/services/execution_service.py index 59a7b556..c3728901 100644 --- a/backend/app/services/execution_service.py +++ b/backend/app/services/execution_service.py @@ -8,10 +8,9 @@ from app.core.metrics import ExecutionMetrics from app.db.repositories.event_repository import EventRepository from app.db.repositories.execution_repository import ExecutionRepository -from app.domain.enums.events import EventType from app.domain.enums.execution import ExecutionStatus, QueuePriority from app.domain.events.typed import ( - DomainEvent, + BaseEvent, EventMetadata, ExecutionCancelledEvent, ExecutionRequestedEvent, @@ -24,13 +23,13 @@ ExecutionResultDomain, ResourceLimitsDomain, ) -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from app.runtime_registry import RUNTIME_REGISTRY from app.settings import Settings # Type aliases for better readability UserId: TypeAlias = str -EventFilter: TypeAlias = list[EventType] | None +TopicsFilter: TypeAlias = list[str] | None TimeRange: TypeAlias = tuple[datetime | None, datetime | None] ExecutionQuery: TypeAlias = dict[str, Any] ExecutionStats: TypeAlias = dict[str, Any] @@ -48,7 +47,7 @@ class ExecutionService: def __init__( self, execution_repo: ExecutionRepository, - producer: UnifiedProducer, + producer: EventPublisher, event_repository: EventRepository, settings: Settings, logger: logging.Logger, @@ -213,7 +212,7 @@ async def execute_script( # Publish to Kafka; on failure, mark error and raise try: - await 
self.producer.produce(event_to_produce=event, key=created_execution.execution_id) + await self.producer.publish(event=event, key=created_execution.execution_id) except Exception as e: # pragma: no cover - mapped behavior self.metrics.record_script_execution(ExecutionStatus.ERROR, lang_and_version) self.metrics.record_error(type(e).__name__) @@ -289,22 +288,22 @@ async def get_execution_result(self, execution_id: str) -> DomainExecution: async def get_execution_events( self, execution_id: str, - event_types: EventFilter = None, + topics: list[str] | None = None, limit: int = 100, - ) -> list[DomainEvent]: + ) -> list[BaseEvent]: """ Get all events for an execution from the event store. Args: execution_id: UUID of the execution. - event_types: Filter by specific event types. + topics: Filter by specific event topics. limit: Maximum number of events to return. Returns: List of events for the execution. """ result = await self.event_repository.get_execution_events( - execution_id=execution_id, event_types=event_types, limit=limit, + execution_id=execution_id, topics=topics, limit=limit, ) events = result.events @@ -313,7 +312,7 @@ async def get_execution_events( extra={ "execution_id": execution_id, "event_count": len(events), - "event_types": event_types, + "topics": topics, }, ) @@ -425,6 +424,36 @@ def _build_user_query( return query + async def cancel_execution(self, execution_id: str, reason: str, user_id: str | None = None) -> None: + """ + Cancel an execution by publishing a cancellation event. + + Args: + execution_id: UUID of execution to cancel. + reason: Reason for cancellation. + user_id: ID of user requesting cancellation. + """ + metadata = self._create_event_metadata(user_id=user_id) + + event = ExecutionCancelledEvent( + execution_id=execution_id, + aggregate_id=execution_id, + reason=reason, + cancelled_by=user_id, + metadata=metadata, + ) + + await self.producer.publish(event=event, key=execution_id) + + self.logger.info( + "Published cancellation event", + extra={ + "execution_id": execution_id, + "event_id": str(event.event_id), + "reason": reason, + }, + ) + async def delete_execution(self, execution_id: str) -> bool: """ Delete an execution and publish deletion event. 
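The hunk above adds `cancel_execution` as the single entry point for emitting cancellation events. A minimal caller sketch under stated assumptions: the wrapper function, its name, and the "user_requested" reason are illustrative and not part of this patch; only `ExecutionService.cancel_execution` and its signature come from the diff.

```python
from app.services.execution_service import ExecutionService


async def cancel_from_api(service: ExecutionService, execution_id: str, user_id: str) -> None:
    # Publishes an ExecutionCancelledEvent keyed by execution_id; downstream
    # consumers react to the cancellation asynchronously.
    await service.cancel_execution(
        execution_id=execution_id,
        reason="user_requested",
        user_id=user_id,
    )
```

Keying by `execution_id` keeps every event for one execution on the same Kafka partition, which preserves per-execution ordering for consumers.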
@@ -461,7 +490,7 @@ async def _publish_deletion_event(self, execution_id: str) -> None: execution_id=execution_id, reason="user_requested", cancelled_by=metadata.user_id, metadata=metadata ) - await self.producer.produce(event_to_produce=event, key=execution_id) + await self.producer.publish(event=event, key=execution_id) self.logger.info( "Published cancellation event", diff --git a/backend/app/services/idempotency/__init__.py b/backend/app/services/idempotency/__init__.py index f429827d..e1ad57b8 100644 --- a/backend/app/services/idempotency/__init__.py +++ b/backend/app/services/idempotency/__init__.py @@ -4,10 +4,12 @@ IdempotencyManager, IdempotencyResult, ) +from app.services.idempotency.middleware import IdempotencyMiddleware __all__ = [ "IdempotencyConfig", "IdempotencyManager", + "IdempotencyMiddleware", "IdempotencyResult", "IdempotencyStatus", ] diff --git a/backend/app/services/idempotency/idempotency_manager.py b/backend/app/services/idempotency/idempotency_manager.py index 41dc64ac..55534236 100644 --- a/backend/app/services/idempotency/idempotency_manager.py +++ b/backend/app/services/idempotency/idempotency_manager.py @@ -48,8 +48,9 @@ def __init__( def _generate_key( self, event: BaseEvent, key_strategy: KeyStrategy, custom_key: str | None = None, fields: set[str] | None = None ) -> str: + topic = type(event).topic() if key_strategy == KeyStrategy.EVENT_BASED: - key = f"{event.event_type}:{event.event_id}" + key = f"{topic}:{event.event_id}" elif key_strategy == KeyStrategy.CONTENT_HASH: event_dict = event.model_dump(mode="json") event_dict.pop("event_id", None) @@ -60,7 +61,7 @@ def _generate_key( content = json.dumps(event_dict, sort_keys=True) key = hashlib.sha256(content.encode()).hexdigest() elif key_strategy == KeyStrategy.CUSTOM and custom_key: - key = f"{event.event_type}:{custom_key}" + key = f"{topic}:{custom_key}" else: raise ValueError(f"Invalid key strategy: {key_strategy}") return f"{self.config.key_prefix}:{key}" @@ -76,12 +77,13 @@ async def check_and_reserve( full_key = self._generate_key(event, key_strategy, custom_key, fields) ttl = ttl_seconds or self.config.default_ttl_seconds + topic = type(event).topic() existing = await self._repo.find_by_key(full_key) if existing: - self.metrics.record_idempotency_cache_hit(event.event_type, "check_and_reserve") - return await self._handle_existing_key(existing, full_key, event.event_type) + self.metrics.record_idempotency_cache_hit(topic, "check_and_reserve") + return await self._handle_existing_key(existing, full_key, topic) - self.metrics.record_idempotency_cache_miss(event.event_type, "check_and_reserve") + self.metrics.record_idempotency_cache_miss(topic, "check_and_reserve") return await self._create_new_key(full_key, event, ttl) async def _handle_existing_key( @@ -136,11 +138,12 @@ async def _handle_processing_key( async def _create_new_key(self, full_key: str, event: BaseEvent, ttl: int) -> IdempotencyResult: created_at = datetime.now(timezone.utc) + topic = type(event).topic() try: record = IdempotencyRecord( key=full_key, status=IdempotencyStatus.PROCESSING, - event_type=event.event_type, + event_type=topic, event_id=str(event.event_id), created_at=created_at, ttl_seconds=ttl, @@ -154,7 +157,7 @@ async def _create_new_key(self, full_key: str, event: BaseEvent, ttl: int) -> Id # Race: someone inserted the same key concurrently — treat as existing existing = await self._repo.find_by_key(full_key) if existing: - return await self._handle_existing_key(existing, full_key, event.event_type) + return await 
self._handle_existing_key(existing, full_key, topic) # If for some reason it's still not found, allow processing return IdempotencyResult( is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=created_at, key=full_key @@ -240,3 +243,63 @@ async def get_cached_json( existing = await self._repo.find_by_key(full_key) assert existing and existing.result_json is not None, "Invariant: cached result must exist when requested" return existing.result_json + + # ------------------------------------------------------------------------- + # Key-based methods for middleware (bypass event object requirement) + # ------------------------------------------------------------------------- + + async def reserve_by_key( + self, + key: str, + event_type: str, + ttl_seconds: int | None = None, + ) -> bool: + """Reserve a key for processing. Returns False if duplicate (should skip). + + Used by middleware where we compute the key from headers/body directly. + """ + full_key = f"{self.config.key_prefix}:{key}" + ttl = ttl_seconds or self.config.default_ttl_seconds + + existing = await self._repo.find_by_key(full_key) + if existing: + self.metrics.record_idempotency_cache_hit(event_type, "reserve_by_key") + result = await self._handle_existing_key(existing, full_key, event_type) + return not result.is_duplicate + + self.metrics.record_idempotency_cache_miss(event_type, "reserve_by_key") + created_at = datetime.now(timezone.utc) + try: + record = IdempotencyRecord( + key=full_key, + status=IdempotencyStatus.PROCESSING, + event_type=event_type, + event_id=key, # Use key as event_id for tracking + created_at=created_at, + ttl_seconds=ttl, + ) + await self._repo.insert_processing(record) + self.metrics.increment_idempotency_keys(self.config.key_prefix) + return True # Reserved successfully, proceed with processing + except DuplicateKeyError: + # Race condition: another consumer reserved it first + self.metrics.record_idempotency_duplicate_blocked(event_type) + return False + + async def complete_by_key(self, key: str) -> bool: + """Mark a key as completed. Used by middleware after successful processing.""" + full_key = f"{self.config.key_prefix}:{key}" + existing = await self._repo.find_by_key(full_key) + if not existing: + self.logger.warning(f"Idempotency key {full_key} not found when marking completed") + return False + return await self._update_key_status(full_key, existing, IdempotencyStatus.COMPLETED) + + async def fail_by_key(self, key: str, error: str) -> bool: + """Mark a key as failed. 
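The three `*_by_key` helpers added here let the middleware manage idempotency without building an event object first. A hedged usage sketch follows; the `process_once` wrapper and its TTL are assumptions, while the three manager calls and their signatures are taken from this diff.

```python
from collections.abc import Awaitable, Callable

from app.services.idempotency import IdempotencyManager


async def process_once(
    manager: IdempotencyManager,
    key: str,
    event_type: str,
    handler: Callable[[], Awaitable[None]],
) -> None:
    # Reserve first; False means the key is already completed or in flight.
    if not await manager.reserve_by_key(key, event_type, ttl_seconds=3600):
        return
    try:
        await handler()
    except Exception as exc:
        # Record the failure; how FAILED keys are treated on the next reserve
        # is decided by _handle_existing_key.
        await manager.fail_by_key(key, str(exc))
        raise
    await manager.complete_by_key(key)
```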
Used by middleware after failed processing.""" + full_key = f"{self.config.key_prefix}:{key}" + existing = await self._repo.find_by_key(full_key) + if not existing: + self.logger.warning(f"Idempotency key {full_key} not found when marking failed") + return False + return await self._update_key_status(full_key, existing, IdempotencyStatus.FAILED, error=error) diff --git a/backend/app/services/idempotency/middleware.py b/backend/app/services/idempotency/middleware.py new file mode 100644 index 00000000..719ac7c0 --- /dev/null +++ b/backend/app/services/idempotency/middleware.py @@ -0,0 +1,109 @@ +import json +from collections.abc import Awaitable, Callable, Mapping +from dataclasses import dataclass +from hashlib import sha256 +from typing import Any, Final + +import redis.asyncio as aioredis +from faststream import BaseMiddleware +from faststream.message import StreamMessage + +from app.domain.idempotency import KeyStrategy + + +@dataclass(frozen=True) +class Rule: + strategy: KeyStrategy + ttl: int + + +# Topic name -> idempotency rule +# Topic names match event class names in snake_case (e.g., "execution_requested") +RULES: Final[Mapping[str, Rule]] = { + "execution_requested": Rule(KeyStrategy.EVENT_BASED, 7200), + "execution_completed": Rule(KeyStrategy.EVENT_BASED, 7200), + "execution_failed": Rule(KeyStrategy.EVENT_BASED, 7200), + "execution_cancelled": Rule(KeyStrategy.EVENT_BASED, 7200), + "execution_timeout": Rule(KeyStrategy.EVENT_BASED, 7200), + "create_pod_command": Rule(KeyStrategy.CONTENT_HASH, 3600), + "delete_pod_command": Rule(KeyStrategy.CONTENT_HASH, 3600), +} + + +def extract_event_id(body: bytes) -> str: + """Extract event_id from JSON body.""" + try: + data = json.loads(body) + return str(data.get("event_id", "")) + except (json.JSONDecodeError, TypeError): + return "" + + +def get_topic_name(msg: StreamMessage[Any], prefix: str = "") -> str: + """Get topic name from message, stripping prefix if present.""" + raw = msg.raw_message + topic = getattr(raw, "topic", "") or "" + if prefix and topic.startswith(prefix): + return topic[len(prefix):] + return topic + + +def compute_key(rule: Rule, topic: str, msg: StreamMessage[Any]) -> str: + """Compute idempotency key based on strategy.""" + if rule.strategy == KeyStrategy.EVENT_BASED: + event_id = extract_event_id(msg.body) + return f"idem:{topic}:{event_id}" if event_id else "" + elif rule.strategy == KeyStrategy.CONTENT_HASH: + return f"idem:{topic}:{sha256(msg.body).hexdigest()}" if msg.body else "" + return "" + + +class IdempotencyMiddleware: + """Factory that creates per-message middleware instances. 
+ + Minimal Kafka idempotency using Redis SET NX EX: + - Reserve: SET key "1" NX EX ttl + - Complete: key expires naturally + - Fail: DEL key to allow retry + """ + + def __init__(self, redis: aioredis.Redis, topic_prefix: str = "") -> None: + self._redis = redis + self._topic_prefix = topic_prefix + + def __call__(self, msg: Any = None, **kwargs: Any) -> "_IdempotencyMiddleware": + return _IdempotencyMiddleware(self._redis, self._topic_prefix, msg, **kwargs) + + +class _IdempotencyMiddleware(BaseMiddleware[Any, Any]): + """Per-message middleware instance.""" + + def __init__(self, redis: aioredis.Redis, topic_prefix: str, msg: Any = None, **kwargs: Any) -> None: + super().__init__(msg, **kwargs) + self._redis = redis + self._topic_prefix = topic_prefix + + async def consume_scope( + self, + call_next: Callable[[StreamMessage[Any]], Awaitable[Any]], + msg: StreamMessage[Any], + ) -> Any: + topic = get_topic_name(msg, self._topic_prefix) + rule = RULES.get(topic) + + if not rule: + return await call_next(msg) + + key = compute_key(rule, topic, msg) + if not key: + return await call_next(msg) + + reserved = await self._redis.set(key, b"1", nx=True, ex=rule.ttl) + if not reserved: + return None + + try: + return await call_next(msg) + except BaseException: + await self._redis.delete(key) + raise diff --git a/backend/app/services/k8s_worker/worker.py b/backend/app/services/k8s_worker/worker.py index b7bdba7f..6b0e48e0 100644 --- a/backend/app/services/k8s_worker/worker.py +++ b/backend/app/services/k8s_worker/worker.py @@ -16,7 +16,7 @@ ExecutionStartedEvent, PodCreatedEvent, ) -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from app.runtime_registry import RUNTIME_REGISTRY from app.settings import Settings @@ -24,8 +24,7 @@ class KubernetesWorker: - """ - Worker service that creates Kubernetes pods from execution events. + """Worker service that creates Kubernetes pods from execution events. This service: 1. Handles CreatePodCommand events from saga orchestrator @@ -34,12 +33,13 @@ class KubernetesWorker: 4. Publishes PodCreated events Lifecycle is managed by DI - consumer is injected already started. + Idempotency is handled by FastStream middleware (IdempotencyMiddleware). 
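To show how the factory above is meant to be attached (FastStream brokers accept a sequence of middleware factories), here is a wiring sketch. The bootstrap servers, Redis URL, and empty topic prefix are placeholders, since the project's real broker setup is not part of this hunk.

```python
import redis.asyncio as aioredis
from faststream.kafka import KafkaBroker

from app.services.idempotency import IdempotencyMiddleware

# Placeholder connection settings; the project derives these from Settings.
redis_client = aioredis.Redis.from_url("redis://localhost:6379/0")

broker = KafkaBroker(
    "localhost:9092",
    # Each consumed message gets its own _IdempotencyMiddleware instance;
    # topics with no entry in RULES pass straight through to the handler.
    middlewares=[IdempotencyMiddleware(redis_client, topic_prefix="")],
)
```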
""" def __init__( self, api_client: k8s_client.ApiClient, - producer: UnifiedProducer, + producer: EventPublisher, settings: Settings, logger: logging.Logger, event_metrics: EventMetrics, @@ -72,10 +72,9 @@ def __init__( self.logger.info(f"KubernetesWorker initialized for namespace {self._settings.K8S_NAMESPACE}") async def handle_create_pod_command(self, command: CreatePodCommandEvent) -> None: - """Handle create pod command from saga orchestrator""" + """Handle create pod command from saga orchestrator.""" execution_id = command.execution_id - # Check if already processing if execution_id in self._active_creations: self.logger.warning(f"Already creating pod for execution {execution_id}") return @@ -83,7 +82,7 @@ async def handle_create_pod_command(self, command: CreatePodCommandEvent) -> Non await self._create_pod_for_execution(command) async def handle_delete_pod_command(self, command: DeletePodCommandEvent) -> None: - """Handle delete pod command from saga orchestrator (compensation)""" + """Handle delete pod command from saga orchestrator (compensation).""" execution_id = command.execution_id self.logger.info(f"Deleting pod for execution {execution_id} due to: {command.reason}") @@ -211,7 +210,7 @@ async def _publish_execution_started(self, command: CreatePodCommandEvent, pod: container_id=None, metadata=command.metadata, ) - await self.producer.produce(event_to_produce=event, key=command.execution_id) + await self.producer.publish(event=event, key=command.execution_id) async def _publish_pod_created(self, command: CreatePodCommandEvent, pod: k8s_client.V1Pod) -> None: """Publish pod created event""" @@ -221,7 +220,7 @@ async def _publish_pod_created(self, command: CreatePodCommandEvent, pod: k8s_cl namespace=pod.metadata.namespace, metadata=command.metadata, ) - await self.producer.produce(event_to_produce=event, key=command.execution_id) + await self.producer.publish(event=event, key=command.execution_id) async def _publish_pod_creation_failed(self, command: CreatePodCommandEvent, error: str) -> None: """Publish pod creation failed event""" @@ -234,7 +233,7 @@ async def _publish_pod_creation_failed(self, command: CreatePodCommandEvent, err metadata=command.metadata, error_message=str(error), ) - await self.producer.produce(event_to_produce=event, key=command.execution_id) + await self.producer.publish(event=event, key=command.execution_id) async def wait_for_active_creations(self, timeout: float = 30.0) -> None: """Wait for active pod creations to complete (for graceful shutdown).""" diff --git a/backend/app/services/kafka_event_service.py b/backend/app/services/kafka_event_service.py deleted file mode 100644 index deca49a3..00000000 --- a/backend/app/services/kafka_event_service.py +++ /dev/null @@ -1,164 +0,0 @@ -import logging -import time -from datetime import datetime, timezone -from typing import Any -from uuid import uuid4 - -from opentelemetry import trace - -from app.core.correlation import CorrelationContext -from app.core.metrics import EventMetrics -from app.domain.enums.events import EventType -from app.domain.events import DomainEventAdapter -from app.domain.events.typed import DomainEvent, EventMetadata -from app.events.core import UnifiedProducer -from app.settings import Settings - -tracer = trace.get_tracer(__name__) - - -class KafkaEventService: - def __init__( - self, - kafka_producer: UnifiedProducer, - settings: Settings, - logger: logging.Logger, - event_metrics: EventMetrics, - ): - self.kafka_producer = kafka_producer - self.logger = logger - self.metrics = 
event_metrics - self.settings = settings - - async def publish_event( - self, - event_type: EventType, - payload: dict[str, Any], - aggregate_id: str | None, - correlation_id: str | None = None, - metadata: EventMetadata | None = None, - ) -> str: - """ - Build a typed DomainEvent from parameters and publish to Kafka. - - The producer persists the event to MongoDB before publishing. - """ - with tracer.start_as_current_span("publish_event") as span: - span.set_attribute("event.type", event_type) - if aggregate_id is not None: - span.set_attribute("aggregate.id", aggregate_id) - - start_time = time.time() - - if not correlation_id: - correlation_id = CorrelationContext.get_correlation_id() - - event_metadata = metadata or EventMetadata( - service_name=self.settings.SERVICE_NAME, - service_version=self.settings.SERVICE_VERSION, - correlation_id=correlation_id or str(uuid4()), - ) - if correlation_id and event_metadata.correlation_id != correlation_id: - event_metadata = event_metadata.model_copy(update={"correlation_id": correlation_id}) - - event_id = str(uuid4()) - timestamp = datetime.now(timezone.utc) - - # Create typed domain event via discriminated union adapter - event_data = { - "event_id": event_id, - "event_type": event_type, - "event_version": "1.0", - "timestamp": timestamp, - "aggregate_id": aggregate_id, - "metadata": event_metadata, - **payload, - } - domain_event = DomainEventAdapter.validate_python(event_data) - - await self.kafka_producer.produce(event_to_produce=domain_event, key=aggregate_id or domain_event.event_id) - self.metrics.record_event_published(event_type) - self.metrics.record_event_processing_duration(time.time() - start_time, event_type) - self.logger.info("Event published", extra={"event_type": event_type, "event_id": domain_event.event_id}) - return domain_event.event_id - - async def publish_execution_event( - self, - event_type: EventType, - execution_id: str, - status: str, - metadata: EventMetadata | None = None, - error_message: str | None = None, - ) -> str: - """Publish execution-related event using provided metadata (no framework coupling).""" - self.logger.info( - "Publishing execution event", - extra={ - "event_type": event_type, - "execution_id": execution_id, - "status": status, - }, - ) - - payload = {"execution_id": execution_id, "status": status} - - if error_message: - payload["error_message"] = error_message - - event_id = await self.publish_event( - event_type=event_type, - payload=payload, - aggregate_id=execution_id, - metadata=metadata, - ) - - self.logger.info( - "Execution event published successfully", - extra={ - "event_type": event_type, - "execution_id": execution_id, - "event_id": event_id, - }, - ) - - return event_id - - async def publish_pod_event( - self, - event_type: EventType, - pod_name: str, - execution_id: str, - namespace: str = "integr8scode", - status: str | None = None, - metadata: EventMetadata | None = None, - ) -> str: - """Publish pod-related event""" - payload = {"pod_name": pod_name, "execution_id": execution_id, "namespace": namespace} - - if status: - payload["status"] = status - - return await self.publish_event( - event_type=event_type, - payload=payload, - aggregate_id=execution_id, - metadata=metadata, - ) - - async def publish_domain_event(self, event: DomainEvent, key: str | None = None) -> str: - """Publish a pre-built DomainEvent to Kafka. - - The producer persists the event to MongoDB before publishing. 
- """ - with tracer.start_as_current_span("publish_domain_event") as span: - span.set_attribute("event.type", event.event_type) - if event.aggregate_id: - span.set_attribute("aggregate.id", event.aggregate_id) - - start_time = time.time() - - await self.kafka_producer.produce(event_to_produce=event, key=key or event.aggregate_id or event.event_id) - self.metrics.record_event_published(event.event_type) - self.metrics.record_event_processing_duration(time.time() - start_time, event.event_type) - self.logger.info("Domain event published", extra={"event_id": event.event_id}) - return event.event_id diff --git a/backend/app/services/notification_service.py b/backend/app/services/notification_service.py index e0aa7e6a..cea0a73c 100644 --- a/backend/app/services/notification_service.py +++ b/backend/app/services/notification_service.py @@ -32,7 +32,6 @@ NotificationValidationError, ) from app.schemas_pydantic.sse import RedisNotificationMessage -from app.services.kafka_event_service import KafkaEventService from app.services.sse.redis_bus import SSERedisBus from app.settings import Settings @@ -97,14 +96,12 @@ class NotificationService: def __init__( self, notification_repository: NotificationRepository, - event_service: KafkaEventService, sse_bus: SSERedisBus, settings: Settings, logger: logging.Logger, notification_metrics: NotificationMetrics, ) -> None: self.repository = notification_repository - self.event_service = event_service self.metrics = notification_metrics self.settings = settings self.sse_bus = sse_bus diff --git a/backend/app/services/pod_monitor/config.py b/backend/app/services/pod_monitor/config.py index f862f016..4a981d0c 100644 --- a/backend/app/services/pod_monitor/config.py +++ b/backend/app/services/pod_monitor/config.py @@ -1,20 +1,16 @@ import os from dataclasses import dataclass, field -from app.domain.enums.events import EventType -from app.infrastructure.kafka import get_topic_for_event from app.services.pod_monitor.event_mapper import PodPhase @dataclass class PodMonitorConfig: - """Configuration for PodMonitor service""" + """Configuration for PodMonitor service. - # Kafka settings - pod_events_topic: str = str(get_topic_for_event(EventType.POD_CREATED)) - execution_events_topic: str = str(get_topic_for_event(EventType.EXECUTION_REQUESTED)) - execution_completed_topic: str = str(get_topic_for_event(EventType.EXECUTION_COMPLETED)) - execution_failed_topic: str = str(get_topic_for_event(EventType.EXECUTION_FAILED)) + Note: Kafka topics are derived from event classes via BaseEvent.topic(). + No topic configuration needed here. 
+ """ # Kubernetes settings namespace: str = os.environ.get("K8S_NAMESPACE", "integr8scode") diff --git a/backend/app/services/pod_monitor/event_mapper.py b/backend/app/services/pod_monitor/event_mapper.py index 565ecf5e..729d865a 100644 --- a/backend/app/services/pod_monitor/event_mapper.py +++ b/backend/app/services/pod_monitor/event_mapper.py @@ -8,8 +8,8 @@ from app.domain.enums.kafka import GroupId from app.domain.enums.storage import ExecutionErrorType from app.domain.events.typed import ( + BaseEvent, ContainerStatusInfo, - DomainEvent, EventMetadata, ExecutionCompletedEvent, ExecutionFailedEvent, @@ -22,8 +22,8 @@ # Python 3.12 type aliases type PodPhase = str -type EventList = list[DomainEvent] -type AsyncMapper = Callable[["PodContext"], Awaitable[DomainEvent | None]] +type EventList = list[BaseEvent] +type AsyncMapper = Callable[["PodContext"], Awaitable[BaseEvent | None]] @dataclass(frozen=True) @@ -104,7 +104,7 @@ async def map_pod_event(self, pod: k8s_client.V1Pod, event_type: str) -> EventLi ) # Collect events from mappers - events: list[DomainEvent] = [] + events: list[BaseEvent] = [] # Check for timeout first - if pod timed out, only return timeout event if timeout_event := await self._check_timeout(ctx): @@ -132,14 +132,14 @@ async def map_pod_event(self, pod: k8s_client.V1Pod, event_type: str) -> EventLi for mapper in self._phase_mappers.get(phase, []): if event := await mapper(ctx): mapper_name = getattr(mapper, "__name__", repr(mapper)) - self.logger.info(f"POD-EVENT: phase-map {mapper_name} -> {event.event_type} exec={ctx.execution_id}") + self.logger.info(f"POD-EVENT: phase-map {mapper_name} -> {type(event).topic()} exec={ctx.execution_id}") events.append(event) # Event type mappers for mapper in self._event_type_mappers.get(event_type, []): if event := await mapper(ctx): mapper_name = getattr(mapper, "__name__", repr(mapper)) - self.logger.info(f"POD-EVENT: type-map {mapper_name} -> {event.event_type} exec={ctx.execution_id}") + self.logger.info(f"POD-EVENT: type-map {mapper_name} -> {type(event).topic()} exec={ctx.execution_id}") events.append(event) return events @@ -213,7 +213,7 @@ async def _map_scheduled(self, ctx: PodContext) -> PodScheduledEvent | None: node_name=ctx.pod.spec.node_name or "pending", metadata=ctx.metadata, ) - self.logger.debug(f"POD-EVENT: mapped scheduled -> {evt.event_type} exec={ctx.execution_id}") + self.logger.debug(f"POD-EVENT: mapped scheduled -> {type(evt).topic()} exec={ctx.execution_id}") return evt async def _map_running(self, ctx: PodContext) -> PodRunningEvent | None: @@ -238,7 +238,7 @@ async def _map_running(self, ctx: PodContext) -> PodRunningEvent | None: container_statuses=container_statuses, metadata=ctx.metadata, ) - self.logger.debug(f"POD-EVENT: mapped running -> {evt.event_type} exec={ctx.execution_id}") + self.logger.debug(f"POD-EVENT: mapped running -> {type(evt).topic()} exec={ctx.execution_id}") return evt async def _map_completed(self, ctx: PodContext) -> ExecutionCompletedEvent | None: @@ -264,7 +264,7 @@ async def _map_completed(self, ctx: PodContext) -> ExecutionCompletedEvent | Non self.logger.info(f"POD-EVENT: mapped completed exec={ctx.execution_id} exit_code={logs.exit_code}") return evt - async def _map_failed_or_completed(self, ctx: PodContext) -> DomainEvent | None: + async def _map_failed_or_completed(self, ctx: PodContext) -> BaseEvent | None: """Map failed pod to either timeout, completed, or failed""" if ctx.pod.status and ctx.pod.status.reason == "DeadlineExceeded": return await 
self._check_timeout(ctx) diff --git a/backend/app/services/pod_monitor/monitor.py b/backend/app/services/pod_monitor/monitor.py index 80ead1e8..f73b3726 100644 --- a/backend/app/services/pod_monitor/monitor.py +++ b/backend/app/services/pod_monitor/monitor.py @@ -9,8 +9,8 @@ from app.core.metrics import KubernetesMetrics from app.core.utils import StringEnum -from app.domain.events.typed import DomainEvent -from app.services.kafka_event_service import KafkaEventService +from app.domain.events.typed import BaseEvent +from app.events.core import EventPublisher from app.services.pod_monitor.config import PodMonitorConfig from app.services.pod_monitor.event_mapper import PodEventMapper @@ -61,7 +61,7 @@ class PodMonitor: def __init__( self, config: PodMonitorConfig, - kafka_event_service: KafkaEventService, + producer: EventPublisher, logger: logging.Logger, api_client: k8s_client.ApiClient, event_mapper: PodEventMapper, @@ -76,7 +76,7 @@ def __init__( # Components self._event_mapper = event_mapper - self._kafka_event_service = kafka_event_service + self._producer = producer # Watch cursor — set from LIST on first run or after 410 Gone self._last_resource_version: ResourceVersion | None = None @@ -195,13 +195,13 @@ async def _process_pod_event(self, event: PodEvent) -> None: ) duration = time.time() - start_time - self._metrics.record_pod_monitor_event_processing_duration(duration, event.event_type) + self._metrics.record_pod_monitor_event_processing_duration(duration, event.event_type.value) except Exception as e: self.logger.error(f"Error processing pod event: {e}", exc_info=True) self._metrics.record_pod_monitor_watch_error(ErrorType.PROCESSING_ERROR) - async def _publish_event(self, event: DomainEvent, pod: k8s_client.V1Pod) -> None: + async def _publish_event(self, event: BaseEvent, pod: k8s_client.V1Pod) -> None: """Publish event to Kafka and store in events collection.""" try: if pod.metadata and pod.metadata.labels: @@ -210,10 +210,10 @@ async def _publish_event(self, event: DomainEvent, pod: k8s_client.V1Pod) -> Non execution_id = getattr(event, "execution_id", None) or event.aggregate_id key = str(execution_id or (pod.metadata.name if pod.metadata else "unknown")) - await self._kafka_event_service.publish_domain_event(event=event, key=key) + await self._producer.publish(event=event, key=key) phase = pod.status.phase if pod.status else "Unknown" - self._metrics.record_pod_monitor_event_published(event.event_type, phase) + self._metrics.record_pod_monitor_event_published(type(event).topic(), phase) except Exception as e: self.logger.error(f"Error publishing event: {e}", exc_info=True) diff --git a/backend/app/services/result_processor/processor.py b/backend/app/services/result_processor/processor.py index 85ef1967..ca249fa6 100644 --- a/backend/app/services/result_processor/processor.py +++ b/backend/app/services/result_processor/processor.py @@ -6,7 +6,6 @@ from app.domain.enums.kafka import GroupId from app.domain.enums.storage import ExecutionErrorType, StorageType from app.domain.events.typed import ( - DomainEvent, EventMetadata, ExecutionCompletedEvent, ExecutionFailedEvent, @@ -15,17 +14,20 @@ ResultStoredEvent, ) from app.domain.execution import ExecutionNotFoundError, ExecutionResultDomain -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from app.settings import Settings class ResultProcessor: - """Service for processing execution completion events and storing results.""" + """Service for processing execution completion events and 
storing results. + + Idempotency is handled by FastStream middleware (IdempotencyMiddleware). + """ def __init__( self, execution_repo: ExecutionRepository, - producer: UnifiedProducer, + producer: EventPublisher, settings: Settings, logger: logging.Logger, execution_metrics: ExecutionMetrics, @@ -36,11 +38,8 @@ def __init__( self._metrics = execution_metrics self.logger = logger - async def handle_execution_completed(self, event: DomainEvent) -> None: + async def handle_execution_completed(self, event: ExecutionCompletedEvent) -> None: """Handle execution completed event.""" - if not isinstance(event, ExecutionCompletedEvent): - raise TypeError(f"Expected ExecutionCompletedEvent, got {type(event).__name__}") - exec_obj = await self._execution_repo.get_execution(event.execution_id) if exec_obj is None: raise ExecutionNotFoundError(event.execution_id) @@ -74,11 +73,8 @@ async def handle_execution_completed(self, event: DomainEvent) -> None: self.logger.error(f"Failed to handle ExecutionCompletedEvent: {e}", exc_info=True) await self._publish_result_failed(event.execution_id, str(e)) - async def handle_execution_failed(self, event: DomainEvent) -> None: + async def handle_execution_failed(self, event: ExecutionFailedEvent) -> None: """Handle execution failed event.""" - if not isinstance(event, ExecutionFailedEvent): - raise TypeError(f"Expected ExecutionFailedEvent, got {type(event).__name__}") - exec_obj = await self._execution_repo.get_execution(event.execution_id) if exec_obj is None: raise ExecutionNotFoundError(event.execution_id) @@ -95,11 +91,8 @@ async def handle_execution_failed(self, event: DomainEvent) -> None: self.logger.error(f"Failed to handle ExecutionFailedEvent: {e}", exc_info=True) await self._publish_result_failed(event.execution_id, str(e)) - async def handle_execution_timeout(self, event: DomainEvent) -> None: + async def handle_execution_timeout(self, event: ExecutionTimeoutEvent) -> None: """Handle execution timeout event.""" - if not isinstance(event, ExecutionTimeoutEvent): - raise TypeError(f"Expected ExecutionTimeoutEvent, got {type(event).__name__}") - exec_obj = await self._execution_repo.get_execution(event.execution_id) if exec_obj is None: raise ExecutionNotFoundError(event.execution_id) @@ -133,7 +126,7 @@ async def _publish_result_stored(self, result: ExecutionResultDomain) -> None: service_version="1.0.0", ), ) - await self._producer.produce(event_to_produce=event, key=result.execution_id) + await self._producer.publish(event=event, key=result.execution_id) async def _publish_result_failed(self, execution_id: str, error_message: str) -> None: """Publish result processing failed event.""" @@ -145,4 +138,4 @@ async def _publish_result_failed(self, execution_id: str, error_message: str) -> service_version="1.0.0", ), ) - await self._producer.produce(event_to_produce=event, key=execution_id) + await self._producer.publish(event=event, key=execution_id) diff --git a/backend/app/services/saga/execution_saga.py b/backend/app/services/saga/execution_saga.py index 62dec39b..b16aa517 100644 --- a/backend/app/services/saga/execution_saga.py +++ b/backend/app/services/saga/execution_saga.py @@ -4,7 +4,7 @@ from app.db.repositories.resource_allocation_repository import ResourceAllocationRepository from app.domain.events.typed import CreatePodCommandEvent, DeletePodCommandEvent, EventMetadata, ExecutionRequestedEvent from app.domain.saga import DomainResourceAllocationCreate -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from 
.saga_step import CompensationStep, SagaContext, SagaStep @@ -76,7 +76,7 @@ def get_compensation(self) -> CompensationStep | None: class CreatePodStep(SagaStep[ExecutionRequestedEvent]): """Create Kubernetes pod.""" - def __init__(self, producer: UnifiedProducer, publish_commands: bool) -> None: + def __init__(self, producer: EventPublisher, publish_commands: bool) -> None: super().__init__("create_pod") self.producer = producer self.publish_commands = publish_commands @@ -116,7 +116,7 @@ async def execute(self, context: SagaContext, event: ExecutionRequestedEvent) -> ), ) - await self.producer.produce(event_to_produce=create_pod_cmd, key=execution_id) + await self.producer.publish(event=create_pod_cmd, key=execution_id) context.set("pod_creation_triggered", True) logger.info(f"CreatePodCommandEvent published for execution {execution_id}") @@ -151,7 +151,7 @@ async def compensate(self, context: SagaContext) -> bool: class DeletePodCompensation(CompensationStep): """Delete created pod.""" - def __init__(self, producer: UnifiedProducer) -> None: + def __init__(self, producer: EventPublisher) -> None: super().__init__("delete_pod") self.producer = producer @@ -173,7 +173,7 @@ async def compensate(self, context: SagaContext) -> bool: ), ) - await self.producer.produce(event_to_produce=delete_pod_cmd, key=execution_id) + await self.producer.publish(event=delete_pod_cmd, key=execution_id) logger.info(f"DeletePodCommandEvent published for {execution_id}") return True @@ -188,7 +188,7 @@ def get_name(cls) -> str: def bind_dependencies( self, - producer: UnifiedProducer, + producer: EventPublisher, alloc_repo: ResourceAllocationRepository, publish_commands: bool, ) -> None: diff --git a/backend/app/services/saga/saga_orchestrator.py b/backend/app/services/saga/saga_orchestrator.py index 702c5d1d..9153c6a8 100644 --- a/backend/app/services/saga/saga_orchestrator.py +++ b/backend/app/services/saga/saga_orchestrator.py @@ -10,7 +10,6 @@ from app.db.repositories.saga_repository import SagaRepository from app.domain.enums.saga import SagaState from app.domain.events.typed import ( - DomainEvent, EventMetadata, ExecutionCompletedEvent, ExecutionFailedEvent, @@ -19,7 +18,7 @@ SagaCancelledEvent, ) from app.domain.saga.models import Saga, SagaConfig -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from .execution_saga import ExecutionSaga from .saga_step import SagaContext @@ -28,13 +27,16 @@ class SagaOrchestrator: - """Orchestrates saga execution and compensation.""" + """Orchestrates saga execution and compensation. + + Idempotency is handled by FastStream middleware (IdempotencyMiddleware). 
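The isinstance guards in the orchestrator's handlers are dropped in this diff because the subscriber layer already delivers typed events: FastStream validates the message body against the handler's annotated Pydantic model before invoking it. A hedged sketch of that subscriber side is shown here; the bootstrap servers, unprefixed topic name, and registration function are illustrative, not taken from the patch.

```python
from faststream.kafka import KafkaBroker

from app.domain.events.typed import ExecutionRequestedEvent
from app.services.saga.saga_orchestrator import SagaOrchestrator

broker = KafkaBroker("localhost:9092")  # placeholder bootstrap servers


def register_saga_handlers(orchestrator: SagaOrchestrator) -> None:
    # The annotated parameter type tells FastStream how to decode the payload,
    # so the handler only ever sees a validated ExecutionRequestedEvent.
    @broker.subscriber("execution_requested")
    async def on_execution_requested(event: ExecutionRequestedEvent) -> None:
        await orchestrator.handle_execution_requested(event)
```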
+ """ def __init__( self, config: SagaConfig, saga_repository: SagaRepository, - producer: UnifiedProducer, + producer: EventPublisher, resource_allocation_repository: ResourceAllocationRepository, logger: logging.Logger, ): @@ -44,30 +46,22 @@ def __init__( self._alloc_repo: ResourceAllocationRepository = resource_allocation_repository self.logger = logger - async def handle_execution_requested(self, event: DomainEvent) -> None: + async def handle_execution_requested(self, event: ExecutionRequestedEvent) -> None: """Handle EXECUTION_REQUESTED — starts a new saga.""" - if not isinstance(event, ExecutionRequestedEvent): - raise TypeError(f"Expected ExecutionRequestedEvent, got {type(event).__name__}") await self._start_saga(event) - async def handle_execution_completed(self, event: DomainEvent) -> None: + async def handle_execution_completed(self, event: ExecutionCompletedEvent) -> None: """Handle EXECUTION_COMPLETED — marks saga as completed.""" - if not isinstance(event, ExecutionCompletedEvent): - raise TypeError(f"Expected ExecutionCompletedEvent, got {type(event).__name__}") await self._resolve_completion(event.execution_id, SagaState.COMPLETED) - async def handle_execution_failed(self, event: DomainEvent) -> None: + async def handle_execution_failed(self, event: ExecutionFailedEvent) -> None: """Handle EXECUTION_FAILED — marks saga as failed.""" - if not isinstance(event, ExecutionFailedEvent): - raise TypeError(f"Expected ExecutionFailedEvent, got {type(event).__name__}") await self._resolve_completion( - event.execution_id, SagaState.FAILED, event.error_message or f"Execution {event.event_type}" + event.execution_id, SagaState.FAILED, event.error_message or "Execution failed" ) - async def handle_execution_timeout(self, event: DomainEvent) -> None: + async def handle_execution_timeout(self, event: ExecutionTimeoutEvent) -> None: """Handle EXECUTION_TIMEOUT — marks saga as timed out.""" - if not isinstance(event, ExecutionTimeoutEvent): - raise TypeError(f"Expected ExecutionTimeoutEvent, got {type(event).__name__}") await self._resolve_completion( event.execution_id, SagaState.TIMEOUT, f"Execution timed out after {event.timeout_seconds} seconds" ) @@ -132,7 +126,7 @@ async def _execute_saga( saga: ExecutionSaga, instance: Saga, context: SagaContext, - trigger_event: DomainEvent, + trigger_event: ExecutionRequestedEvent, ) -> None: """Execute saga steps.""" tracer = get_tracer() @@ -331,7 +325,7 @@ async def _publish_saga_cancelled_event(self, saga_instance: Saga) -> None: ) if self._producer: - await self._producer.produce(event_to_produce=event, key=saga_instance.execution_id) + await self._producer.publish(event=event, key=saga_instance.execution_id) self.logger.info(f"Published cancellation event for saga {saga_instance.saga_id}") diff --git a/backend/app/services/saga/saga_step.py b/backend/app/services/saga/saga_step.py index 81d65e1e..c7708120 100644 --- a/backend/app/services/saga/saga_step.py +++ b/backend/app/services/saga/saga_step.py @@ -3,9 +3,9 @@ from fastapi.encoders import jsonable_encoder -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent -T = TypeVar("T", bound=DomainEvent) +T = TypeVar("T", bound=BaseEvent) class SagaContext: diff --git a/backend/app/services/sse/redis_bus.py b/backend/app/services/sse/redis_bus.py index 1426056e..3991a3e1 100644 --- a/backend/app/services/sse/redis_bus.py +++ b/backend/app/services/sse/redis_bus.py @@ -6,8 +6,7 @@ import redis.asyncio as redis from pydantic import BaseModel -from 
app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent from app.schemas_pydantic.sse import RedisNotificationMessage, RedisSSEMessage T = TypeVar("T", bound=BaseModel) @@ -45,23 +44,23 @@ async def close(self) -> None: class SSERedisBus: """Redis-backed pub/sub bus for SSE event fan-out across workers.""" - SSE_ROUTED_EVENTS: ClassVar[list[EventType]] = [ - EventType.EXECUTION_REQUESTED, - EventType.EXECUTION_QUEUED, - EventType.EXECUTION_STARTED, - EventType.EXECUTION_RUNNING, - EventType.EXECUTION_COMPLETED, - EventType.EXECUTION_FAILED, - EventType.EXECUTION_TIMEOUT, - EventType.EXECUTION_CANCELLED, - EventType.RESULT_STORED, - EventType.POD_CREATED, - EventType.POD_SCHEDULED, - EventType.POD_RUNNING, - EventType.POD_SUCCEEDED, - EventType.POD_FAILED, - EventType.POD_TERMINATED, - EventType.POD_DELETED, + SSE_ROUTED_TOPICS: ClassVar[list[str]] = [ + "execution_requested", + "execution_queued", + "execution_started", + "execution_running", + "execution_completed", + "execution_failed", + "execution_timeout", + "execution_cancelled", + "result_stored", + "pod_created", + "pod_scheduled", + "pod_running", + "pod_succeeded", + "pod_failed", + "pod_terminated", + "pod_deleted", ] def __init__( @@ -82,27 +81,29 @@ def _exec_channel(self, execution_id: str) -> str: def _notif_channel(self, user_id: str) -> str: return f"{self._notif_prefix}{user_id}" - async def publish_event(self, execution_id: str, event: DomainEvent) -> None: + async def publish_event(self, execution_id: str, event: BaseEvent) -> None: + topic = type(event).topic() message = RedisSSEMessage( - event_type=event.event_type, + event_type=topic, execution_id=execution_id, data=event.model_dump(mode="json"), ) await self._redis.publish(self._exec_channel(execution_id), message.model_dump_json()) - async def route_domain_event(self, event: DomainEvent) -> None: + async def route_domain_event(self, event: BaseEvent) -> None: """Route a domain event to its Redis execution channel by execution_id.""" data = event.model_dump() execution_id = data.get("execution_id") + topic = type(event).topic() if not execution_id: - self.logger.debug("Event %s has no execution_id, skipping", event.event_type) + self.logger.debug("Event %s has no execution_id, skipping", topic) return try: await self.publish_event(execution_id, event) except Exception as e: self.logger.error( "Failed to publish %s to Redis for %s: %s", - event.event_type, execution_id, e, + topic, execution_id, e, exc_info=True, ) diff --git a/backend/app/services/user_settings_service.py b/backend/app/services/user_settings_service.py index a2a607be..3c532430 100644 --- a/backend/app/services/user_settings_service.py +++ b/backend/app/services/user_settings_service.py @@ -6,7 +6,6 @@ from app.db.repositories.user_settings_repository import UserSettingsRepository from app.domain.enums import Theme -from app.domain.enums.events import EventType from app.domain.user import ( DomainEditorSettings, DomainNotificationSettings, @@ -15,7 +14,6 @@ DomainUserSettingsChangedEvent, DomainUserSettingsUpdate, ) -from app.services.kafka_event_service import KafkaEventService from app.settings import Settings @@ -23,12 +21,10 @@ class UserSettingsService: def __init__( self, repository: UserSettingsRepository, - event_service: KafkaEventService, settings: Settings, logger: logging.Logger, ) -> None: self.repository = repository - self.event_service = event_service self.settings = settings self.logger = logger 
self._cache_ttl = timedelta(minutes=5) @@ -57,13 +53,13 @@ async def get_user_settings_fresh(self, user_id: str) -> DomainUserSettings: snapshot = await self.repository.get_snapshot(user_id) settings: DomainUserSettings - event_types = [EventType.USER_SETTINGS_UPDATED] + topics = ["user_settings_updated"] if snapshot: settings = snapshot - events = await self.repository.get_settings_events(user_id, event_types, since=snapshot.updated_at) + events = await self.repository.get_settings_events(user_id, topics, since=snapshot.updated_at) else: settings = DomainUserSettings(user_id=user_id) - events = await self.repository.get_settings_events(user_id, event_types) + events = await self.repository.get_settings_events(user_id, topics) for event in events: settings = self._apply_event(settings, event) @@ -88,27 +84,11 @@ async def update_user_settings( "updated_at": datetime.now(timezone.utc), }) - await self._publish_settings_event(user_id, updates.model_dump(exclude_none=True, mode="json"), reason) - self._add_to_cache(user_id, new_settings) if (await self.repository.count_events_since_snapshot(user_id)) >= 10: await self.repository.create_snapshot(new_settings) return new_settings - async def _publish_settings_event(self, user_id: str, changes: dict[str, Any], reason: str | None) -> None: - """Publish settings update event with typed payload fields.""" - await self.event_service.publish_event( - event_type=EventType.USER_SETTINGS_UPDATED, - aggregate_id=f"user_settings_{user_id}", - payload={ - "user_id": user_id, - "changed_fields": list(changes.keys()), - "reason": reason, - **changes, - }, - metadata=None, - ) - async def update_theme(self, user_id: str, theme: Theme) -> DomainUserSettings: """Update user's theme preference""" return await self.update_user_settings( @@ -144,14 +124,14 @@ async def update_custom_setting(self, user_id: str, key: str, value: Any) -> Dom async def get_settings_history(self, user_id: str, limit: int = 50) -> list[DomainSettingsHistoryEntry]: """Get history from changed fields recorded in events.""" - events = await self.repository.get_settings_events(user_id, [EventType.USER_SETTINGS_UPDATED], limit=limit) + events = await self.repository.get_settings_events(user_id, ["user_settings_updated"], limit=limit) history: list[DomainSettingsHistoryEntry] = [] for event in events: for fld in event.changed_fields: history.append( DomainSettingsHistoryEntry( timestamp=event.timestamp, - event_type=event.event_type, + topic="user_settings_updated", field=f"/{fld}", old_value=None, new_value=event.model_dump().get(fld), @@ -163,7 +143,7 @@ async def get_settings_history(self, user_id: str, limit: int = 50) -> list[Doma async def restore_settings_to_point(self, user_id: str, timestamp: datetime) -> DomainUserSettings: """Restore settings to a specific point in time""" - events = await self.repository.get_settings_events(user_id, [EventType.USER_SETTINGS_UPDATED], until=timestamp) + events = await self.repository.get_settings_events(user_id, ["user_settings_updated"], until=timestamp) settings = DomainUserSettings(user_id=user_id) for event in events: @@ -172,17 +152,6 @@ async def restore_settings_to_point(self, user_id: str, timestamp: datetime) -> await self.repository.create_snapshot(settings) self._add_to_cache(user_id, settings) - await self.event_service.publish_event( - event_type=EventType.USER_SETTINGS_UPDATED, - aggregate_id=f"user_settings_{user_id}", - payload={ - "user_id": user_id, - "changed_fields": [], - "reason": f"Settings restored to 
{timestamp.isoformat()}", - }, - metadata=None, - ) - return settings def _apply_event(self, settings: DomainUserSettings, event: DomainUserSettingsChangedEvent) -> DomainUserSettings: diff --git a/backend/pyproject.toml b/backend/pyproject.toml index ebc70c1b..e36632d3 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -14,7 +14,6 @@ dependencies = [ "asgiref==3.11.0", "async-timeout==5.0.1", "attrs==25.3.0", - "avro-python3==1.10.2", "backoff==2.2.1", "blinker==1.9.0", "Brotli==1.2.0", @@ -24,7 +23,6 @@ dependencies = [ "click==8.1.7", "ConfigArgParse==1.7.1", "aiokafka==0.12.0", - "python-schema-registry-client==2.6.1", "contourpy==1.3.3", "cycler==0.12.1", "Deprecated==1.2.14", @@ -34,7 +32,6 @@ dependencies = [ "email-validator==2.3.0", "exceptiongroup==1.2.2", "fastapi==0.128.0", - "fastavro==1.12.1", "fonttools==4.61.1", "frozenlist==1.7.0", "google-auth==2.47.0", @@ -89,7 +86,6 @@ dependencies = [ "pyasn1==0.6.2", "pyasn1_modules==0.4.2", "pydantic==2.9.2", - "pydantic-avro==0.9.1", "pydantic_core==2.23.4", "Pygments==2.19.2", "PyJWT==2.9.0", @@ -173,7 +169,7 @@ exclude = [ ] [tool.ruff.lint.flake8-bugbear] -extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.Query", "fastapi.params.Query"] +extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.Query", "fastapi.params.Query", "faststream.Context"] [tool.ruff.lint.flake8-annotations] mypy-init-return = true diff --git a/backend/scripts/create_topics.py b/backend/scripts/create_topics.py index 2cf81e21..4a0e824f 100755 --- a/backend/scripts/create_topics.py +++ b/backend/scripts/create_topics.py @@ -1,21 +1,92 @@ #!/usr/bin/env python3 """ Create all required Kafka topics for the Integr8sCode backend. + +With FastStream, topics are derived from event class names. +This script discovers all event classes and creates their topics. 
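+
+Illustrative example (a sketch for this note, not an API guarantee): BaseEvent.topic()
+is assumed to snake_case the class name and drop the "Event" suffix, so the set of
+required topics looks roughly like
+
+    get_all_event_topics() | get_infrastructure_topics()
+    # e.g. {"execution_requested", "pod_created", ...} plus the KafkaTopic entries,
+    # each created with settings.KAFKA_TOPIC_PREFIX prepended.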
""" import asyncio import os import sys +from typing import Any from aiokafka.admin import AIOKafkaAdminClient, NewTopic from aiokafka.errors import TopicAlreadyExistsError from app.core.logging import setup_logger -from app.infrastructure.kafka.topics import get_all_topics, get_topic_configs +from app.domain.enums.kafka import KafkaTopic +from app.domain.events.typed import BaseEvent from app.settings import Settings logger = setup_logger(os.environ.get("LOG_LEVEL", "INFO")) +def get_all_event_topics() -> set[str]: + """Discover all topics from BaseEvent subclasses.""" + topics: set[str] = set() + + def collect_subclasses(cls: type[BaseEvent]) -> None: + for subclass in cls.__subclasses__(): + # Skip abstract/base classes + if not subclass.__name__.startswith("_"): + topics.add(subclass.topic()) + collect_subclasses(subclass) + + collect_subclasses(BaseEvent) + return topics + + +def get_infrastructure_topics() -> set[str]: + """Get infrastructure topics (DLQ) from KafkaTopic enum.""" + return {str(t) for t in KafkaTopic} + + +def get_topic_config(topic: str) -> dict[str, Any]: + """Get configuration for a topic based on its name.""" + # DLQ topics need longer retention + if "dlq" in topic or "dead_letter" in topic: + return { + "num_partitions": 3, + "replication_factor": 1, + "config": { + "retention.ms": "1209600000", # 14 days + "compression.type": "gzip", + }, + } + + # Execution-related topics need more partitions + if "execution" in topic: + return { + "num_partitions": 10, + "replication_factor": 1, + "config": { + "retention.ms": "604800000", # 7 days + "compression.type": "gzip", + }, + } + + # Pod events + if "pod" in topic: + return { + "num_partitions": 10, + "replication_factor": 1, + "config": { + "retention.ms": "86400000", # 1 day + "compression.type": "gzip", + }, + } + + # Default config + return { + "num_partitions": 5, + "replication_factor": 1, + "config": { + "retention.ms": "604800000", # 7 days + "compression.type": "gzip", + }, + } + + async def create_topics(settings: Settings) -> None: """Create all required Kafka topics using provided settings.""" @@ -34,31 +105,22 @@ async def create_topics(settings: Settings) -> None: existing_topics_set = set(existing_topics) logger.info(f"Existing topics: {existing_topics_set}") - # Get all required topics and their configs - all_topics = get_all_topics() - topic_configs = get_topic_configs() + # Collect all required topics + event_topics = get_all_event_topics() + infra_topics = get_infrastructure_topics() + all_topics = event_topics | infra_topics + topic_prefix = settings.KAFKA_TOPIC_PREFIX logger.info(f"Total required topics: {len(all_topics)} (prefix: '{topic_prefix}')") # Create topics topics_to_create: list[NewTopic] = [] - for topic in all_topics: + for topic in sorted(all_topics): # Apply topic prefix for consistency with consumers/producers topic_name = f"{topic_prefix}{topic}" if topic_name not in existing_topics_set: - # Get config from topic_configs - config = topic_configs.get( - topic, - { - "num_partitions": 3, - "replication_factor": 1, - "config": { - "retention.ms": "604800000", # 7 days - "compression.type": "gzip", - }, - }, - ) + config = get_topic_config(topic) new_topic = NewTopic( name=topic_name, diff --git a/backend/tests/e2e/app/test_main_app.py b/backend/tests/e2e/app/test_main_app.py index 160f5a83..da2e5286 100644 --- a/backend/tests/e2e/app/test_main_app.py +++ b/backend/tests/e2e/app/test_main_app.py @@ -6,7 +6,6 @@ import redis.asyncio as aioredis from app.core.database_context import 
Database from app.domain.exceptions import DomainError -from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings from dishka import AsyncContainer from fastapi import FastAPI @@ -294,13 +293,6 @@ async def test_redis_connected(self, scope: AsyncContainer) -> None: pong = await redis_client.ping() # type: ignore[misc] assert pong is True - @pytest.mark.asyncio - async def test_schema_registry_initialized(self, scope: AsyncContainer) -> None: - """Schema registry manager is initialized.""" - schema_registry = await scope.get(SchemaRegistryManager) - assert schema_registry is not None - - class TestCreateAppFunction: """Tests for create_app factory function.""" diff --git a/backend/tests/e2e/core/test_container.py b/backend/tests/e2e/core/test_container.py index 45ac8ae5..5ad95b27 100644 --- a/backend/tests/e2e/core/test_container.py +++ b/backend/tests/e2e/core/test_container.py @@ -4,14 +4,13 @@ import redis.asyncio as aioredis from app.core.database_context import Database from app.core.security import SecurityService -from app.events.schema.schema_registry import SchemaRegistryManager +from app.services.admin import AdminUserService +from app.services.event_replay import EventReplayService from app.services.event_service import EventService from app.services.execution_service import ExecutionService from app.services.notification_service import NotificationService from app.services.rate_limit_service import RateLimitService -from app.services.event_replay import EventReplayService from app.services.saved_script_service import SavedScriptService -from app.services.admin import AdminUserService from app.services.user_settings_service import UserSettingsService from app.settings import Settings from dishka import AsyncContainer @@ -82,16 +81,6 @@ async def test_resolves_event_service(self, scope: AsyncContainer) -> None: assert isinstance(service, EventService) - @pytest.mark.asyncio - async def test_resolves_schema_registry( - self, scope: AsyncContainer - ) -> None: - """Container resolves SchemaRegistryManager.""" - registry = await scope.get(SchemaRegistryManager) - - assert isinstance(registry, SchemaRegistryManager) - - class TestBusinessServices: """Tests for business service resolution.""" diff --git a/backend/tests/e2e/core/test_dishka_lifespan.py b/backend/tests/e2e/core/test_dishka_lifespan.py index e1e387a8..466e88b5 100644 --- a/backend/tests/e2e/core/test_dishka_lifespan.py +++ b/backend/tests/e2e/core/test_dishka_lifespan.py @@ -70,16 +70,6 @@ async def test_redis_connected(self, scope: AsyncContainer) -> None: pong = await redis_client.ping() # type: ignore[misc] assert pong is True - @pytest.mark.asyncio - async def test_schema_registry_initialized( - self, scope: AsyncContainer - ) -> None: - """Schema registry is initialized during lifespan.""" - from app.events.schema.schema_registry import SchemaRegistryManager - - registry = await scope.get(SchemaRegistryManager) - assert registry is not None - @pytest.mark.asyncio async def test_sse_redis_bus_available(self, scope: AsyncContainer) -> None: """SSE Redis bus is available after lifespan.""" diff --git a/backend/tests/e2e/core/test_middlewares.py b/backend/tests/e2e/core/test_middlewares.py index 7cc17388..b0c9bafd 100644 --- a/backend/tests/e2e/core/test_middlewares.py +++ b/backend/tests/e2e/core/test_middlewares.py @@ -1,6 +1,5 @@ import httpx import pytest - from app.settings import Settings pytestmark = pytest.mark.e2e diff --git a/backend/tests/e2e/dlq/test_dlq_discard.py 
b/backend/tests/e2e/dlq/test_dlq_discard.py index 2c4650f4..ad0baa82 100644 --- a/backend/tests/e2e/dlq/test_dlq_discard.py +++ b/backend/tests/e2e/dlq/test_dlq_discard.py @@ -6,7 +6,7 @@ from app.db.docs import DLQMessageDocument from app.db.repositories.dlq_repository import DLQRepository from app.dlq.models import DLQMessageStatus -from app.domain.enums.kafka import KafkaTopic +from app.domain.events.typed import ExecutionRequestedEvent from dishka import AsyncContainer from tests.conftest import make_execution_requested_event @@ -32,7 +32,7 @@ async def _create_dlq_document( doc = DLQMessageDocument( event=event_dict, - original_topic=str(KafkaTopic.EXECUTION_EVENTS), + original_topic=ExecutionRequestedEvent.topic(), error="Test error", retry_count=0, failed_at=now, diff --git a/backend/tests/e2e/dlq/test_dlq_manager.py b/backend/tests/e2e/dlq/test_dlq_manager.py index e19e4528..9c804ec8 100644 --- a/backend/tests/e2e/dlq/test_dlq_manager.py +++ b/backend/tests/e2e/dlq/test_dlq_manager.py @@ -1,21 +1,20 @@ import asyncio +import json import logging import uuid from datetime import datetime, timezone import pytest from aiokafka import AIOKafkaConsumer -from faststream.kafka import KafkaBroker from app.core.metrics import DLQMetrics from app.db.repositories.dlq_repository import DLQRepository from app.dlq.manager import DLQManager from app.dlq.models import DLQMessage -from app.domain.enums.events import EventType from app.domain.enums.kafka import KafkaTopic -from app.domain.events.typed import DLQMessageReceivedEvent, DomainEventAdapter -from app.events.schema.schema_registry import SchemaRegistryManager +from app.domain.events.typed import DLQMessageReceivedEvent, ExecutionRequestedEvent from app.settings import Settings from dishka import AsyncContainer +from faststream.kafka import KafkaBroker from tests.conftest import make_execution_requested_event @@ -30,7 +29,6 @@ @pytest.mark.asyncio async def test_dlq_manager_persists_and_emits_event(scope: AsyncContainer, test_settings: Settings) -> None: """Test that DLQ manager persists messages and emits DLQMessageReceivedEvent.""" - schema_registry = SchemaRegistryManager(test_settings, _test_logger) dlq_metrics: DLQMetrics = await scope.get(DLQMetrics) prefix = test_settings.KAFKA_TOPIC_PREFIX @@ -53,15 +51,9 @@ async def consume_dlq_events() -> None: """Consume DLQ events and set future when our event is received.""" async for msg in events_consumer: try: - payload = await schema_registry.serializer.decode_message(msg.value) - if payload is None: - continue - event = DomainEventAdapter.validate_python(payload) - if ( - isinstance(event, DLQMessageReceivedEvent) - and event.dlq_event_id == ev.event_id - and not received_future.done() - ): + payload = json.loads(msg.value) + event = DLQMessageReceivedEvent.model_validate(payload) + if event.dlq_event_id == ev.event_id and not received_future.done(): received_future.set_result(event) return except Exception as e: @@ -79,7 +71,6 @@ async def consume_dlq_events() -> None: manager = DLQManager( settings=test_settings, broker=broker, - schema_registry=schema_registry, logger=_test_logger, dlq_metrics=dlq_metrics, repository=repository, @@ -88,7 +79,7 @@ async def consume_dlq_events() -> None: # Build a DLQMessage directly and call handle_message (no internal consumer loop) dlq_msg = DLQMessage( event=ev, - original_topic=f"{prefix}{str(KafkaTopic.EXECUTION_EVENTS)}", + original_topic=f"{prefix}{ExecutionRequestedEvent.topic()}", error="handler failed", retry_count=0, 
failed_at=datetime.now(timezone.utc), @@ -100,8 +91,8 @@ async def consume_dlq_events() -> None: # Await the DLQMessageReceivedEvent — true async, no polling received = await asyncio.wait_for(received_future, timeout=15.0) assert received.dlq_event_id == ev.event_id - assert received.event_type == EventType.DLQ_MESSAGE_RECEIVED - assert received.original_event_type == str(EventType.EXECUTION_REQUESTED) + assert type(received).topic() == "dlq_message_received" + assert "execution" in received.original_topic.lower() assert received.error == "handler failed" finally: consume_task.cancel() diff --git a/backend/tests/e2e/dlq/test_dlq_retry.py b/backend/tests/e2e/dlq/test_dlq_retry.py index d01fefe7..4ddd6b6a 100644 --- a/backend/tests/e2e/dlq/test_dlq_retry.py +++ b/backend/tests/e2e/dlq/test_dlq_retry.py @@ -6,7 +6,7 @@ from app.db.docs import DLQMessageDocument from app.db.repositories.dlq_repository import DLQRepository from app.dlq.models import DLQMessageStatus -from app.domain.enums.kafka import KafkaTopic +from app.domain.events.typed import ExecutionRequestedEvent from dishka import AsyncContainer from tests.conftest import make_execution_requested_event @@ -32,7 +32,7 @@ async def _create_dlq_document( doc = DLQMessageDocument( event=event_dict, - original_topic=str(KafkaTopic.EXECUTION_EVENTS), + original_topic=ExecutionRequestedEvent.topic(), error="Test error", retry_count=0, failed_at=now, diff --git a/backend/tests/e2e/events/test_producer_roundtrip.py b/backend/tests/e2e/events/test_producer_roundtrip.py index 773c7f3a..2ba752d2 100644 --- a/backend/tests/e2e/events/test_producer_roundtrip.py +++ b/backend/tests/e2e/events/test_producer_roundtrip.py @@ -2,8 +2,7 @@ from uuid import uuid4 import pytest -from app.events.core import UnifiedProducer -from app.infrastructure.kafka.mappings import get_topic_for_event +from app.events.core import EventPublisher from dishka import AsyncContainer from tests.conftest import make_execution_requested_event @@ -17,11 +16,12 @@ async def test_unified_producer_produce_and_send_to_dlq( scope: AsyncContainer, ) -> None: - prod: UnifiedProducer = await scope.get(UnifiedProducer) + prod: EventPublisher = await scope.get(EventPublisher) ev = make_execution_requested_event(execution_id=f"exec-{uuid4().hex[:8]}") - await prod.produce(ev, key=ev.execution_id) + await prod.publish(ev, key=ev.execution_id) # Exercise send_to_dlq path — should not raise - topic = str(get_topic_for_event(ev.event_type)) + # Topic is derived from event class via BaseEvent.topic() + topic = type(ev).topic() await prod.send_to_dlq(ev, original_topic=topic, error=RuntimeError("forced"), retry_count=1) diff --git a/backend/tests/e2e/events/test_schema_registry_real.py b/backend/tests/e2e/events/test_schema_registry_real.py deleted file mode 100644 index 58e4900d..00000000 --- a/backend/tests/e2e/events/test_schema_registry_real.py +++ /dev/null @@ -1,29 +0,0 @@ -import logging - -import pytest - -from app.domain.events.typed import DomainEventAdapter, EventMetadata, PodCreatedEvent -from app.events.schema.schema_registry import SchemaRegistryManager -from app.settings import Settings - -pytestmark = [pytest.mark.e2e, pytest.mark.kafka] - -_test_logger = logging.getLogger("test.events.schema_registry_real") - - -@pytest.mark.asyncio -async def test_serialize_and_deserialize_event_real_registry(test_settings: Settings) -> None: - # Uses real Schema Registry configured via env (SCHEMA_REGISTRY_URL) - m = SchemaRegistryManager(settings=test_settings, logger=_test_logger) - ev = 
PodCreatedEvent( - execution_id="e1", - pod_name="p", - namespace="n", - metadata=EventMetadata(service_name="s", service_version="1"), - ) - data = await m.serialize_event(ev) - payload = await m.serializer.decode_message(data) - assert payload is not None - obj = DomainEventAdapter.validate_python(payload) - assert isinstance(obj, PodCreatedEvent) - assert obj.namespace == "n" diff --git a/backend/tests/e2e/events/test_schema_registry_roundtrip.py b/backend/tests/e2e/events/test_schema_registry_roundtrip.py deleted file mode 100644 index 1fc83467..00000000 --- a/backend/tests/e2e/events/test_schema_registry_roundtrip.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging - -import pytest - -from app.domain.events.typed import DomainEventAdapter -from app.events.schema.schema_registry import SchemaRegistryManager -from dishka import AsyncContainer - -from tests.conftest import make_execution_requested_event - -pytestmark = [pytest.mark.e2e] - -_test_logger = logging.getLogger("test.events.schema_registry_roundtrip") - - -@pytest.mark.asyncio -async def test_schema_registry_serialize_deserialize_roundtrip(scope: AsyncContainer) -> None: - reg: SchemaRegistryManager = await scope.get(SchemaRegistryManager) - ev = make_execution_requested_event(execution_id="e-rt") - data = await reg.serialize_event(ev) - assert data[:1] == b"\x00" # Confluent wire format magic byte - payload = await reg.serializer.decode_message(data) - assert payload is not None - back = DomainEventAdapter.validate_python(payload) - assert back.event_id == ev.event_id and getattr(back, "execution_id", None) == ev.execution_id diff --git a/backend/tests/e2e/idempotency/test_consumer_idempotent.py b/backend/tests/e2e/idempotency/test_consumer_idempotent.py index 5b500d90..90f8c364 100644 --- a/backend/tests/e2e/idempotency/test_consumer_idempotent.py +++ b/backend/tests/e2e/idempotency/test_consumer_idempotent.py @@ -4,6 +4,7 @@ from app.domain.idempotency import IdempotencyStatus, KeyStrategy from app.services.idempotency.idempotency_manager import IdempotencyManager from dishka import AsyncContainer + from tests.conftest import make_execution_requested_event pytestmark = [pytest.mark.e2e, pytest.mark.redis] diff --git a/backend/tests/e2e/idempotency/test_idempotency.py b/backend/tests/e2e/idempotency/test_idempotency.py index 0dad952d..ca094401 100644 --- a/backend/tests/e2e/idempotency/test_idempotency.py +++ b/backend/tests/e2e/idempotency/test_idempotency.py @@ -46,7 +46,7 @@ async def test_complete_flow_new_event(self, manager: IdempotencyManager) -> Non assert result.is_duplicate is False assert result.status == IdempotencyStatus.PROCESSING - assert result.key.endswith(f"{real_event.event_type}:{real_event.event_id}") + assert result.key.endswith(f"{type(real_event).topic()}:{real_event.event_id}") assert result.key.startswith(f"{manager.config.key_prefix}:") # Verify it's in the repository diff --git a/backend/tests/e2e/result_processor/test_result_processor.py b/backend/tests/e2e/result_processor/test_result_processor.py index 7976d1b0..fd13c8a2 100644 --- a/backend/tests/e2e/result_processor/test_result_processor.py +++ b/backend/tests/e2e/result_processor/test_result_processor.py @@ -1,5 +1,4 @@ import logging -import uuid import pytest from app.core.database_context import Database @@ -12,8 +11,7 @@ ResourceUsageDomain, ) from app.domain.execution import DomainExecutionCreate -from app.events.core import UnifiedProducer -from app.events.schema.schema_registry import SchemaRegistryManager +from app.events.core import 
EventPublisher from app.services.result_processor.processor import ResultProcessor from app.settings import Settings from dishka import AsyncContainer @@ -30,15 +28,13 @@ @pytest.mark.asyncio async def test_result_processor_persists_and_emits(scope: AsyncContainer) -> None: - # Schemas are initialized inside the SchemaRegistryManager DI provider - registry: SchemaRegistryManager = await scope.get(SchemaRegistryManager) settings: Settings = await scope.get(Settings) execution_metrics: ExecutionMetrics = await scope.get(ExecutionMetrics) # Dependencies db: Database = await scope.get(Database) repo: ExecutionRepository = await scope.get(ExecutionRepository) - producer: UnifiedProducer = await scope.get(UnifiedProducer) + producer: EventPublisher = await scope.get(EventPublisher) # Create a base execution to satisfy ResultProcessor lookup created = await repo.create_execution(DomainExecutionCreate( diff --git a/backend/tests/e2e/services/events/test_kafka_event_service.py b/backend/tests/e2e/services/events/test_kafka_event_service.py deleted file mode 100644 index 1a02e800..00000000 --- a/backend/tests/e2e/services/events/test_kafka_event_service.py +++ /dev/null @@ -1,67 +0,0 @@ -import pytest -from app.db.repositories import EventRepository -from app.domain.enums.events import EventType -from app.domain.enums.execution import ExecutionStatus -from app.services.kafka_event_service import KafkaEventService -from dishka import AsyncContainer - -pytestmark = [pytest.mark.e2e, pytest.mark.kafka, pytest.mark.mongodb] - - -@pytest.mark.asyncio -async def test_publish_user_registered_event(scope: AsyncContainer) -> None: - svc: KafkaEventService = await scope.get(KafkaEventService) - repo: EventRepository = await scope.get(EventRepository) - - event_id = await svc.publish_event( - event_type=EventType.USER_REGISTERED, - payload={"user_id": "u1", "username": "alice", "email": "alice@example.com"}, - aggregate_id="u1", - ) - assert isinstance(event_id, str) and event_id - stored = await repo.get_event(event_id) - assert stored is not None and stored.event_id == event_id - - -@pytest.mark.asyncio -async def test_publish_execution_event(scope: AsyncContainer) -> None: - svc: KafkaEventService = await scope.get(KafkaEventService) - repo: EventRepository = await scope.get(EventRepository) - - event_id = await svc.publish_execution_event( - event_type=EventType.EXECUTION_QUEUED, - execution_id="exec1", - status=ExecutionStatus.QUEUED, - metadata=None, - error_message=None, - ) - assert isinstance(event_id, str) and event_id - assert await repo.get_event(event_id) is not None - - -@pytest.mark.asyncio -async def test_publish_pod_event_and_without_metadata(scope: AsyncContainer) -> None: - svc: KafkaEventService = await scope.get(KafkaEventService) - repo: EventRepository = await scope.get(EventRepository) - - # Pod event - eid = await svc.publish_pod_event( - event_type=EventType.POD_CREATED, - pod_name="executor-pod1", - execution_id="exec1", - namespace="ns", - status="pending", - metadata=None, - ) - assert isinstance(eid, str) - assert await repo.get_event(eid) is not None - - # Generic event without metadata - eid2 = await svc.publish_event( - event_type=EventType.USER_LOGGED_IN, - payload={"user_id": "u2", "login_method": "password"}, - aggregate_id="u2", - metadata=None, - ) - assert isinstance(eid2, str) - assert await repo.get_event(eid2) is not None diff --git a/backend/tests/e2e/services/execution/test_execution_service.py b/backend/tests/e2e/services/execution/test_execution_service.py index 
8ae06e85..a930f7cc 100644 --- a/backend/tests/e2e/services/execution/test_execution_service.py +++ b/backend/tests/e2e/services/execution/test_execution_service.py @@ -1,8 +1,8 @@ import uuid import pytest -from app.domain.enums.events import EventType from app.domain.enums.execution import ExecutionStatus +from app.domain.events.typed import ExecutionRequestedEvent from app.domain.execution import ResourceLimitsDomain from app.domain.execution.exceptions import ExecutionNotFoundError from app.services.execution_service import ExecutionService @@ -186,16 +186,14 @@ async def test_get_execution_events(self, scope: AsyncContainer) -> None: events = await svc.get_execution_events(exec_result.execution_id) assert isinstance(events, list) - # Should have at least EXECUTION_REQUESTED event - if events: - event_types = {e.event_type for e in events} - assert EventType.EXECUTION_REQUESTED in event_types + # Should have at least one event + assert len(events) > 0 @pytest.mark.asyncio async def test_get_execution_events_with_filter( self, scope: AsyncContainer ) -> None: - """Get events filtered by type.""" + """Get events filtered by topic.""" svc: ExecutionService = await scope.get(ExecutionService) user_id = f"test_user_{uuid.uuid4().hex[:8]}" @@ -210,12 +208,12 @@ async def test_get_execution_events_with_filter( events = await svc.get_execution_events( exec_result.execution_id, - event_types=[EventType.EXECUTION_REQUESTED], + topics=[ExecutionRequestedEvent.topic()], ) assert isinstance(events, list) - for event in events: - assert event.event_type == EventType.EXECUTION_REQUESTED + # When filtered by topic, should return matching events + assert len(events) > 0 class TestGetUserExecutions: diff --git a/backend/tests/e2e/services/idempotency/test_redis_repository.py b/backend/tests/e2e/services/idempotency/test_redis_repository.py index a295d307..53805f70 100644 --- a/backend/tests/e2e/services/idempotency/test_redis_repository.py +++ b/backend/tests/e2e/services/idempotency/test_redis_repository.py @@ -1,5 +1,4 @@ import json -import uuid from datetime import datetime, timedelta, timezone import pytest diff --git a/backend/tests/e2e/services/replay/test_replay_service.py b/backend/tests/e2e/services/replay/test_replay_service.py index e7f847d6..81c5c9bd 100644 --- a/backend/tests/e2e/services/replay/test_replay_service.py +++ b/backend/tests/e2e/services/replay/test_replay_service.py @@ -1,8 +1,7 @@ import pytest from app.domain.enums.replay import ReplayStatus, ReplayTarget, ReplayType from app.domain.replay.exceptions import ReplaySessionNotFoundError -from app.services.event_replay import ReplayConfig, ReplayFilter -from app.services.event_replay import EventReplayService +from app.services.event_replay import EventReplayService, ReplayConfig, ReplayFilter from dishka import AsyncContainer pytestmark = [pytest.mark.e2e, pytest.mark.kafka] diff --git a/backend/tests/e2e/services/sse/test_partitioned_event_router.py b/backend/tests/e2e/services/sse/test_partitioned_event_router.py index 76548d80..60aadd71 100644 --- a/backend/tests/e2e/services/sse/test_partitioned_event_router.py +++ b/backend/tests/e2e/services/sse/test_partitioned_event_router.py @@ -33,4 +33,4 @@ async def test_bus_routes_event_to_redis(redis_client: redis.Redis, test_setting msg = await asyncio.wait_for(subscription.get(RedisSSEMessage), timeout=2.0) assert msg is not None - assert str(msg.event_type) == str(ev.event_type) + assert msg.event_type == type(ev).topic() diff --git 
a/backend/tests/e2e/test_admin_events_routes.py b/backend/tests/e2e/test_admin_events_routes.py index 0691b500..95eebc93 100644 --- a/backend/tests/e2e/test_admin_events_routes.py +++ b/backend/tests/e2e/test_admin_events_routes.py @@ -3,9 +3,8 @@ import pytest import pytest_asyncio from app.db.repositories.event_repository import EventRepository -from app.domain.enums.events import EventType from app.domain.enums.replay import ReplayStatus -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent from app.schemas_pydantic.admin_events import ( EventBrowseRequest, EventBrowseResponse, @@ -17,16 +16,16 @@ EventReplayStatusResponse, EventStatsResponse, ) -from app.schemas_pydantic.execution import ExecutionRequest, ExecutionResponse from dishka import AsyncContainer from httpx import AsyncClient + from tests.conftest import make_execution_requested_event pytestmark = [pytest.mark.e2e, pytest.mark.admin] @pytest_asyncio.fixture -async def stored_event(scope: AsyncContainer) -> DomainEvent: +async def stored_event(scope: AsyncContainer) -> BaseEvent: """Insert a test event directly into DB - no Kafka/waiting needed.""" repo = await scope.get(EventRepository) event = make_execution_requested_event(execution_id=f"e-{uuid.uuid4().hex[:8]}") @@ -61,12 +60,12 @@ async def test_browse_events(self, test_admin: AsyncClient) -> None: @pytest.mark.asyncio async def test_browse_events_with_event_type_filter( - self, test_admin: AsyncClient, stored_event: DomainEvent + self, test_admin: AsyncClient, stored_event: BaseEvent ) -> None: """Browse events filtered by event type.""" request = EventBrowseRequest( filters=EventFilter( - event_types=[EventType.EXECUTION_REQUESTED], + topics=["execution_requested"], aggregate_id=stored_event.aggregate_id, ), skip=0, @@ -102,7 +101,7 @@ async def test_browse_events_with_pagination( @pytest.mark.asyncio async def test_browse_events_with_aggregate_filter( - self, test_admin: AsyncClient, stored_event: DomainEvent + self, test_admin: AsyncClient, stored_event: BaseEvent ) -> None: """Browse events filtered by aggregate ID.""" request = EventBrowseRequest( @@ -172,7 +171,7 @@ async def test_get_event_stats(self, test_admin: AsyncClient) -> None: stats = EventStatsResponse.model_validate(response.json()) assert stats.total_events >= 0 - assert isinstance(stats.events_by_type, list) + assert isinstance(stats.events_by_topic, list) assert isinstance(stats.events_by_hour, list) assert isinstance(stats.top_users, list) assert stats.error_rate >= 0.0 @@ -243,7 +242,7 @@ async def test_export_events_csv_with_filters( response = await test_admin.get( "/api/v1/admin/events/export/csv", params={ - "event_types": [EventType.EXECUTION_REQUESTED], + "topics": ["execution_requested"], "limit": 100, }, ) @@ -289,7 +288,7 @@ async def test_export_events_json_with_filters( response = await test_admin.get( "/api/v1/admin/events/export/json", params={ - "event_types": [EventType.EXECUTION_REQUESTED, EventType.EXECUTION_STARTED], + "topics": ["execution_requested", "execution_started"], "limit": 500, }, ) @@ -311,7 +310,7 @@ class TestGetEventDetail: @pytest.mark.asyncio async def test_get_event_detail( - self, test_admin: AsyncClient, stored_event: DomainEvent + self, test_admin: AsyncClient, stored_event: BaseEvent ) -> None: """Admin can get event details.""" response = await test_admin.get(f"/api/v1/admin/events/{stored_event.event_id}") @@ -349,7 +348,7 @@ class TestReplayEvents: @pytest.mark.asyncio async def test_replay_events_dry_run( - self, 
test_admin: AsyncClient, stored_event: DomainEvent + self, test_admin: AsyncClient, stored_event: BaseEvent ) -> None: """Admin can replay events in dry run mode.""" request = EventReplayRequest( @@ -411,7 +410,7 @@ async def test_get_replay_status_not_found( @pytest.mark.asyncio async def test_get_replay_status_after_replay( - self, test_admin: AsyncClient, stored_event: DomainEvent + self, test_admin: AsyncClient, stored_event: BaseEvent ) -> None: """Get replay status after starting a replay.""" request = EventReplayRequest( @@ -456,7 +455,7 @@ class TestDeleteEvent: @pytest.mark.asyncio async def test_delete_event( - self, test_admin: AsyncClient, stored_event: DomainEvent + self, test_admin: AsyncClient, stored_event: BaseEvent ) -> None: """Admin can delete an event.""" response = await test_admin.delete(f"/api/v1/admin/events/{stored_event.event_id}") diff --git a/backend/tests/e2e/test_events_routes.py b/backend/tests/e2e/test_events_routes.py index b73fa895..90bd6fe7 100644 --- a/backend/tests/e2e/test_events_routes.py +++ b/backend/tests/e2e/test_events_routes.py @@ -1,8 +1,7 @@ import asyncio import pytest -from app.domain.enums.events import EventType -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent from app.schemas_pydantic.events import ( DeleteEventResponse, EventListResponse, @@ -13,9 +12,6 @@ ) from app.schemas_pydantic.execution import ExecutionResponse from httpx import AsyncClient -from pydantic import TypeAdapter - -DomainEventAdapter: TypeAdapter[DomainEvent] = TypeAdapter(DomainEvent) pytestmark = [pytest.mark.e2e, pytest.mark.kafka] @@ -162,7 +158,7 @@ async def test_get_user_events_with_filters( response = await test_user.get( "/api/v1/events/user", params={ - "event_types": [EventType.EXECUTION_REQUESTED], + "topics": ["execution_requested"], "limit": 10, }, ) @@ -193,7 +189,7 @@ async def test_query_events( response = await test_user.post( "/api/v1/events/query", json={ - "event_types": [EventType.EXECUTION_REQUESTED], + "topics": ["execution_requested"], "limit": 50, "skip": 0, }, @@ -271,7 +267,7 @@ async def test_get_event_statistics( stats = EventStatistics.model_validate(response.json()) assert stats.total_events >= 1 - assert stats.events_by_type is not None + assert stats.events_by_topic is not None assert stats.events_by_service is not None @pytest.mark.asyncio @@ -315,7 +311,7 @@ async def test_get_event_by_id( response = await test_user.get(f"/api/v1/events/{event_id}") assert response.status_code == 200 - event = DomainEventAdapter.validate_python(response.json()) + event = BaseEvent.model_validate(response.json()) assert event.event_id == event_id @@ -330,7 +326,7 @@ async def test_publish_event_admin_only( response = await test_admin.post( "/api/v1/events/publish", json={ - "event_type": EventType.SYSTEM_ERROR, + "topic": "system_error", "payload": { "error_type": "test_error", "message": "Test error message", @@ -354,7 +350,7 @@ async def test_publish_event_forbidden_for_user( response = await test_user.post( "/api/v1/events/publish", json={ - "event_type": EventType.SYSTEM_ERROR, + "topic": "system_error", "payload": { "error_type": "test_error", "message": "Test error message", @@ -380,7 +376,7 @@ async def test_aggregate_events( "/api/v1/events/aggregate", json={ "pipeline": [ - {"$group": {"_id": "$event_type", "count": {"$sum": 1}}} + {"$group": {"_id": "$topic", "count": {"$sum": 1}}} ], "limit": 100, }, @@ -398,9 +394,9 @@ class TestListEventTypes: @pytest.mark.asyncio async def 
test_list_event_types(self, test_admin: AsyncClient) -> None: """List available event types.""" - # First create an event so there's at least one type (requires admin) + # First create an event so there's at least one topic (requires admin) request = PublishEventRequest( - event_type=EventType.SCRIPT_SAVED, + topic="script_saved", payload={ "script_id": "test-script", "user_id": "test-user", @@ -430,7 +426,7 @@ async def test_delete_event_admin_only( publish_response = await test_admin.post( "/api/v1/events/publish", json={ - "event_type": EventType.SYSTEM_ERROR, + "topic": "system_error", "payload": { "error_type": "test_delete_error", "message": "Event to be deleted", diff --git a/backend/tests/e2e/test_execution_routes.py b/backend/tests/e2e/test_execution_routes.py index f40630d6..6ee945b7 100644 --- a/backend/tests/e2e/test_execution_routes.py +++ b/backend/tests/e2e/test_execution_routes.py @@ -8,9 +8,8 @@ import asyncio import pytest -from app.domain.enums.events import EventType from app.domain.enums.execution import ExecutionStatus -from app.domain.events.typed import ExecutionDomainEvent +from app.domain.events.typed import BaseEvent, ExecutionRequestedEvent from app.schemas_pydantic.execution import ( CancelExecutionRequest, CancelResponse, @@ -29,7 +28,7 @@ pytestmark = [pytest.mark.e2e, pytest.mark.k8s] # TypeAdapter for parsing list of execution events from API response -ExecutionEventsAdapter = TypeAdapter(list[ExecutionDomainEvent]) +ExecutionEventsAdapter = TypeAdapter(list[BaseEvent]) # Initial states when execution is created INITIAL_STATES = { @@ -383,12 +382,10 @@ async def test_get_execution_events(self, test_user: AsyncClient) -> None: # at least one execution event is present (COMPLETED is always stored by # the time we query since wait_for_terminal_state ensures execution finished). 
assert len(events) > 0 - event_types = {e.event_type for e in events} - assert event_types & {EventType.EXECUTION_REQUESTED, EventType.EXECUTION_COMPLETED} @pytest.mark.asyncio - async def test_get_events_filtered_by_type(self, test_user: AsyncClient) -> None: - """Filter events by event type.""" + async def test_get_events_filtered_by_topic(self, test_user: AsyncClient) -> None: + """Filter events by topic.""" request = ExecutionRequest(script="print('filter test')", lang="python", lang_version="3.11") response = await test_user.post("/api/v1/execute", json=request.model_dump()) @@ -399,13 +396,13 @@ async def test_get_events_filtered_by_type(self, test_user: AsyncClient) -> None events_response = await test_user.get( f"/api/v1/executions/{exec_response.execution_id}/events", - params={"event_types": [EventType.EXECUTION_REQUESTED]}, + params={"topics": [ExecutionRequestedEvent.topic()]}, ) assert events_response.status_code == 200 events = ExecutionEventsAdapter.validate_python(events_response.json()) - for event in events: - assert event.event_type == EventType.EXECUTION_REQUESTED + # When filtered by topic, all returned events should be of that topic + assert len(events) > 0 @pytest.mark.asyncio async def test_get_events_access_denied(self, test_user: AsyncClient, another_user: AsyncClient) -> None: diff --git a/backend/tests/e2e/test_k8s_worker_create_pod.py b/backend/tests/e2e/test_k8s_worker_create_pod.py index 91d9c0dd..1c5fc2fb 100644 --- a/backend/tests/e2e/test_k8s_worker_create_pod.py +++ b/backend/tests/e2e/test_k8s_worker_create_pod.py @@ -5,7 +5,7 @@ from app.core.metrics import EventMetrics from app.domain.enums.execution import QueuePriority from app.domain.events.typed import CreatePodCommandEvent, EventMetadata -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from app.services.k8s_worker import KubernetesWorker from app.settings import Settings from dishka import AsyncContainer @@ -22,7 +22,7 @@ async def test_worker_creates_configmap_and_pod( scope: AsyncContainer, test_settings: Settings ) -> None: api_client: k8s_client.ApiClient = await scope.get(k8s_client.ApiClient) - producer: UnifiedProducer = await scope.get(UnifiedProducer) + producer: EventPublisher = await scope.get(EventPublisher) event_metrics: EventMetrics = await scope.get(EventMetrics) worker = KubernetesWorker( diff --git a/backend/tests/load/plot_report.py b/backend/tests/load/plot_report.py index b415e15e..86cb0667 100644 --- a/backend/tests/load/plot_report.py +++ b/backend/tests/load/plot_report.py @@ -114,7 +114,7 @@ def plot_endpoint_throughput(report: ReportDict, out_dir: Path, top_n: int = 10) labels = [k for k, _ in data] total = [v.get("count", 0) for _, v in data] errors = [v.get("errors", 0) for _, v in data] - successes = [t - e for t, e in zip(total, errors)] + successes = [t - e for t, e in zip(total, errors, strict=True)] x = range(len(labels)) width = 0.45 diff --git a/backend/tests/unit/domain/events/test_event_schema_coverage.py b/backend/tests/unit/domain/events/test_event_schema_coverage.py index 5888ed92..b66209d9 100644 --- a/backend/tests/unit/domain/events/test_event_schema_coverage.py +++ b/backend/tests/unit/domain/events/test_event_schema_coverage.py @@ -1,183 +1,104 @@ """ -Validates complete correspondence between EventType enum and event classes. +Validates event class topic naming conventions for 1:1 topic mapping. This test ensures that: -1. Every EventType has a corresponding domain event class (in DomainEvent union) -2. 
Every EventType has a corresponding Kafka event class (DomainEvent subclass) -3. No orphan event classes exist (classes without matching EventType) +1. All BaseEvent subclasses have proper topic() classmethod +2. Topic names are derived correctly from class names (snake_case, no 'Event' suffix) +3. No duplicate topic names exist -Run this test to catch missing event implementations early. +Run this test to catch event naming issues early. """ -from typing import get_args - -from app.domain.enums.events import EventType -from app.domain.events.typed import BaseEvent, DomainEvent, DomainEventAdapter - - -def get_domain_event_classes() -> dict[EventType, type]: - """Extract EventType -> class mapping from DomainEvent union.""" - mapping: dict[EventType, type] = {} - union_types = get_args(DomainEvent) - # First element is the actual union, need to get its args - if union_types: - inner = union_types[0] - if hasattr(inner, "__args__"): - event_classes = inner.__args__ - else: - # Python 3.10+ union syntax - event_classes = get_args(inner) or [inner] - if not event_classes: - event_classes = list(union_types[:-1]) # Exclude Discriminator - else: - event_classes = [] - - # Fallback: iterate through all BaseEvent subclasses - if not event_classes: - event_classes = [] - for cls in BaseEvent.__subclasses__(): - if hasattr(cls, "model_fields") and "event_type" in cls.model_fields: - event_classes.append(cls) - - for cls in event_classes: - if hasattr(cls, "model_fields") and "event_type" in cls.model_fields: - field = cls.model_fields["event_type"] - if field.default is not None: - mapping[field.default] = cls - - return mapping - - -def get_kafka_event_classes() -> dict[EventType, type]: - """Extract EventType -> class mapping from DomainEvent union (same source).""" - return get_domain_event_classes() - - -class TestEventSchemaCoverage: - """Ensure complete correspondence between EventType and event classes.""" - - def test_all_event_types_have_domain_event_class(self) -> None: - """Every EventType must have a corresponding domain event class.""" - domain_mapping = get_domain_event_classes() - all_types = set(EventType) - covered_types = set(domain_mapping.keys()) - missing = all_types - covered_types - - assert not missing, ( - f"Missing domain event classes for {len(missing)} EventType(s):\n" - + "\n".join(f" - {et.value}: needs a class in typed.py" for et in sorted(missing, key=lambda x: x.value)) - ) +import re - def test_all_event_types_have_kafka_event_class(self) -> None: - """Every EventType must have a corresponding Kafka event class.""" - kafka_mapping = get_kafka_event_classes() - all_types = set(EventType) - covered_types = set(kafka_mapping.keys()) - missing = all_types - covered_types - - assert not missing, ( - f"Missing Kafka event classes for {len(missing)} EventType(s):\n" - + "\n".join( - f" - {et.value}: needs a class in infrastructure/kafka/events/" - for et in sorted(missing, key=lambda x: x.value) - ) - ) +from app.domain.events.typed import BaseEvent - def test_DomainEventAdapter_covers_all_types(self) -> None: - """The DomainEventAdapter TypeAdapter must handle all EventTypes.""" - errors: list[str] = [] - for et in EventType: - try: - # Validation will fail due to missing required fields, but that's OK - # We just want to confirm the type IS in the union (not "unknown discriminator") - DomainEventAdapter.validate_python({"event_type": et}) - except Exception as e: - error_str = str(e).lower() - # "validation error" means type IS recognized but fields are missing - that's 
fine - # "no match" or "discriminator" means type is NOT in union - that's a failure - if "no match" in error_str or "unable to extract" in error_str: - errors.append(f" - {et.value}: not in DomainEvent union") - - assert not errors, f"DomainEventAdapter missing {len(errors)} type(s):\n" + "\n".join(errors) - - def test_no_orphan_domain_event_classes(self) -> None: - """All domain event classes must have a corresponding EventType.""" - orphans: list[str] = [] - - for cls in BaseEvent.__subclasses__(): - # Skip test fixtures/mocks (private classes starting with _) - if cls.__name__.startswith("_"): - continue - if not hasattr(cls, "model_fields"): - continue - field = cls.model_fields.get("event_type") - if field is None: +def get_all_event_classes() -> list[type[BaseEvent]]: + """Get all BaseEvent subclasses.""" + classes: list[type[BaseEvent]] = [] + + def collect_subclasses(cls: type) -> None: + for subclass in cls.__subclasses__(): + if subclass.__name__.startswith("_"): continue - if field.default is None: - orphans.append(f" - {cls.__name__}: event_type field has no default") - elif not isinstance(field.default, EventType): - orphans.append(f" - {cls.__name__}: event_type default is not an EventType") + classes.append(subclass) + collect_subclasses(subclass) - assert not orphans, "Orphan domain event classes:\n" + "\n".join(orphans) + collect_subclasses(BaseEvent) + return classes - def test_no_orphan_kafka_event_classes(self) -> None: - """All Kafka event classes must have a corresponding EventType.""" - orphans: list[str] = [] - for cls in BaseEvent.__subclasses__(): - # Skip test fixtures/mocks (private classes starting with _) - if cls.__name__.startswith("_"): - continue - if not hasattr(cls, "model_fields"): - continue - field = cls.model_fields.get("event_type") - if field is None: - orphans.append(f" - {cls.__name__}: missing event_type field") - elif field.default is None: - orphans.append(f" - {cls.__name__}: event_type field has no default") - elif not isinstance(field.default, EventType): - orphans.append(f" - {cls.__name__}: event_type default is not an EventType") - - assert not orphans, "Orphan Kafka event classes:\n" + "\n".join(orphans) - - def test_domain_and_kafka_event_names_match(self) -> None: - """Domain and Kafka event classes for same EventType should have same name.""" - domain_mapping = get_domain_event_classes() - kafka_mapping = get_kafka_event_classes() - - mismatches: list[str] = [] - for et in EventType: - domain_cls = domain_mapping.get(et) - kafka_cls = kafka_mapping.get(et) - - if domain_cls and kafka_cls: - if domain_cls.__name__ != kafka_cls.__name__: - mismatches.append( - f" - {et.value}: domain={domain_cls.__name__}, kafka={kafka_cls.__name__}" - ) - - assert not mismatches, ( - f"Event class name mismatches for {len(mismatches)} type(s):\n" + "\n".join(mismatches) - ) +class TestEventTopicCoverage: + """Ensure proper topic naming for all event classes.""" + def test_all_event_classes_have_topic_method(self) -> None: + """Every BaseEvent subclass must have a topic() classmethod.""" + errors: list[str] = [] -class TestEventSchemaConsistency: - """Additional consistency checks between domain and Kafka event schemas.""" + for cls in get_all_event_classes(): + if not hasattr(cls, "topic"): + errors.append(f" - {cls.__name__}: missing topic() classmethod") + elif not callable(cls.topic): + errors.append(f" - {cls.__name__}: topic is not callable") - def test_event_type_count_sanity(self) -> None: - """Sanity check: we should have a reasonable number 
of event types."""
-        count = len(EventType)
-        assert count >= 50, f"Expected at least 50 EventTypes, got {count}"
+        assert not errors, "Event classes missing topic():\n" + "\n".join(errors)
 
-    def test_all_event_types_are_lowercase_snake_case(self) -> None:
-        """All EventType values should be lowercase snake_case."""
+    def test_all_topics_are_snake_case(self) -> None:
+        """All topic names should be lowercase snake_case."""
         violations: list[str] = []
-        for et in EventType:
-            value = et.value
-            if value != value.lower():
-                violations.append(f"  - {et.name}: '{value}' contains uppercase")
-            if " " in value or "-" in value:
-                violations.append(f"  - {et.name}: '{value}' contains spaces or hyphens")
-
-        assert not violations, "EventType naming violations:\n" + "\n".join(violations)
+
+        for cls in get_all_event_classes():
+            topic = cls.topic()
+            if topic != topic.lower():
+                violations.append(f"  - {cls.__name__}: topic '{topic}' contains uppercase")
+            if " " in topic or "-" in topic:
+                violations.append(f"  - {cls.__name__}: topic '{topic}' contains spaces or hyphens")
+            if not re.match(r"^[a-z][a-z0-9_]*$", topic):
+                violations.append(f"  - {cls.__name__}: topic '{topic}' has invalid format")
+
+        assert not violations, "Topic naming violations:\n" + "\n".join(violations)
+
+    def test_no_duplicate_topics(self) -> None:
+        """All event classes must have unique topic names."""
+        topic_to_classes: dict[str, list[str]] = {}
+
+        for cls in get_all_event_classes():
+            topic = cls.topic()
+            if topic not in topic_to_classes:
+                topic_to_classes[topic] = []
+            topic_to_classes[topic].append(cls.__name__)
+
+        duplicates = {t: classes for t, classes in topic_to_classes.items() if len(classes) > 1}
+
+        assert not duplicates, (
+            "Duplicate topic names found:\n"
+            + "\n".join(f"  - {topic}: {', '.join(classes)}" for topic, classes in duplicates.items())
+        )
+
+    def test_topic_derived_from_class_name(self) -> None:
+        """Topic names should be derived from class names (snake_case, no 'Event' suffix)."""
+        errors: list[str] = []
+
+        for cls in get_all_event_classes():
+            topic = cls.topic()
+            class_name = cls.__name__
+
+            # Expected: remove 'Event' suffix and convert to snake_case
+            expected_base = class_name.removesuffix("Event")
+            # Convert PascalCase to snake_case
+            expected_topic = re.sub(r"(?<!^)(?=[A-Z])", "_", expected_base).lower()
+
+            if topic != expected_topic:
+                errors.append(f"  - {class_name}: topic '{topic}' != expected '{expected_topic}'")
+
+        assert not errors, "Topic derivation mismatches:\n" + "\n".join(errors)
+
+    def test_event_class_count_sanity(self) -> None:
+        """Sanity check: we should have a reasonable number of event classes."""
+        count = len(get_all_event_classes())
+        assert count >= 10, f"Expected at least 10 event classes, got {count}"
diff --git a/backend/tests/unit/events/test_mappings_and_types.py b/backend/tests/unit/events/test_mappings_and_types.py
index cdbefb24..ae13e8b5 100644
--- a/backend/tests/unit/events/test_mappings_and_types.py
+++ b/backend/tests/unit/events/test_mappings_and_types.py
@@ -1,17 +1,20 @@
-from app.domain.enums.events import EventType
-from app.domain.enums.kafka import KafkaTopic
-from app.infrastructure.kafka.mappings import (
-    get_event_class_for_type,
-    get_event_types_for_topic,
-    get_topic_for_event,
-)
+"""Tests for Kafka topic mappings.
+
+Note: The EventType to event class mapping is deprecated in favor of
+1:1 topic mapping where topic names are derived from class names.
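+
+Illustrative sketch (assumes the snake_case derivation described above; the exact
+rules live on BaseEvent in app.domain.events.typed):
+
+    ExecutionRequestedEvent.topic()          # -> "execution_requested"
+    ExecutionRequestedEvent.topic("prefix_") # -> "prefix_execution_requested"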
+""" +from app.domain.events.typed import BaseEvent, ExecutionRequestedEvent -def test_event_mappings_topics() -> None: - # A few spot checks - assert get_topic_for_event(EventType.EXECUTION_REQUESTED) == KafkaTopic.EXECUTION_EVENTS - cls = get_event_class_for_type(EventType.CREATE_POD_COMMAND) - assert cls is not None - # All event types for a topic include at least one of the checked types - ev_types = get_event_types_for_topic(KafkaTopic.EXECUTION_EVENTS) - assert EventType.EXECUTION_REQUESTED in ev_types + +def test_event_class_topic_derivation() -> None: + """Event classes derive their topic from class name.""" + # Topic is derived from class name: ExecutionRequestedEvent -> execution_requested + assert ExecutionRequestedEvent.topic() == "execution_requested" + + # BaseEvent itself has topic 'base' + assert BaseEvent.topic() == "base" + + +def test_topic_with_prefix() -> None: + """Topics can have an optional prefix.""" + assert ExecutionRequestedEvent.topic("prefix_") == "prefix_execution_requested" diff --git a/backend/tests/unit/events/test_schema_registry_manager.py b/backend/tests/unit/events/test_schema_registry_manager.py index cdc3159b..bf9d99e6 100644 --- a/backend/tests/unit/events/test_schema_registry_manager.py +++ b/backend/tests/unit/events/test_schema_registry_manager.py @@ -1,34 +1,51 @@ import pytest +from app.domain.enums.execution import QueuePriority +from app.domain.events.typed import BaseEvent, EventMetadata, ExecutionRequestedEvent from pydantic import ValidationError -from app.domain.enums.execution import QueuePriority -from app.domain.events.typed import DomainEventAdapter, ExecutionRequestedEvent - - -def test_domain_event_adapter_execution_requested() -> None: - data = { - "event_type": "execution_requested", - "execution_id": "e1", - "script": "print('ok')", - "language": "python", - "language_version": "3.11", - "runtime_image": "python:3.11-slim", - "runtime_command": ["python"], - "runtime_filename": "main.py", - "timeout_seconds": 30, - "cpu_limit": "100m", - "memory_limit": "128Mi", - "cpu_request": "50m", - "memory_request": "64Mi", - "priority": QueuePriority.NORMAL, - "metadata": {"service_name": "t", "service_version": "1.0"}, - } - ev = DomainEventAdapter.validate_python(data) + +def test_execution_requested_event_validation() -> None: + """Test that ExecutionRequestedEvent can be instantiated with valid data.""" + ev = ExecutionRequestedEvent( + execution_id="e1", + script="print('ok')", + language="python", + language_version="3.11", + runtime_image="python:3.11-slim", + runtime_command=["python"], + runtime_filename="main.py", + timeout_seconds=30, + cpu_limit="100m", + memory_limit="128Mi", + cpu_request="50m", + memory_request="64Mi", + priority=QueuePriority.NORMAL, + metadata=EventMetadata(service_name="t", service_version="1.0"), + ) assert isinstance(ev, ExecutionRequestedEvent) assert ev.execution_id == "e1" assert ev.language == "python" -def test_domain_event_adapter_missing_type_raises() -> None: +def test_execution_requested_event_topic() -> None: + """Test that ExecutionRequestedEvent has correct topic.""" + assert ExecutionRequestedEvent.topic() == "execution_requested" + + +def test_event_missing_required_fields_raises() -> None: + """Test that missing required fields raise ValidationError.""" with pytest.raises(ValidationError): - DomainEventAdapter.validate_python({}) + ExecutionRequestedEvent( # type: ignore[call-arg] + execution_id="e1", + # Missing all other required fields + metadata=EventMetadata(service_name="t", 
service_version="1.0"), + ) + + +def test_base_event_topic_derivation() -> None: + """Test that topic is derived correctly from class name.""" + # BaseEvent itself should have topic 'base' (class name without 'Event') + assert BaseEvent.topic() == "base" + + # Subclasses should have their own topics + assert ExecutionRequestedEvent.topic() == "execution_requested" diff --git a/backend/tests/unit/schemas_pydantic/test_events_schemas.py b/backend/tests/unit/schemas_pydantic/test_events_schemas.py index 38d17179..5bed4524 100644 --- a/backend/tests/unit/schemas_pydantic/test_events_schemas.py +++ b/backend/tests/unit/schemas_pydantic/test_events_schemas.py @@ -7,7 +7,7 @@ def test_event_filter_request_sort_validator_accepts_allowed_fields() -> None: req = EventFilterRequest(sort_by="timestamp", sort_order=SortOrder.DESC) assert req.sort_by == "timestamp" - for field in ("event_type", "aggregate_id", "correlation_id", "stored_at"): + for field in ("topic", "aggregate_id", "correlation_id", "stored_at"): req2 = EventFilterRequest(sort_by=field) assert req2.sort_by == field diff --git a/backend/tests/unit/services/idempotency/test_idempotency_manager.py b/backend/tests/unit/services/idempotency/test_idempotency_manager.py index 98d2c433..8a30f3ea 100644 --- a/backend/tests/unit/services/idempotency/test_idempotency_manager.py +++ b/backend/tests/unit/services/idempotency/test_idempotency_manager.py @@ -40,13 +40,23 @@ def test_custom_config(self) -> None: assert config.max_result_size_bytes == 2048 +class _TestEvent(BaseEvent): + """Test event class for idempotency testing.""" + + @classmethod + def topic(cls, prefix: str = "") -> str: + return f"{prefix}t" if prefix else "t" + + def test_manager_generate_key_variants(database_metrics: DatabaseMetrics) -> None: + from app.domain.events.typed import EventMetadata + repo = MagicMock() mgr = IdempotencyManager(IdempotencyConfig(), repo, _test_logger, database_metrics=database_metrics) - ev = MagicMock(spec=BaseEvent) - ev.event_type = "t" - ev.event_id = "e" - ev.model_dump.return_value = {"event_id": "e", "event_type": "t"} + ev = _TestEvent( + event_id="e", + metadata=EventMetadata(service_name="test", service_version="1.0"), + ) assert mgr._generate_key(ev, KeyStrategy.EVENT_BASED) == "idempotency:t:e" ch = mgr._generate_key(ev, KeyStrategy.CONTENT_HASH) diff --git a/backend/tests/unit/services/pod_monitor/test_config_and_init.py b/backend/tests/unit/services/pod_monitor/test_config_and_init.py index 66e8a89b..9b7983e5 100644 --- a/backend/tests/unit/services/pod_monitor/test_config_and_init.py +++ b/backend/tests/unit/services/pod_monitor/test_config_and_init.py @@ -9,8 +9,8 @@ def test_pod_monitor_config_defaults() -> None: cfg = PodMonitorConfig() assert cfg.namespace in {"integr8scode", "default"} - assert isinstance(cfg.pod_events_topic, str) and cfg.pod_events_topic - assert isinstance(cfg.execution_completed_topic, str) + assert cfg.label_selector == "app=integr8s,component=executor" + assert cfg.watch_timeout_seconds == 300 assert cfg.ignored_pod_phases == [] diff --git a/backend/tests/unit/services/pod_monitor/test_event_mapper.py b/backend/tests/unit/services/pod_monitor/test_event_mapper.py index 64d27a70..ff25f8b6 100644 --- a/backend/tests/unit/services/pod_monitor/test_event_mapper.py +++ b/backend/tests/unit/services/pod_monitor/test_event_mapper.py @@ -3,9 +3,6 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from kubernetes_asyncio.client import V1Pod, V1PodCondition - -from app.domain.enums.events import 
EventType from app.domain.enums.storage import ExecutionErrorType from app.domain.events.typed import ( EventMetadata, @@ -15,6 +12,8 @@ PodRunningEvent, ) from app.services.pod_monitor.event_mapper import PodContext, PodEventMapper +from kubernetes_asyncio.client import V1Pod, V1PodCondition + from tests.unit.conftest import make_container_status, make_pod pytestmark = pytest.mark.unit @@ -62,7 +61,7 @@ async def test_pending_running_and_succeeded_mapping() -> None: conditions=[V1PodCondition(type="PodScheduled", status="True")], ) evts = await pem.map_pod_event(pend, "ADDED") - assert any(e.event_type == EventType.POD_SCHEDULED for e in evts) + assert any(type(e).topic() == "pod_scheduled" for e in evts) # Running -> running run = make_pod( @@ -75,8 +74,8 @@ async def test_pending_running_and_succeeded_mapping() -> None: ], ) evts = await pem.map_pod_event(run, "MODIFIED") - assert any(e.event_type == EventType.POD_RUNNING for e in evts) - pr = [e for e in evts if e.event_type == EventType.POD_RUNNING][0] + assert any(type(e).topic() == "pod_running" for e in evts) + pr = [e for e in evts if type(e).topic() == "pod_running"][0] assert isinstance(pr, PodRunningEvent) assert any("waiting" in s.state for s in pr.container_statuses) and any( "terminated" in s.state for s in pr.container_statuses @@ -90,7 +89,7 @@ async def test_pending_running_and_succeeded_mapping() -> None: container_statuses=[make_container_status(terminated_exit_code=0)], ) evts = await pem.map_pod_event(suc, "MODIFIED") - comp = [e for e in evts if e.event_type == EventType.EXECUTION_COMPLETED][0] + comp = [e for e in evts if type(e).topic() == "execution_completed"][0] assert isinstance(comp, ExecutionCompletedEvent) assert comp.exit_code == 0 and comp.stdout == "ok" @@ -111,7 +110,7 @@ async def test_failed_timeout_and_deleted() -> None: ) ev = (await pem.map_pod_event(pod_to, "MODIFIED"))[0] assert isinstance(ev, ExecutionTimeoutEvent) - assert ev.event_type == EventType.EXECUTION_TIMEOUT and ev.timeout_seconds == 5 + assert type(ev).topic() == "execution_timeout" and ev.timeout_seconds == 5 # Failed: terminated exit_code nonzero pem_no_logs = PodEventMapper(k8s_api=_make_mock_api(""), logger=_test_logger) @@ -123,7 +122,7 @@ async def test_failed_timeout_and_deleted() -> None: ) evf = (await pem_no_logs.map_pod_event(pod_fail, "MODIFIED"))[0] assert isinstance(evf, ExecutionFailedEvent) - assert evf.event_type == EventType.EXECUTION_FAILED and evf.error_type in {ExecutionErrorType.SCRIPT_ERROR} + assert type(evf).topic() == "execution_failed" and evf.error_type in {ExecutionErrorType.SCRIPT_ERROR} # Deleted with exit code 0 returns completed valid_logs_0 = json.dumps({"stdout": "", "stderr": "", "exit_code": 0, "resource_usage": {}}) @@ -135,7 +134,7 @@ async def test_failed_timeout_and_deleted() -> None: container_statuses=[make_container_status(terminated_exit_code=0, terminated_reason="Completed")], ) evd = (await pem_completed.map_pod_event(pod_del, "DELETED"))[0] - assert evd.event_type == EventType.EXECUTION_COMPLETED + assert type(evd).topic() == "execution_completed" def test_extract_id_and_metadata_priority_and_duplicates() -> None: @@ -264,11 +263,11 @@ async def test_all_containers_succeeded_and_cache_behavior() -> None: ) # When all succeeded, failed mapping returns completed instead of failed ev = (await pem.map_pod_event(pod, "MODIFIED"))[0] - assert ev.event_type == EventType.EXECUTION_COMPLETED + assert type(ev).topic() == "execution_completed" # Cache prevents duplicate for same phase p2 = 
make_pod(name="p2", phase="Running") a = await pem.map_pod_event(p2, "ADDED") b = await pem.map_pod_event(p2, "MODIFIED") - assert a == [] or all(x.event_type for x in a) - assert b == [] or all(x.event_type for x in b) + assert a == [] or all(type(x).topic() for x in a) + assert b == [] or all(type(x).topic() for x in b) diff --git a/backend/tests/unit/services/pod_monitor/test_monitor.py b/backend/tests/unit/services/pod_monitor/test_monitor.py index 14f0a61d..b12042a8 100644 --- a/backend/tests/unit/services/pod_monitor/test_monitor.py +++ b/backend/tests/unit/services/pod_monitor/test_monitor.py @@ -4,16 +4,15 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from app.core.metrics import EventMetrics, KubernetesMetrics +from app.core.metrics import KubernetesMetrics from app.domain.events.typed import ( - DomainEvent, + BaseEvent, EventMetadata, ExecutionCompletedEvent, ExecutionStartedEvent, ResourceUsageDomain, ) -from app.events.core import UnifiedProducer -from app.services.kafka_event_service import KafkaEventService +from app.events.core import EventPublisher from app.services.pod_monitor.config import PodMonitorConfig from app.services.pod_monitor.event_mapper import PodEventMapper from app.services.pod_monitor.monitor import ( @@ -21,7 +20,6 @@ PodMonitor, WatchEventType, ) -from app.settings import Settings from kubernetes_asyncio import client as k8s_client from kubernetes_asyncio.client import V1Pod from kubernetes_asyncio.client.rest import ApiException @@ -38,40 +36,27 @@ _test_logger = logging.getLogger("test.pod_monitor") -# ===== Test doubles for KafkaEventService dependencies ===== +# ===== Test doubles for EventPublisher ===== -class FakeUnifiedProducer(UnifiedProducer): +class FakeEventPublisher(EventPublisher): """Fake producer that captures events without Kafka.""" def __init__(self) -> None: # Don't call super().__init__ - we don't need real Kafka - self.produced_events: list[tuple[DomainEvent, str | None]] = [] + self.produced_events: list[tuple[BaseEvent, str | None]] = [] self.logger = _test_logger - async def produce( - self, event_to_produce: DomainEvent, key: str | None = None, headers: dict[str, str] | None = None - ) -> None: - self.produced_events.append((event_to_produce, key)) + async def publish( + self, event: BaseEvent, key: str | None = None + ) -> str: + self.produced_events.append((event, key)) + return event.event_id async def aclose(self) -> None: pass -def create_test_kafka_event_service(event_metrics: EventMetrics) -> tuple[KafkaEventService, FakeUnifiedProducer]: - """Create real KafkaEventService with fake dependencies for testing.""" - fake_producer = FakeUnifiedProducer() - settings = Settings(config_path="config.test.toml") - - service = KafkaEventService( - kafka_producer=fake_producer, - settings=settings, - logger=_test_logger, - event_metrics=event_metrics, - ) - return service, fake_producer - - # ===== Helpers to create test instances with pure DI ===== @@ -83,10 +68,9 @@ def make_mock_api_client() -> MagicMock: def make_pod_monitor( - event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics, config: PodMonitorConfig | None = None, - kafka_service: KafkaEventService | None = None, + producer: FakeEventPublisher | None = None, api_client: k8s_client.ApiClient | None = None, event_mapper: PodEventMapper | None = None, mock_v1: Any | None = None, @@ -95,16 +79,16 @@ def make_pod_monitor( events: list[dict[str, Any]] | None = None, resource_version: str = "rv1", list_resource_version: str = "list-rv1", -) -> 
PodMonitor: +) -> tuple[PodMonitor, FakeEventPublisher]: """Create PodMonitor with sensible test defaults.""" cfg = config or PodMonitorConfig() client = api_client or make_mock_api_client() mapper = event_mapper or PodEventMapper(logger=_test_logger, k8s_api=make_mock_v1_api("{}")) - service = kafka_service or create_test_kafka_event_service(event_metrics)[0] + fake_producer = producer or FakeEventPublisher() monitor = PodMonitor( config=cfg, - kafka_event_service=service, + producer=fake_producer, logger=_test_logger, api_client=client, event_mapper=mapper, @@ -115,7 +99,7 @@ def make_pod_monitor( monitor._v1 = mock_v1 or make_mock_v1_api(pods=pods, list_resource_version=list_resource_version) monitor._watch = mock_watch or make_mock_watch(events or [], resource_version) - return monitor + return monitor, fake_producer # ===== Tests ===== @@ -123,15 +107,15 @@ def make_pod_monitor( @pytest.mark.asyncio async def test_watch_pod_events_list_then_watch( - event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics, + kubernetes_metrics: KubernetesMetrics, ) -> None: """First call does LIST + WATCH; second call skips LIST.""" cfg = PodMonitorConfig() pod = make_pod(name="existing", phase="Running", resource_version="rv1") - pm = make_pod_monitor( - event_metrics, kubernetes_metrics, config=cfg, + pm, _ = make_pod_monitor( + kubernetes_metrics, config=cfg, pods=[pod], list_resource_version="list-rv5", events=[{"type": "MODIFIED", "object": make_pod(name="existing", phase="Succeeded", resource_version="rv6")}], resource_version="rv7", @@ -151,7 +135,7 @@ async def test_watch_pod_events_list_then_watch( @pytest.mark.asyncio async def test_watch_pod_events_with_field_selector( - event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics, + kubernetes_metrics: KubernetesMetrics, ) -> None: cfg = PodMonitorConfig() cfg.field_selector = "status.phase=Running" @@ -177,8 +161,8 @@ def track_stream(func: Any, **kwargs: Any) -> MockWatchStream: # noqa: ARG001 tracking_watch.stop.return_value = None tracking_watch.resource_version = "rv1" - pm = make_pod_monitor( - event_metrics, kubernetes_metrics, config=cfg, + pm, _ = make_pod_monitor( + kubernetes_metrics, config=cfg, mock_v1=tracking_v1, mock_watch=tracking_watch, ) @@ -189,11 +173,11 @@ def track_stream(func: Any, **kwargs: Any) -> MockWatchStream: # noqa: ARG001 @pytest.mark.asyncio async def test_watch_pod_events_raises_api_exception( - event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics, + kubernetes_metrics: KubernetesMetrics, ) -> None: """watch_pod_events propagates ApiException to the caller.""" cfg = PodMonitorConfig() - pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg) + pm, _ = make_pod_monitor(kubernetes_metrics, config=cfg) # Pre-set resource version so LIST is skipped pm._last_resource_version = "rv1" @@ -208,15 +192,15 @@ async def test_watch_pod_events_raises_api_exception( @pytest.mark.asyncio async def test_watch_resets_after_410( - event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics, + kubernetes_metrics: KubernetesMetrics, ) -> None: """After 410 Gone resets _last_resource_version, next call re-LISTs.""" cfg = PodMonitorConfig() pod = make_pod(name="p1", phase="Running", resource_version="rv10") - pm = make_pod_monitor( - event_metrics, kubernetes_metrics, config=cfg, + pm, _ = make_pod_monitor( + kubernetes_metrics, config=cfg, pods=[pod], list_resource_version="list-rv10", events=[], resource_version="rv11", ) @@ -233,10 +217,10 @@ async def 
test_watch_resets_after_410(

 @pytest.mark.asyncio
 async def test_process_raw_event_invalid(
-    event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics,
+    kubernetes_metrics: KubernetesMetrics,
 ) -> None:
     cfg = PodMonitorConfig()
-    pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg)
+    pm, _ = make_pod_monitor(kubernetes_metrics, config=cfg)

     # Should not raise - invalid events are caught and logged
     await pm._process_raw_event({})
@@ -244,10 +228,10 @@ async def test_process_raw_event_invalid(

 @pytest.mark.asyncio
 async def test_process_raw_event_with_metadata(
-    event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics,
+    kubernetes_metrics: KubernetesMetrics,
 ) -> None:
     cfg = PodMonitorConfig()
-    pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg)
+    pm, _ = make_pod_monitor(kubernetes_metrics, config=cfg)

     processed: list[PodEvent] = []

@@ -274,7 +258,7 @@ async def mock_process(event: PodEvent) -> None:

 @pytest.mark.asyncio
 async def test_process_pod_event_full_flow(
-    event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics,
+    kubernetes_metrics: KubernetesMetrics,
 ) -> None:
     cfg = PodMonitorConfig()
     cfg.ignored_pod_phases = ["Unknown"]
@@ -291,7 +275,7 @@ class Event:
         def clear_cache(self) -> None:
             pass

-    pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg, event_mapper=MockMapper())  # type: ignore[arg-type]
+    pm, _ = make_pod_monitor(kubernetes_metrics, config=cfg, event_mapper=MockMapper())  # type: ignore[arg-type]

     published: list[Any] = []

@@ -332,7 +316,7 @@ async def mock_publish(event: Any, pod: Any) -> None:  # noqa: ARG001

 @pytest.mark.asyncio
 async def test_process_pod_event_exception_handling(
-    event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics,
+    kubernetes_metrics: KubernetesMetrics,
 ) -> None:
     cfg = PodMonitorConfig()

@@ -343,7 +327,7 @@ async def map_pod_event(self, pod: Any, event_type: WatchEventType) -> list[Any]
         def clear_cache(self) -> None:
             pass

-    pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg, event_mapper=FailMapper())  # type: ignore[arg-type]
+    pm, _ = make_pod_monitor(kubernetes_metrics, config=cfg, event_mapper=FailMapper())  # type: ignore[arg-type]

     event = PodEvent(
         event_type=WatchEventType.ADDED,
@@ -357,11 +341,10 @@ def clear_cache(self) -> None:

 @pytest.mark.asyncio
 async def test_publish_event_full_flow(
-    event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics,
+    kubernetes_metrics: KubernetesMetrics,
 ) -> None:
     cfg = PodMonitorConfig()
-    service, fake_producer = create_test_kafka_event_service(event_metrics)
-    pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg, kafka_service=service)
+    pm, fake_producer = make_pod_monitor(kubernetes_metrics, config=cfg)

     event = ExecutionCompletedEvent(
         execution_id="exec1",
@@ -380,25 +363,19 @@ async def test_publish_event_full_flow(

 @pytest.mark.asyncio
 async def test_publish_event_exception_handling(
-    event_metrics: EventMetrics, kubernetes_metrics: KubernetesMetrics,
+    kubernetes_metrics: KubernetesMetrics,
 ) -> None:
     cfg = PodMonitorConfig()

-    class FailingProducer(FakeUnifiedProducer):
-        async def produce(
-            self, event_to_produce: DomainEvent, key: str | None = None, headers: dict[str, str] | None = None
-        ) -> None:
+    class FailingProducer(FakeEventPublisher):
+        async def publish(
+            self, event: BaseEvent, key: str | None = None
+        ) -> str:
             raise RuntimeError("Publish failed")

     failing_producer = FailingProducer()
-    failing_service = KafkaEventService(
-        kafka_producer=failing_producer,
-        
settings=Settings(config_path="config.test.toml"), - logger=_test_logger, - event_metrics=event_metrics, - ) - pm = make_pod_monitor(event_metrics, kubernetes_metrics, config=cfg, kafka_service=failing_service) + pm, _ = make_pod_monitor(kubernetes_metrics, config=cfg, producer=failing_producer) event = ExecutionStartedEvent( execution_id="exec1", diff --git a/backend/tests/unit/services/result_processor/test_processor.py b/backend/tests/unit/services/result_processor/test_processor.py index 7199bd30..0e2edb53 100644 --- a/backend/tests/unit/services/result_processor/test_processor.py +++ b/backend/tests/unit/services/result_processor/test_processor.py @@ -39,28 +39,6 @@ def _make_processor( ) -class TestHandlerTypeGuards: - """Handlers must reject wrong event types with TypeError.""" - - async def test_completed_rejects_wrong_type(self, execution_metrics: ExecutionMetrics) -> None: - processor = _make_processor(execution_metrics) - wrong = ExecutionFailedEvent(execution_id="e1", exit_code=1, error_type=ExecutionErrorType.SCRIPT_ERROR, metadata=_METADATA) - with pytest.raises(TypeError, match="Expected ExecutionCompletedEvent"): - await processor.handle_execution_completed(wrong) - - async def test_failed_rejects_wrong_type(self, execution_metrics: ExecutionMetrics) -> None: - processor = _make_processor(execution_metrics) - wrong = ExecutionCompletedEvent(execution_id="e1", exit_code=0, metadata=_METADATA) - with pytest.raises(TypeError, match="Expected ExecutionFailedEvent"): - await processor.handle_execution_failed(wrong) - - async def test_timeout_rejects_wrong_type(self, execution_metrics: ExecutionMetrics) -> None: - processor = _make_processor(execution_metrics) - wrong = ExecutionCompletedEvent(execution_id="e1", exit_code=0, metadata=_METADATA) - with pytest.raises(TypeError, match="Expected ExecutionTimeoutEvent"): - await processor.handle_execution_timeout(wrong) - - class TestHandleExecutionCompleted: async def test_raises_when_execution_not_found(self, execution_metrics: ExecutionMetrics) -> None: repo = AsyncMock() @@ -91,7 +69,7 @@ async def test_stores_result_and_publishes(self, execution_metrics: ExecutionMet assert result_arg.execution_id == "e1" assert result_arg.status == ExecutionStatus.COMPLETED assert result_arg.exit_code == 0 - producer.produce.assert_awaited_once() + producer.publish.assert_awaited_once() class TestHandleExecutionFailed: @@ -115,7 +93,7 @@ async def test_stores_result_and_publishes(self, execution_metrics: ExecutionMet assert result_arg.execution_id == "e2" assert result_arg.status == ExecutionStatus.FAILED assert result_arg.exit_code == 1 - producer.produce.assert_awaited_once() + producer.publish.assert_awaited_once() class TestHandleExecutionTimeout: @@ -138,4 +116,4 @@ async def test_stores_result_and_publishes(self, execution_metrics: ExecutionMet assert result_arg.execution_id == "e3" assert result_arg.status == ExecutionStatus.TIMEOUT assert result_arg.exit_code == -1 - producer.produce.assert_awaited_once() + producer.publish.assert_awaited_once() diff --git a/backend/tests/unit/services/saga/test_execution_saga_steps.py b/backend/tests/unit/services/saga/test_execution_saga_steps.py index f0349f12..5011d0a5 100644 --- a/backend/tests/unit/services/saga/test_execution_saga_steps.py +++ b/backend/tests/unit/services/saga/test_execution_saga_steps.py @@ -1,8 +1,8 @@ import pytest from app.db.repositories.resource_allocation_repository import ResourceAllocationRepository -from app.domain.events.typed import DomainEvent, 
ExecutionRequestedEvent +from app.domain.events.typed import BaseEvent, ExecutionRequestedEvent from app.domain.saga import DomainResourceAllocation, DomainResourceAllocationCreate -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher from app.services.saga.execution_saga import ( AllocateResourcesStep, CreatePodStep, @@ -81,15 +81,15 @@ async def test_allocate_resources_step_paths() -> None: await AllocateResourcesStep(alloc_repo=_FakeAllocRepo(active=100)).execute(ctx2, _req()) -class _FakeProducer(UnifiedProducer): - """Fake UnifiedProducer for testing.""" +class _FakeProducer(EventPublisher): + """Fake EventPublisher for testing.""" def __init__(self) -> None: - self.events: list[DomainEvent] = [] + self.events: list[BaseEvent] = [] - async def produce(self, event_to_produce: DomainEvent, key: str | None = None, - headers: dict[str, str] | None = None) -> None: - self.events.append(event_to_produce) + async def publish(self, event: BaseEvent, key: str | None = None) -> str: + self.events.append(event) + return event.event_id @pytest.mark.asyncio @@ -152,10 +152,13 @@ async def test_delete_pod_compensation_variants() -> None: def test_execution_saga_bind_and_get_steps_sets_flags_and_types() -> None: - class DummyProd(UnifiedProducer): + class DummyProd(EventPublisher): def __init__(self) -> None: pass + async def publish(self, event: BaseEvent, key: str | None = None) -> str: + return event.event_id + class DummyAlloc(ResourceAllocationRepository): def __init__(self) -> None: pass diff --git a/backend/tests/unit/services/saga/test_saga_comprehensive.py b/backend/tests/unit/services/saga/test_saga_comprehensive.py index d5eea475..2717d0f8 100644 --- a/backend/tests/unit/services/saga/test_saga_comprehensive.py +++ b/backend/tests/unit/services/saga/test_saga_comprehensive.py @@ -7,7 +7,7 @@ import pytest from app.domain.enums.saga import SagaState -from app.domain.events.typed import DomainEvent, ExecutionRequestedEvent +from app.domain.events.typed import BaseEvent, ExecutionRequestedEvent from app.domain.saga.models import Saga from app.services.saga.saga_step import CompensationStep, SagaContext, SagaStep @@ -21,12 +21,12 @@ async def compensate(self, context: SagaContext) -> bool: # noqa: ARG002 return True -class _Step(SagaStep[DomainEvent]): +class _Step(SagaStep[BaseEvent]): def __init__(self, name: str, ok: bool = True) -> None: super().__init__(name) self._ok = ok - async def execute(self, context: SagaContext, event: DomainEvent) -> bool: # noqa: ARG002 + async def execute(self, context: SagaContext, event: BaseEvent) -> bool: # noqa: ARG002 return self._ok def get_compensation(self) -> CompensationStep: diff --git a/backend/tests/unit/services/saga/test_saga_orchestrator_unit.py b/backend/tests/unit/services/saga/test_saga_orchestrator_unit.py index eb0228b4..4e98900d 100644 --- a/backend/tests/unit/services/saga/test_saga_orchestrator_unit.py +++ b/backend/tests/unit/services/saga/test_saga_orchestrator_unit.py @@ -4,10 +4,10 @@ from app.db.repositories.resource_allocation_repository import ResourceAllocationRepository from app.db.repositories.saga_repository import SagaRepository from app.domain.enums.saga import SagaState -from app.domain.events.typed import DomainEvent +from app.domain.events.typed import BaseEvent from app.domain.saga import DomainResourceAllocation, DomainResourceAllocationCreate from app.domain.saga.models import Saga, SagaConfig -from app.events.core import UnifiedProducer +from app.events.core import EventPublisher 
from app.services.saga.execution_saga import ExecutionSaga from app.services.saga.saga_orchestrator import SagaOrchestrator @@ -41,16 +41,14 @@ async def upsert_saga(self, saga: Saga) -> bool: return True -class _FakeProd(UnifiedProducer): - """Fake UnifiedProducer for testing.""" +class _FakeProd(EventPublisher): + """Fake EventPublisher for testing.""" def __init__(self) -> None: pass # Skip parent __init__ - async def produce( - self, event_to_produce: DomainEvent, key: str | None = None, headers: dict[str, str] | None = None - ) -> None: - return None + async def publish(self, event: BaseEvent, key: str | None = None) -> str: + return event.event_id class _FakeAlloc(ResourceAllocationRepository): diff --git a/backend/tests/unit/services/sse/test_kafka_redis_bridge.py b/backend/tests/unit/services/sse/test_kafka_redis_bridge.py index f3122dc4..47f8547d 100644 --- a/backend/tests/unit/services/sse/test_kafka_redis_bridge.py +++ b/backend/tests/unit/services/sse/test_kafka_redis_bridge.py @@ -1,7 +1,7 @@ import logging import pytest -from app.domain.events.typed import DomainEvent, EventMetadata, ExecutionStartedEvent +from app.domain.events.typed import BaseEvent, EventMetadata, ExecutionStartedEvent from app.services.sse.redis_bus import SSERedisBus pytestmark = pytest.mark.unit @@ -14,10 +14,10 @@ class _FakeBus(SSERedisBus): def __init__(self) -> None: # Skip parent __init__ - no real Redis - self.published: list[tuple[str, DomainEvent]] = [] + self.published: list[tuple[str, BaseEvent]] = [] self.logger = _test_logger - async def publish_event(self, execution_id: str, event: DomainEvent) -> None: + async def publish_event(self, execution_id: str, event: BaseEvent) -> None: self.published.append((execution_id, event)) @@ -34,5 +34,6 @@ async def test_route_domain_event_publishes_to_redis() -> None: assert fake_bus.published == [] # Proper event is published - await fake_bus.route_domain_event(ExecutionStartedEvent(execution_id="exec-123", pod_name="p", metadata=_make_metadata())) + event = ExecutionStartedEvent(execution_id="exec-123", pod_name="p", metadata=_make_metadata()) + await fake_bus.route_domain_event(event) assert fake_bus.published and fake_bus.published[-1][0] == "exec-123" diff --git a/backend/uv.lock b/backend/uv.lock index 015e47fe..a10d95f1 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -239,12 +239,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] -[[package]] -name = "avro-python3" -version = "1.10.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/97/7a6970380ca8db9139a3cc0b0e3e0dd3e4bc584fb3644e1d06e71e1a55f0/avro-python3-1.10.2.tar.gz", hash = "sha256:3b63f24e6b04368c3e4a6f923f484be0230d821aad65ac36108edbff29e9aaab", size = 38701, upload-time = "2021-03-17T10:43:56.97Z" } - [[package]] name = "backoff" version = "2.2.1" @@ -713,41 +707,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] -[[package]] -name = "fastavro" -version = "1.12.1" -source = { registry = "https://pypi.org/simple" } 
-sdist = { url = "https://files.pythonhosted.org/packages/65/8b/fa2d3287fd2267be6261d0177c6809a7fa12c5600ddb33490c8dc29e77b2/fastavro-1.12.1.tar.gz", hash = "sha256:2f285be49e45bc047ab2f6bed040bb349da85db3f3c87880e4b92595ea093b2b", size = 1025661, upload-time = "2025-10-10T15:40:55.41Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/f0/10bd1a3d08667fa0739e2b451fe90e06df575ec8b8ba5d3135c70555c9bd/fastavro-1.12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:509818cb24b98a804fc80be9c5fed90f660310ae3d59382fc811bfa187122167", size = 1009057, upload-time = "2025-10-10T15:41:24.556Z" }, - { url = "https://files.pythonhosted.org/packages/78/ad/0d985bc99e1fa9e74c636658000ba38a5cd7f5ab2708e9c62eaf736ecf1a/fastavro-1.12.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:089e155c0c76e0d418d7e79144ce000524dd345eab3bc1e9c5ae69d500f71b14", size = 3391866, upload-time = "2025-10-10T15:41:26.882Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9e/b4951dc84ebc34aac69afcbfbb22ea4a91080422ec2bfd2c06076ff1d419/fastavro-1.12.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44cbff7518901c91a82aab476fcab13d102e4999499df219d481b9e15f61af34", size = 3458005, upload-time = "2025-10-10T15:41:29.017Z" }, - { url = "https://files.pythonhosted.org/packages/af/f8/5a8df450a9f55ca8441f22ea0351d8c77809fc121498b6970daaaf667a21/fastavro-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a275e48df0b1701bb764b18a8a21900b24cf882263cb03d35ecdba636bbc830b", size = 3295258, upload-time = "2025-10-10T15:41:31.564Z" }, - { url = "https://files.pythonhosted.org/packages/99/b2/40f25299111d737e58b85696e91138a66c25b7334f5357e7ac2b0e8966f8/fastavro-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2de72d786eb38be6b16d556b27232b1bf1b2797ea09599507938cdb7a9fe3e7c", size = 3430328, upload-time = "2025-10-10T15:41:33.689Z" }, - { url = "https://files.pythonhosted.org/packages/e0/07/85157a7c57c5f8b95507d7829b5946561e5ee656ff80e9dd9a757f53ddaf/fastavro-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:9090f0dee63fe022ee9cc5147483366cc4171c821644c22da020d6b48f576b4f", size = 444140, upload-time = "2025-10-10T15:41:34.902Z" }, - { url = "https://files.pythonhosted.org/packages/bb/57/26d5efef9182392d5ac9f253953c856ccb66e4c549fd3176a1e94efb05c9/fastavro-1.12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:78df838351e4dff9edd10a1c41d1324131ffecbadefb9c297d612ef5363c049a", size = 1000599, upload-time = "2025-10-10T15:41:36.554Z" }, - { url = "https://files.pythonhosted.org/packages/33/cb/8ab55b21d018178eb126007a56bde14fd01c0afc11d20b5f2624fe01e698/fastavro-1.12.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:780476c23175d2ae457c52f45b9ffa9d504593499a36cd3c1929662bf5b7b14b", size = 3335933, upload-time = "2025-10-10T15:41:39.07Z" }, - { url = "https://files.pythonhosted.org/packages/fe/03/9c94ec9bf873eb1ffb0aa694f4e71940154e6e9728ddfdc46046d7e8ced4/fastavro-1.12.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0714b285160fcd515eb0455540f40dd6dac93bdeacdb03f24e8eac3d8aa51f8d", size = 3402066, upload-time = "2025-10-10T15:41:41.608Z" }, - { url = "https://files.pythonhosted.org/packages/75/c8/cb472347c5a584ccb8777a649ebb28278fccea39d005fc7df19996f41df8/fastavro-1.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:a8bc2dcec5843d499f2489bfe0747999108f78c5b29295d877379f1972a3d41a", size = 3240038, upload-time = "2025-10-10T15:41:43.743Z" }, - { url = "https://files.pythonhosted.org/packages/e1/77/569ce9474c40304b3a09e109494e020462b83e405545b78069ddba5f614e/fastavro-1.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3b1921ac35f3d89090a5816b626cf46e67dbecf3f054131f84d56b4e70496f45", size = 3369398, upload-time = "2025-10-10T15:41:45.719Z" }, - { url = "https://files.pythonhosted.org/packages/4a/1f/9589e35e9ea68035385db7bdbf500d36b8891db474063fb1ccc8215ee37c/fastavro-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:5aa777b8ee595b50aa084104cd70670bf25a7bbb9fd8bb5d07524b0785ee1699", size = 444220, upload-time = "2025-10-10T15:41:47.39Z" }, - { url = "https://files.pythonhosted.org/packages/6c/d2/78435fe737df94bd8db2234b2100f5453737cffd29adee2504a2b013de84/fastavro-1.12.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c3d67c47f177e486640404a56f2f50b165fe892cc343ac3a34673b80cc7f1dd6", size = 1086611, upload-time = "2025-10-10T15:41:48.818Z" }, - { url = "https://files.pythonhosted.org/packages/b6/be/428f99b10157230ddac77ec8cc167005b29e2bd5cbe228345192bb645f30/fastavro-1.12.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5217f773492bac43dae15ff2931432bce2d7a80be7039685a78d3fab7df910bd", size = 3541001, upload-time = "2025-10-10T15:41:50.871Z" }, - { url = "https://files.pythonhosted.org/packages/16/08/a2eea4f20b85897740efe44887e1ac08f30dfa4bfc3de8962bdcbb21a5a1/fastavro-1.12.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:469fecb25cba07f2e1bfa4c8d008477cd6b5b34a59d48715e1b1a73f6160097d", size = 3432217, upload-time = "2025-10-10T15:41:53.149Z" }, - { url = "https://files.pythonhosted.org/packages/87/bb/b4c620b9eb6e9838c7f7e4b7be0762834443adf9daeb252a214e9ad3178c/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d71c8aa841ef65cfab709a22bb887955f42934bced3ddb571e98fdbdade4c609", size = 3366742, upload-time = "2025-10-10T15:41:55.237Z" }, - { url = "https://files.pythonhosted.org/packages/3d/d1/e69534ccdd5368350646fea7d93be39e5f77c614cca825c990bd9ca58f67/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:b81fc04e85dfccf7c028e0580c606e33aa8472370b767ef058aae2c674a90746", size = 3383743, upload-time = "2025-10-10T15:41:57.68Z" }, - { url = "https://files.pythonhosted.org/packages/58/54/b7b4a0c3fb5fcba38128542da1b26c4e6d69933c923f493548bdfd63ab6a/fastavro-1.12.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9445da127751ba65975d8e4bdabf36bfcfdad70fc35b2d988e3950cce0ec0e7c", size = 1001377, upload-time = "2025-10-10T15:41:59.241Z" }, - { url = "https://files.pythonhosted.org/packages/1e/4f/0e589089c7df0d8f57d7e5293fdc34efec9a3b758a0d4d0c99a7937e2492/fastavro-1.12.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6", size = 3320401, upload-time = "2025-10-10T15:42:01.682Z" }, - { url = "https://files.pythonhosted.org/packages/f9/19/260110d56194ae29d7e423a336fccea8bcd103196d00f0b364b732bdb84e/fastavro-1.12.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3616e2f0e1c9265e92954fa099db79c6e7817356d3ff34f4bcc92699ae99697c", size = 3350894, upload-time = "2025-10-10T15:42:04.073Z" }, - { url = 
"https://files.pythonhosted.org/packages/d0/96/58b0411e8be9694d5972bee3167d6c1fd1fdfdf7ce253c1a19a327208f4f/fastavro-1.12.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cb0337b42fd3c047fcf0e9b7597bd6ad25868de719f29da81eabb6343f08d399", size = 3229644, upload-time = "2025-10-10T15:42:06.221Z" }, - { url = "https://files.pythonhosted.org/packages/5b/db/38660660eac82c30471d9101f45b3acfdcbadfe42d8f7cdb129459a45050/fastavro-1.12.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:64961ab15b74b7c168717bbece5660e0f3d457837c3cc9d9145181d011199fa7", size = 3329704, upload-time = "2025-10-10T15:42:08.384Z" }, - { url = "https://files.pythonhosted.org/packages/9d/a9/1672910f458ecb30b596c9e59e41b7c00309b602a0494341451e92e62747/fastavro-1.12.1-cp314-cp314-win_amd64.whl", hash = "sha256:792356d320f6e757e89f7ac9c22f481e546c886454a6709247f43c0dd7058004", size = 452911, upload-time = "2025-10-10T15:42:09.795Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8d/2e15d0938ded1891b33eff252e8500605508b799c2e57188a933f0bd744c/fastavro-1.12.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:120aaf82ac19d60a1016afe410935fe94728752d9c2d684e267e5b7f0e70f6d9", size = 3541999, upload-time = "2025-10-10T15:42:11.794Z" }, - { url = "https://files.pythonhosted.org/packages/a7/1c/6dfd082a205be4510543221b734b1191299e6a1810c452b6bc76dfa6968e/fastavro-1.12.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6a3462934b20a74f9ece1daa49c2e4e749bd9a35fa2657b53bf62898fba80f5", size = 3433972, upload-time = "2025-10-10T15:42:14.485Z" }, - { url = "https://files.pythonhosted.org/packages/24/90/9de694625a1a4b727b1ad0958d220cab25a9b6cf7f16a5c7faa9ea7b2261/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1f81011d54dd47b12437b51dd93a70a9aa17b61307abf26542fc3c13efbc6c51", size = 3368752, upload-time = "2025-10-10T15:42:16.618Z" }, - { url = "https://files.pythonhosted.org/packages/fa/93/b44f67589e4d439913dab6720f7e3507b0fa8b8e56d06f6fc875ced26afb/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:43ded16b3f4a9f1a42f5970c2aa618acb23ea59c4fcaa06680bdf470b255e5a8", size = 3386636, upload-time = "2025-10-10T15:42:18.974Z" }, -] - [[package]] name = "faststream" version = "0.6.6" @@ -1092,7 +1051,6 @@ dependencies = [ { name = "asgiref" }, { name = "async-timeout" }, { name = "attrs" }, - { name = "avro-python3" }, { name = "backoff" }, { name = "beanie" }, { name = "blinker" }, @@ -1111,7 +1069,6 @@ dependencies = [ { name = "email-validator" }, { name = "exceptiongroup" }, { name = "fastapi" }, - { name = "fastavro" }, { name = "faststream", extra = ["kafka"] }, { name = "fonttools" }, { name = "frozenlist" }, @@ -1167,7 +1124,6 @@ dependencies = [ { name = "pyasn1" }, { name = "pyasn1-modules" }, { name = "pydantic" }, - { name = "pydantic-avro" }, { name = "pydantic-core" }, { name = "pygments" }, { name = "pyjwt" }, @@ -1176,7 +1132,6 @@ dependencies = [ { name = "python-dateutil" }, { name = "python-json-logger" }, { name = "python-multipart" }, - { name = "python-schema-registry-client" }, { name = "pyyaml" }, { name = "pyzmq" }, { name = "redis" }, @@ -1238,7 +1193,6 @@ requires-dist = [ { name = "asgiref", specifier = "==3.11.0" }, { name = "async-timeout", specifier = "==5.0.1" }, { name = "attrs", specifier = "==25.3.0" }, - { name = "avro-python3", specifier = "==1.10.2" }, { name = "backoff", specifier = "==2.2.1" }, { name = "beanie", specifier = "==2.0.1" }, { 
name = "blinker", specifier = "==1.9.0" }, @@ -1257,7 +1211,6 @@ requires-dist = [ { name = "email-validator", specifier = "==2.3.0" }, { name = "exceptiongroup", specifier = "==1.2.2" }, { name = "fastapi", specifier = "==0.128.0" }, - { name = "fastavro", specifier = "==1.12.1" }, { name = "faststream", extras = ["kafka"], specifier = "==0.6.6" }, { name = "fonttools", specifier = "==4.61.1" }, { name = "frozenlist", specifier = "==1.7.0" }, @@ -1313,7 +1266,6 @@ requires-dist = [ { name = "pyasn1", specifier = "==0.6.2" }, { name = "pyasn1-modules", specifier = "==0.4.2" }, { name = "pydantic", specifier = "==2.9.2" }, - { name = "pydantic-avro", specifier = "==0.9.1" }, { name = "pydantic-core", specifier = "==2.23.4" }, { name = "pygments", specifier = "==2.19.2" }, { name = "pyjwt", specifier = "==2.9.0" }, @@ -1322,7 +1274,6 @@ requires-dist = [ { name = "python-dateutil", specifier = "==2.9.0.post0" }, { name = "python-json-logger", specifier = "==2.0.7" }, { name = "python-multipart", specifier = "==0.0.22" }, - { name = "python-schema-registry-client", specifier = "==2.6.1" }, { name = "pyyaml", specifier = "==6.0.2" }, { name = "pyzmq", specifier = "==26.2.0" }, { name = "redis", specifier = "==7.1.0" }, @@ -1390,33 +1341,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] -[[package]] -name = "jsonschema" -version = "4.26.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "jsonschema-specifications" }, - { name = "referencing" }, - { name = "rpds-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, -] - -[[package]] -name = "jsonschema-specifications" -version = "2025.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "referencing" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, -] - [[package]] name = "kiwisolver" version = "1.4.9" @@ -2486,18 +2410,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/e4/ba44652d562cbf0bf320e0f3810206149c8a4e99cdbf66da82e97ab53a15/pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12", size = 434928, 
upload-time = "2024-09-17T15:59:51.827Z" }, ] -[[package]] -name = "pydantic-avro" -version = "0.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/8b/47ea4be231ba90984228486fe9a332cb6f18db6963d04207a1a9f310c45b/pydantic_avro-0.9.1.tar.gz", hash = "sha256:22f728340fad3353b232ec2b138496c26efb2ede5b74a2f18ab491d4ea37ec5b", size = 10015, upload-time = "2025-10-16T12:00:29.536Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/25/69/6bb45c70da28c3aa82772c136e9c87ff0498c4fd4875594ebe3f7a4cd47c/pydantic_avro-0.9.1-py3-none-any.whl", hash = "sha256:dcbec25c6f2021db594f3116dd94e029a4cb96ab63eec3dcb3ad4405b434c23a", size = 11510, upload-time = "2025-10-16T12:00:28.718Z" }, -] - [[package]] name = "pydantic-core" version = "2.23.4" @@ -2704,21 +2616,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, ] -[[package]] -name = "python-schema-registry-client" -version = "2.6.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "fastavro" }, - { name = "httpx" }, - { name = "jsonschema" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/00/4c/3b10063174780ee1ad97bca6c100cf9634aaba9559f03a588d721403567b/python_schema_registry_client-2.6.1.tar.gz", hash = "sha256:017fd45a36a4517d9c87c03c992393cce2c437c5ffa8fe1c9dfde1664caa89c9", size = 21360, upload-time = "2025-04-04T15:07:51.143Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/41/c1/abd18fc3c23dbe09321fcd812091320d4dc954046f95cb431ef2926cb11c/python_schema_registry_client-2.6.1-py3-none-any.whl", hash = "sha256:05950ca8f9a3409247514bef3fdb421839d6e1ae544b32dfd3b7b16237673303", size = 23095, upload-time = "2025-04-04T15:07:49.592Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -2807,20 +2704,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" }, ] -[[package]] -name = "referencing" -version = "0.37.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, -] - [[package]] name = "regex" version = "2025.8.29" @@ -2912,87 +2795,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = 
"sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, ] -[[package]] -name = "rpds-py" -version = "0.30.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, - { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, - { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, - { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, - { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, - { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, - { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, - { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, - { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, - { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, - { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, - { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, - { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, - { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, - { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, - { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, - { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, - { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, - { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, - { url = 
"https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, - { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, - { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, - { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, - { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, - { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, - { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, - { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, - { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, - { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, - { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, - { url = 
"https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, - { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, - { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, - { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, - { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, - { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, - { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, - { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, - { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, - { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, - { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = 
"2025-11-30T20:23:22.488Z" }, - { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, - { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, - { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, - { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, - { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, - { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, - { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, - { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, - { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, - { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, - { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = 
"2025-11-30T20:23:40.263Z" }, - { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, - { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, - { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, - { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, - { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, - { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, - { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, - { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, - { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, - { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, - { url = 
"https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, - { url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, - { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, - { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, - { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, - { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, - { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, - { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, - { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, -] - [[package]] name = "rsa" version = "4.9" diff --git a/backend/workers/dlq_processor.py b/backend/workers/dlq_processor.py index 6f2561cc..ef195b80 100644 --- a/backend/workers/dlq_processor.py +++ b/backend/workers/dlq_processor.py @@ -5,12 +5,11 @@ from app.core.tracing import init_tracing from app.dlq.manager import DLQManager from app.domain.enums.kafka import GroupId -from app.events.broker import create_broker from app.events.handlers import register_dlq_subscriber -from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings from dishka.integrations.faststream import setup_dishka from faststream import FastStream +from faststream.kafka import 
diff --git a/backend/workers/dlq_processor.py b/backend/workers/dlq_processor.py
index 6f2561cc..ef195b80 100644
--- a/backend/workers/dlq_processor.py
+++ b/backend/workers/dlq_processor.py
@@ -5,12 +5,11 @@
 from app.core.tracing import init_tracing
 from app.dlq.manager import DLQManager
 from app.domain.enums.kafka import GroupId
-from app.events.broker import create_broker
 from app.events.handlers import register_dlq_subscriber
-from app.events.schema.schema_registry import SchemaRegistryManager
 from app.settings import Settings
 from dishka.integrations.faststream import setup_dishka
 from faststream import FastStream
+from faststream.kafka import KafkaBroker
 
 
 def main() -> None:
@@ -33,8 +32,7 @@ def main() -> None:
     logger.info("Tracing initialized for DLQ Processor")
 
     # Create Kafka broker and register DLQ subscriber
-    schema_registry = SchemaRegistryManager(settings, logger)
-    broker = create_broker(settings, schema_registry, logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
     register_dlq_subscriber(broker, settings)
 
     # Create DI container with broker in context
diff --git a/backend/workers/run_coordinator.py b/backend/workers/run_coordinator.py
index d2d5ae30..3e98b79b 100644
--- a/backend/workers/run_coordinator.py
+++ b/backend/workers/run_coordinator.py
@@ -5,12 +5,12 @@
 from app.core.logging import setup_logger
 from app.core.tracing import init_tracing
 from app.domain.enums.kafka import GroupId
-from app.events.broker import create_broker
 from app.events.handlers import register_coordinator_subscriber
-from app.events.schema.schema_registry import SchemaRegistryManager
+from app.services.idempotency import IdempotencyMiddleware
 from app.settings import Settings
 from dishka.integrations.faststream import setup_dishka
 from faststream import FastStream
+from faststream.kafka import KafkaBroker
 
 
 def main() -> None:
@@ -33,12 +32,9 @@ def main() -> None:
     )
     logger.info("Tracing initialized for ExecutionCoordinator")
 
-    # Create Kafka broker and register subscriber
-    schema_registry = SchemaRegistryManager(settings, logger)
-    broker = create_broker(settings, schema_registry, logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
     register_coordinator_subscriber(broker, settings)
 
-    # Create DI container with broker in context
     container = create_coordinator_container(settings, broker)
     setup_dishka(container, broker=broker, auto_inject=True)
 
@@ -45,7 +42,9 @@ def main() -> None:
 
     @app.on_startup
     async def startup() -> None:
-        await container.get(Database)  # triggers init_beanie inside provider
+        await container.get(Database)
+        middleware = await container.get(IdempotencyMiddleware)
+        broker.add_middleware(middleware)
         logger.info("ExecutionCoordinator infrastructure initialized")
 
     @app.on_shutdown
diff --git a/backend/workers/run_event_replay.py b/backend/workers/run_event_replay.py
index 81aac922..ec31c92e 100644
--- a/backend/workers/run_event_replay.py
+++ b/backend/workers/run_event_replay.py
@@ -5,17 +5,15 @@
 from app.core.container import create_event_replay_container
 from app.core.logging import setup_logger
 from app.core.tracing import init_tracing
-from app.events.broker import create_broker
-from app.events.schema.schema_registry import SchemaRegistryManager
 from app.services.event_replay.replay_service import EventReplayService
 from app.settings import Settings
+from faststream.kafka import KafkaBroker
 
 
 async def run_replay_service(settings: Settings) -> None:
     """Run the event replay service with DI-managed cleanup scheduler."""
     tmp_logger = setup_logger(settings.LOG_LEVEL)
-    schema_registry = SchemaRegistryManager(settings, tmp_logger)
-    broker = create_broker(settings, schema_registry, tmp_logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=tmp_logger)
     container = create_event_replay_container(settings, broker)
 
     logger = await container.get(logging.Logger)
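Taken together, the worker entrypoints now converge on one bootstrap shape: build a plain KafkaBroker, register the subscriber, hand the broker to the dishka container, and attach the idempotency middleware once the container can construct it. A condensed sketch of that shape, using the coordinator's names from the hunks above; the Settings/FastStream wiring that this patch does not touch is assumed to stay as it was:

    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
    register_coordinator_subscriber(broker, settings)

    container = create_coordinator_container(settings, broker)
    setup_dishka(container, broker=broker, auto_inject=True)
    app = FastStream(broker, logger=logger)  # assumption: app construction unchanged by this patch

    @app.on_startup
    async def startup() -> None:
        await container.get(Database)  # the Database provider runs init_beanie
        middleware = await container.get(IdempotencyMiddleware)
        broker.add_middleware(middleware)  # on_startup runs before the broker starts consuming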
diff --git a/backend/workers/run_k8s_worker.py b/backend/workers/run_k8s_worker.py
index 457d04ab..e3c956be 100644
--- a/backend/workers/run_k8s_worker.py
+++ b/backend/workers/run_k8s_worker.py
@@ -5,13 +5,13 @@
 from app.core.logging import setup_logger
 from app.core.tracing import init_tracing
 from app.domain.enums.kafka import GroupId
-from app.events.broker import create_broker
 from app.events.handlers import register_k8s_worker_subscriber
-from app.events.schema.schema_registry import SchemaRegistryManager
+from app.services.idempotency import IdempotencyMiddleware
 from app.services.k8s_worker import KubernetesWorker
 from app.settings import Settings
 from dishka.integrations.faststream import setup_dishka
 from faststream import FastStream
+from faststream.kafka import KafkaBroker
 
 
 def main() -> None:
@@ -33,12 +33,9 @@ def main() -> None:
     )
     logger.info("Tracing initialized for KubernetesWorker")
 
-    # Create Kafka broker and register subscriber
-    schema_registry = SchemaRegistryManager(settings, logger)
-    broker = create_broker(settings, schema_registry, logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
     register_k8s_worker_subscriber(broker, settings)
 
-    # Create DI container with broker in context
     container = create_k8s_worker_container(settings, broker)
     setup_dishka(container, broker=broker, auto_inject=True)
 
@@ -46,7 +43,9 @@ def main() -> None:
 
     @app.on_startup
     async def startup() -> None:
-        await container.get(Database)  # triggers init_beanie inside provider
+        await container.get(Database)
+        middleware = await container.get(IdempotencyMiddleware)
+        broker.add_middleware(middleware)
         logger.info("KubernetesWorker ready")
 
     @app.after_startup
diff --git a/backend/workers/run_pod_monitor.py b/backend/workers/run_pod_monitor.py
index 1f72277e..9be78252 100644
--- a/backend/workers/run_pod_monitor.py
+++ b/backend/workers/run_pod_monitor.py
@@ -4,12 +4,11 @@
 from app.core.logging import setup_logger
 from app.core.tracing import init_tracing
 from app.domain.enums.kafka import GroupId
-from app.events.broker import create_broker
-from app.events.schema.schema_registry import SchemaRegistryManager
 from app.services.pod_monitor.monitor import PodMonitor
 from app.settings import Settings
 from dishka.integrations.faststream import setup_dishka
 from faststream import FastStream
+from faststream.kafka import KafkaBroker
 
 
 def main() -> None:
@@ -32,8 +31,7 @@ def main() -> None:
     logger.info("Tracing initialized for PodMonitor Service")
 
     # Create Kafka broker (PodMonitor publishes events via KafkaEventService)
-    schema_registry = SchemaRegistryManager(settings, logger)
-    broker = create_broker(settings, schema_registry, logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
 
     # Create DI container with broker in context
     container = create_pod_monitor_container(settings, broker)
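One detail worth calling out: IdempotencyMiddleware is resolved from the dishka container inside the on_startup hook and only then attached with broker.add_middleware, rather than being passed to KafkaBroker() up front. Presumably this is because the middleware depends on DI-provided infrastructure that only exists once the container has initialized; the await container.get(Database) call just before it is what triggers init_beanie inside the provider. The pod monitor above skips both steps since it only publishes, while the result processor and saga orchestrator below follow roughly this hook shape:

    @app.on_startup
    async def startup() -> None:
        # Bring up Mongo first: the Database provider runs init_beanie.
        await container.get(Database)
        # The middleware is DI-managed, so it can only be built once the
        # container's providers are available; attach it before consumption starts.
        middleware = await container.get(IdempotencyMiddleware)
        broker.add_middleware(middleware)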
diff --git a/backend/workers/run_result_processor.py b/backend/workers/run_result_processor.py
index 2a1061a2..786a235a 100644
--- a/backend/workers/run_result_processor.py
+++ b/backend/workers/run_result_processor.py
@@ -5,12 +5,12 @@
 from app.core.logging import setup_logger
 from app.core.tracing import init_tracing
 from app.domain.enums.kafka import GroupId
-from app.events.broker import create_broker
 from app.events.handlers import register_result_processor_subscriber
-from app.events.schema.schema_registry import SchemaRegistryManager
+from app.services.idempotency import IdempotencyMiddleware
 from app.settings import Settings
 from dishka.integrations.faststream import setup_dishka
 from faststream import FastStream
+from faststream.kafka import KafkaBroker
 
 
 def main() -> None:
@@ -32,12 +32,9 @@ def main() -> None:
     )
     logger.info("Tracing initialized for ResultProcessor Service")
 
-    # Create Kafka broker and register subscriber
-    schema_registry = SchemaRegistryManager(settings, logger)
-    broker = create_broker(settings, schema_registry, logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
     register_result_processor_subscriber(broker, settings)
 
-    # Create DI container with broker in context
     container = create_result_processor_container(settings, broker)
     setup_dishka(container, broker=broker, auto_inject=True)
 
@@ -45,7 +42,9 @@ def main() -> None:
 
     @app.on_startup
     async def startup() -> None:
-        await container.get(Database)  # triggers init_beanie inside provider
+        await container.get(Database)
+        middleware = await container.get(IdempotencyMiddleware)
+        broker.add_middleware(middleware)
         logger.info("ResultProcessor infrastructure initialized")
 
     @app.on_shutdown
diff --git a/backend/workers/run_saga_orchestrator.py b/backend/workers/run_saga_orchestrator.py
index 4f355f86..19f47ee2 100644
--- a/backend/workers/run_saga_orchestrator.py
+++ b/backend/workers/run_saga_orchestrator.py
@@ -4,13 +4,13 @@
 from app.core.logging import setup_logger
 from app.core.tracing import init_tracing
 from app.domain.enums.kafka import GroupId
-from app.events.broker import create_broker
 from app.events.handlers import register_saga_subscriber
-from app.events.schema.schema_registry import SchemaRegistryManager
+from app.services.idempotency import IdempotencyMiddleware
 from app.services.saga import SagaOrchestrator
 from app.settings import Settings
 from dishka.integrations.faststream import setup_dishka
 from faststream import FastStream
+from faststream.kafka import KafkaBroker
 
 
 def main() -> None:
@@ -32,12 +32,9 @@ def main() -> None:
     )
     logger.info("Tracing initialized for Saga Orchestrator Service")
 
-    # Create Kafka broker and register subscriber
-    schema_registry = SchemaRegistryManager(settings, logger)
-    broker = create_broker(settings, schema_registry, logger)
+    broker = KafkaBroker(settings.KAFKA_BOOTSTRAP_SERVERS, logger=logger)
     register_saga_subscriber(broker, settings)
 
-    # Create DI container with broker in context
     container = create_saga_orchestrator_container(settings, broker)
     setup_dishka(container, broker=broker, auto_inject=True)
 
@@ -45,9 +42,9 @@ def main() -> None:
 
     @app.on_startup
     async def startup() -> None:
-        # Resolving SagaOrchestrator triggers Database init (via dependency)
-        # and starts the APScheduler timeout checker (via SagaWorkerProvider)
         await container.get(SagaOrchestrator)
+        middleware = await container.get(IdempotencyMiddleware)
+        broker.add_middleware(middleware)
         logger.info("SagaOrchestrator infrastructure initialized")
 
     @app.on_shutdown