diff --git a/.github/workflows/python-application.yml b/.github/workflows/python-application.yml
index d625495..8fbcf76 100644
--- a/.github/workflows/python-application.yml
+++ b/.github/workflows/python-application.yml
@@ -8,12 +8,19 @@ on:
     branches: [ "main" ]
   pull_request:
     branches: [ "main" ]
+  workflow_dispatch:
+    inputs:
+      run_performance:
+        description: "Run performance benchmark tests"
+        required: false
+        default: false
+        type: boolean
 
 permissions:
   contents: read
 
 jobs:
-  build:
+  lint-and-fast-tests:
     runs-on: ubuntu-latest
 
     steps:
@@ -32,6 +39,24 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # Exit-zero treats all errors as warnings.
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
+    - name: Test with pytest (fast suite)
       run: |
         pytest
+
+  performance-tests:
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'workflow_dispatch' && inputs.run_performance == true }}
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -e ".[dev]"
+    - name: Test with pytest (performance suite)
+      run: |
+        pytest -m performance
diff --git a/AGENTS.md b/AGENTS.md
index e793726..acb0571 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -6,7 +6,7 @@ Guide for AI coding agents working on the Commerce System Demo project.
 
 Commerce System Demo is a FastAPI-based commerce service that provides RESTful APIs for managing products, categories, and implementing search functionality. The project includes built-in observability with OpenTelemetry metrics, logging, and distributed tracing.
 
-**Current Version**: 0.1.3 (following [Semantic Versioning](https://semver.org/))
+**Current Version**: 0.1.4 (following [Semantic Versioning](https://semver.org/))
 
 ## Setup Commands
 
@@ -79,7 +79,8 @@ app/
 
 ### Run Tests
 
-- **All tests**: `pytest` or `pytest tests/`
+- **Default fast suite**: `pytest` or `pytest tests/` (performance benchmarks are excluded by default)
+- **Performance benchmarks (opt-in)**: `pytest -m performance`
 - **Specific test file**: `pytest tests/test_api.py` or `pytest tests/test_search.py`
 - **Specific test**: `pytest tests/test_api.py::test_create_category -v`
 - **With coverage**: `pytest --cov=app --cov-report=html`
@@ -90,6 +91,7 @@ app/
 - **tests/conftest.py**: Shared fixtures (db_session, client setup)
 - **tests/test_api.py**: Integration tests for CRUD operations and search endpoints
 - **tests/test_search.py**: Service-level search functionality tests
+- **tests/test_category_service_benchmark.py**: Performance benchmark tests for category validation (marked with `@pytest.mark.performance`)
 
 ### Database for Tests
 
@@ -97,11 +99,17 @@ app/
 - Each test session gets a fresh database to avoid state leakage
 - Tests run in async mode (`asyncio_mode = "auto"`)
 
+### CI Test Execution
+
+- **Push/PR CI** runs the default fast suite and excludes `performance`-marked tests
+- **Performance CI** is opt-in via GitHub Actions `workflow_dispatch`
+- To run benchmark CI, start workflow **Python application** with input `run_performance=true`
+
 ## Building and Deployment
 
 ### Docker
 
-- **Build image**: `docker build -t commerce-system-demo:0.1.3 .`
+- **Build image**: `docker build -t commerce-system-demo:0.1.4 .`
 - **View Dockerfile**: Includes Python dependencies, migration scripts, and app code
 - **Build context**: Includes `scripts/`, `app/`, and `observability/` directories
 
@@ -184,7 +192,7 @@ This project follows [Semantic Versioning 2.0.0](https://semver.org/):
 - **MINOR**: Backward-compatible new features
 - **PATCH**: Backward-compatible bug fixes
 
-Current version is **0.1.3** (initial development). Version is defined in:
+Current version is **0.1.4** (initial development). Version is defined in:
 - `pyproject.toml` (project metadata)
 - `app/main.py` (FastAPI version)
 - `app/observability/metrics.py` (meter version)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e52447..1226fd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Advanced filtering options for product search
 - Product images storage optimization
 
+## [0.1.4] - 2026-03-22
+
+### Changed
+
+- Replaced O(n) per-level ancestor walks in `category_depth` and `validate_no_cycles` with single recursive CTE queries
+- Category depth validation now executes one SQL statement instead of up to 100 sequential fetches
+- Cycle detection now uses a single `EXISTS` query over a recursive ancestor CTE
+
+### Added
+
+- Integration benchmark test for category validation on PostgreSQL testcontainers (`test_category_service_benchmark.py`)
+- Opt-in `performance` pytest marker to separate benchmarks from the default fast test suite
+- Dedicated `performance-tests` CI job triggered via `workflow_dispatch` with `run_performance=true`
+- Documentation in README and AGENTS.md for running performance benchmarks locally and in CI
+
 ## [0.1.3] - 2026-03-20
 
 ### Added
diff --git a/README.md b/README.md
index 2127db6..942db94 100644
--- a/README.md
+++ b/README.md
@@ -739,6 +739,16 @@ pytest tests/test_search.py -q    # Service layer tests
 pytest tests/test_api.py -q       # Endpoint integration tests
 ```
 
+Run opt-in performance benchmarks:
+
+```bash
+pytest -m performance -q
+```
+
+The default test run excludes performance benchmarks to keep CI fast.
+To run benchmarks in GitHub Actions, start the `Python application` workflow
+manually and set `run_performance` to `true`.
+
 Run with coverage:
 
 ```bash
diff --git a/app/main.py b/app/main.py
index 79b3fab..0193576 100644
--- a/app/main.py
+++ b/app/main.py
@@ -77,7 +77,7 @@ def create_app() -> FastAPI:
 
     app = FastAPI(
         title="Commerce System Demo",
-        version="0.1.3",
+        version="0.1.4",
         lifespan=lifespan,
     )
     app.router.route_class = ObservabilityRoute
diff --git a/app/observability/metrics.py b/app/observability/metrics.py
index 815bed3..01fcfbc 100644
--- a/app/observability/metrics.py
+++ b/app/observability/metrics.py
@@ -3,7 +3,7 @@
 from opentelemetry import metrics
 from opentelemetry.metrics import Counter, Histogram, UpDownCounter
 
-_meter = metrics.get_meter("commerce-system-demo-observability", version="0.1.3")
+_meter = metrics.get_meter("commerce-system-demo-observability", version="0.1.4")
 
 http_request_duration_seconds: Histogram = _meter.create_histogram(
     name="commerce_http_request_duration_seconds",
diff --git a/app/observability/setup.py b/app/observability/setup.py
index 3b35f6c..8256e43 100644
--- a/app/observability/setup.py
+++ b/app/observability/setup.py
@@ -76,7 +76,7 @@ def _build_resource(settings: Settings) -> Resource:
     """Build OpenTelemetry resource attributes from runtime settings."""
     attributes = {
         "service.name": settings.otel_service_name,
-        "service.version": "0.1.3",
+        "service.version": "0.1.4",
         "deployment.environment": settings.otel_environment,
     }
 
diff --git a/app/services/category_service.py b/app/services/category_service.py
index dc52624..eff8fee 100644
--- a/app/services/category_service.py
+++ b/app/services/category_service.py
@@ -1,10 +1,10 @@
 """Category business logic helpers and hierarchy utilities."""
 
-from sqlalchemy import Select, select
+from sqlalchemy import Select, exists, func, literal, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.models.category import Category
-from app.observability.db_timing import timed_get
+from app.observability.db_timing import timed_execute_scalar_one, timed_get
 
 MAX_CATEGORY_DEPTH = 100
 
@@ -26,31 +26,58 @@ async def get_category_or_none(session: AsyncSession, category_id: int) -> Categ
     return await timed_get(session, Category, category_id)
 
 
+def _ancestor_chain_cte(start_category_id: int, depth_limit: int | None = None):
+    """Build a recursive CTE that walks from a category to the root by parent links."""
+    ancestor_chain: Select = (
+        select(
+            Category.id.label("id"),
+            Category.parent_id.label("parent_id"),
+            literal(1).label("depth"),
+        )
+        .where(Category.id == start_category_id)
+        .cte(name="ancestor_chain", recursive=True)
+    )
+
+    category_alias = Category.__table__.alias("category_alias")
+    recursive_step = select(
+        category_alias.c.id,
+        category_alias.c.parent_id,
+        (ancestor_chain.c.depth + 1).label("depth"),
+    ).where(category_alias.c.id == ancestor_chain.c.parent_id)
+
+    if depth_limit is not None:
+        recursive_step = recursive_step.where(ancestor_chain.c.depth < depth_limit)
+
+    return ancestor_chain.union_all(recursive_step)
+
+
 async def category_depth(session: AsyncSession, parent_id: int | None) -> int:
     """Compute ancestor depth for a parent candidate in the category tree."""
-    depth = 0
-    current_parent_id = parent_id
-    while current_parent_id is not None:
-        depth += 1
-        if depth > MAX_CATEGORY_DEPTH:
-            return depth
-        parent = await timed_get(session, Category, current_parent_id)
-        if parent is None:
-            break
-        current_parent_id = parent.parent_id
-    return depth
+    if parent_id is None:
+        return 0
+
+    ancestor_chain = _ancestor_chain_cte(parent_id, depth_limit=MAX_CATEGORY_DEPTH + 1)
+    depth_statement = select(func.coalesce(func.max(ancestor_chain.c.depth), 0))
+    depth = await timed_execute_scalar_one(session, depth_statement)
+    return int(depth)
 
 
 async def validate_no_cycles(session: AsyncSession, category_id: int, new_parent_id: int | None) -> None:
     """Ensure re-parenting a category does not create a cycle."""
-    cursor = new_parent_id
-    while cursor is not None:
-        if cursor == category_id:
-            raise ValueError("Category cycle detected")
-        candidate = await timed_get(session, Category, cursor)
-        if candidate is None:
-            break
-        cursor = candidate.parent_id
+    if new_parent_id is None:
+        return
+
+    ancestor_chain = _ancestor_chain_cte(new_parent_id, depth_limit=MAX_CATEGORY_DEPTH + 1)
+    cycle_check_statement = select(
+        exists(
+            select(1)
+            .select_from(ancestor_chain)
+            .where(ancestor_chain.c.id == category_id)
+        )
+    )
+    cycle_detected = await timed_execute_scalar_one(session, cycle_check_statement)
+    if bool(cycle_detected):
+        raise ValueError("Category cycle detected")
 
 
 def category_subtree_cte(root_category_id: int):
diff --git a/pyproject.toml b/pyproject.toml
index 3ad2108..aba0bea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "commerce-system-demo"
-version = "0.1.3"
+version = "0.1.4"
 description = "FastAPI commerce service demo"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -39,6 +39,10 @@ asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "session"
 asyncio_default_test_loop_scope = "session"
 testpaths = ["tests"]
+addopts = "-m 'not performance'"
+markers = [
+    "performance: benchmarks and perf-sensitive integration tests (opt-in)",
+]
 
 [tool.setuptools]
 include-package-data = true
diff --git a/tests/test_category_service.py b/tests/test_category_service.py
index 4171901..45a2068 100644
--- a/tests/test_category_service.py
+++ b/tests/test_category_service.py
@@ -11,32 +11,37 @@
 @pytest.mark.asyncio
 async def test_category_depth_stops_when_parent_missing(monkeypatch: pytest.MonkeyPatch):
     session = AsyncMock()
-
-    async def fake_timed_get(_session, _model, category_id):
-        if category_id == 1:
-            return SimpleNamespace(parent_id=2)
-        return None
-
-    monkeypatch.setattr(category_service, "timed_get", fake_timed_get)
+    timed_execute_scalar_one = AsyncMock(return_value=1)
+    monkeypatch.setattr(category_service, "timed_execute_scalar_one", timed_execute_scalar_one)
 
     depth = await category_service.category_depth(session, parent_id=1)
-    assert depth == 2
+    assert depth == 1
+    timed_execute_scalar_one.assert_awaited_once()
 
 
 @pytest.mark.asyncio
-async def test_validate_no_cycles_raises_for_cycle(monkeypatch: pytest.MonkeyPatch):
+async def test_category_depth_returns_zero_for_none_parent(monkeypatch: pytest.MonkeyPatch):
     session = AsyncMock()
+    timed_execute_scalar_one = AsyncMock()
+    monkeypatch.setattr(category_service, "timed_execute_scalar_one", timed_execute_scalar_one)
+
+    depth = await category_service.category_depth(session, parent_id=None)
 
-    async def fake_timed_get(_session, _model, category_id):
-        if category_id == 2:
-            return SimpleNamespace(parent_id=1)
-        return None
+    assert depth == 0
+    timed_execute_scalar_one.assert_not_awaited()
 
-    monkeypatch.setattr(category_service, "timed_get", fake_timed_get)
+
+@pytest.mark.asyncio
+async def test_validate_no_cycles_raises_for_cycle(monkeypatch: pytest.MonkeyPatch):
+    session = AsyncMock()
+    timed_execute_scalar_one = AsyncMock(return_value=1)
+    monkeypatch.setattr(category_service, "timed_execute_scalar_one", timed_execute_scalar_one)
 
     with pytest.raises(ValueError, match="Category cycle detected"):
         await category_service.validate_no_cycles(session, category_id=1, new_parent_id=2)
 
+    timed_execute_scalar_one.assert_awaited_once()
+
 
 @pytest.mark.asyncio
 async def test_validate_category_parent_raises_not_found(monkeypatch: pytest.MonkeyPatch):
@@ -120,11 +125,11 @@ async def test_validate_category_reparent_raises_depth_error(monkeypatch: pytest
 @pytest.mark.asyncio
 async def test_category_depth_returns_when_exceeding_max(monkeypatch: pytest.MonkeyPatch):
     session = AsyncMock()
-
-    async def fake_timed_get(_session, _model, category_id):
-        return SimpleNamespace(parent_id=category_id + 1)
-
-    monkeypatch.setattr(category_service, "timed_get", fake_timed_get)
+    monkeypatch.setattr(
+        category_service,
+        "timed_execute_scalar_one",
+        AsyncMock(return_value=category_service.MAX_CATEGORY_DEPTH + 1),
+    )
 
     depth = await category_service.category_depth(session, parent_id=1)
     assert depth == category_service.MAX_CATEGORY_DEPTH + 1
@@ -133,6 +138,6 @@ async def fake_timed_get(_session, _model, category_id):
 @pytest.mark.asyncio
 async def test_validate_no_cycles_breaks_on_missing_candidate(monkeypatch: pytest.MonkeyPatch):
     session = AsyncMock()
-    monkeypatch.setattr(category_service, "timed_get", AsyncMock(return_value=None))
+    monkeypatch.setattr(category_service, "timed_execute_scalar_one", AsyncMock(return_value=0))
 
-    await category_service.validate_no_cycles(session, category_id=1, new_parent_id=2)
\ No newline at end of file
+    await category_service.validate_no_cycles(session, category_id=1, new_parent_id=2)
diff --git a/tests/test_category_service_benchmark.py b/tests/test_category_service_benchmark.py
new file mode 100644
index 0000000..9c40658
--- /dev/null
+++ b/tests/test_category_service_benchmark.py
@@ -0,0 +1,168 @@
+"""Integration benchmark tests for category hierarchy validations."""
+
+from __future__ import annotations
+
+from statistics import median
+from time import perf_counter
+
+import pytest
+
+from app.models.category import Category
+from app.observability.db_timing import timed_get
+from app.services import category_service
+
+
+async def _seed_linear_category_chain(
+    db_session, depth: int
+) -> tuple[int, int]:
+    """Create a linear chain where each node points to the previous node."""
+    categories = [
+        Category(
+            id=index,
+            name=f"bench-cat-{index}",
+            parent_id=(index - 1 if index > 1 else None),
+        )
+        for index in range(1, depth + 1)
+    ]
+    db_session.add_all(categories)
+    await db_session.commit()
+    return 1, depth
+
+
+async def _old_category_depth(
+    db_session,
+    parent_id: int | None,
+    max_depth: int = category_service.MAX_CATEGORY_DEPTH,
+) -> int:
+    """Baseline implementation: fetch one ancestor per query."""
+    depth = 0
+    current_parent_id = parent_id
+    while current_parent_id is not None:
+        depth += 1
+        if depth > max_depth:
+            return depth
+        parent = await timed_get(db_session, Category, current_parent_id)
+        if parent is None:
+            break
+        current_parent_id = parent.parent_id
+    return depth
+
+
+async def _old_validate_no_cycles(
+    db_session, category_id: int, new_parent_id: int | None
+) -> None:
+    """Baseline implementation: walk ancestor chain one query at a time."""
+    cursor = new_parent_id
+    while cursor is not None:
+        if cursor == category_id:
+            raise ValueError("Category cycle detected")
+        candidate = await timed_get(db_session, Category, cursor)
+        if candidate is None:
+            break
+        cursor = candidate.parent_id
+
+
+async def _benchmark_median_ms(
+    fn,
+    *args,
+    iterations: int = 20,
+    expect_exception: type[Exception] | None = None,
+) -> float:
+    """Return median per-call runtime in milliseconds for an async callable."""
+
+    async def run_once() -> None:
+        if expect_exception is None:
+            await fn(*args)
+            return
+        try:
+            await fn(*args)
+        except expect_exception:
+            return
+        raise AssertionError(
+            "Expected exception was not raised during benchmark run"
+        )
+
+    for _ in range(3):
+        await run_once()
+
+    samples_ms: list[float] = []
+    for _ in range(iterations):
+        started = perf_counter()
+        await run_once()
+        samples_ms.append((perf_counter() - started) * 1000)
+
+    return float(median(samples_ms))
+
+
+@pytest.mark.performance
+@pytest.mark.asyncio
+async def test_category_validation_cte_speedup_on_postgres(db_session):
+    """Verify recursive CTE validation is faster than per-level queries."""
+    depth = category_service.MAX_CATEGORY_DEPTH
+    root_id, leaf_id = await _seed_linear_category_chain(
+        db_session, depth=depth
+    )
+
+    # Sanity checks: both old and new implementations behave the same.
+    assert await _old_category_depth(db_session, leaf_id) == depth
+    assert await category_service.category_depth(db_session, leaf_id) == depth
+
+    with pytest.raises(ValueError, match="Category cycle detected"):
+        await _old_validate_no_cycles(
+            db_session,
+            category_id=root_id,
+            new_parent_id=leaf_id,
+        )
+
+    with pytest.raises(ValueError, match="Category cycle detected"):
+        await category_service.validate_no_cycles(
+            db_session,
+            category_id=root_id,
+            new_parent_id=leaf_id,
+        )
+
+    old_depth_ms = await _benchmark_median_ms(
+        _old_category_depth,
+        db_session,
+        leaf_id,
+        iterations=20,
+    )
+    new_depth_ms = await _benchmark_median_ms(
+        category_service.category_depth,
+        db_session,
+        leaf_id,
+        iterations=20,
+    )
+
+    old_cycle_ms = await _benchmark_median_ms(
+        _old_validate_no_cycles,
+        db_session,
+        root_id,
+        leaf_id,
+        iterations=20,
+        expect_exception=ValueError,
+    )
+    new_cycle_ms = await _benchmark_median_ms(
+        category_service.validate_no_cycles,
+        db_session,
+        root_id,
+        leaf_id,
+        iterations=20,
+        expect_exception=ValueError,
+    )
+
+    depth_speedup = old_depth_ms / max(new_depth_ms, 0.001)
+    cycle_speedup = old_cycle_ms / max(new_cycle_ms, 0.001)
+
+    assert depth_speedup >= 2.0, (
+        "Expected CTE depth validation to be at least 2x faster on PostgreSQL."
+        " "
+        f"old={old_depth_ms:.3f}ms new={new_depth_ms:.3f}ms "
+        f"speedup={depth_speedup:.2f}x"
+    )
+    assert cycle_speedup >= 2.0, (
+        "Expected CTE cycle validation to be at least 2x faster on PostgreSQL."
+        " "
+        f"old={old_cycle_ms:.3f}ms new={new_cycle_ms:.3f}ms "
+        f"speedup={cycle_speedup:.2f}x"
+    )