diff --git a/AGENTS.md b/AGENTS.md
index 215ad3b..e793726 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -6,7 +6,7 @@ Guide for AI coding agents working on the Commerce System Demo project.
 
 Commerce System Demo is a FastAPI-based commerce service that provides RESTful APIs for managing products, categories, and implementing search functionality. The project includes built-in observability with OpenTelemetry metrics, logging, and distributed tracing.
 
-**Current Version**: 0.1.2 (following [Semantic Versioning](https://semver.org/))
+**Current Version**: 0.1.3 (following [Semantic Versioning](https://semver.org/))
 
 ## Setup Commands
 
@@ -101,7 +101,7 @@ app/
 
 ### Docker
 
-- **Build image**: `docker build -t commerce-system-demo:0.1.2 .`
+- **Build image**: `docker build -t commerce-system-demo:0.1.3 .`
 - **View Dockerfile**: Includes Python dependencies, migration scripts, and app code
 - **Build context**: Includes `scripts/`, `app/`, and `observability/` directories
 
@@ -184,7 +184,7 @@ This project follows [Semantic Versioning 2.0.0](https://semver.org/):
 - **MINOR**: Backward-compatible new features
 - **PATCH**: Backward-compatible bug fixes
 
-Current version is **0.1.2** (initial development). Version is defined in:
+Current version is **0.1.3** (initial development). Version is defined in:
 - `pyproject.toml` (project metadata)
 - `app/main.py` (FastAPI version)
 - `app/observability/metrics.py` (meter version)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9e916ac..3e52447 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Advanced filtering options for product search
 - Product images storage optimization
 
+## [0.1.3] - 2026-03-20
+
+### Added
+
+- Health check endpoint now verifies database connectivity via `SELECT 1` probe
+- Retry policy for the health check database probe, with a configurable attempt count and per-attempt timeout
+- New settings `health_check_db_retries` (default 3, the total number of probe attempts) and `health_check_db_timeout` (default 2.0s, applied to each attempt)
+- OpenTelemetry metrics for health checks: `commerce_health_check_total` counter and `commerce_health_check_duration_seconds` histogram
+- Integration tests for health check success, DB failure with retries, recovery on retry, and metrics recording
+
 ## [0.1.2] - 2026-03-20
 
 ### Added
diff --git a/app/core/config.py b/app/core/config.py
index 5fbcc82..020ac9a 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -39,6 +39,8 @@ class Settings(BaseSettings):
     otel_metrics_path: str = "/metrics"
     otel_trace_excluded_urls: str = ""
     log_level: str = "INFO"
+    health_check_db_retries: int = 3
+    health_check_db_timeout: float = 2.0
 
 
 @lru_cache
diff --git a/app/main.py b/app/main.py
index 05d3dd5..79b3fab 100644
--- a/app/main.py
+++ b/app/main.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, JSONResponse
 from fastapi.templating import Jinja2Templates
 from jinja2 import TemplateNotFound
 
@@ -77,7 +77,7 @@ def create_app() -> FastAPI:
 
     app = FastAPI(
         title="Commerce System Demo",
-        version="0.1.2",
+        version="0.1.3",
         lifespan=lifespan,
     )
     app.router.route_class = ObservabilityRoute
@@ -116,7 +116,50 @@ async def home(request: Request):
 
     @app.get("/health", tags=["health"])
     async def health() -> dict[str, str]:
-        return {"status": "ok"}
+        """Check database connectivity; respond 503 once every probe attempt fails."""
+        import asyncio
+        import time
+
+        from sqlalchemy import text
+
+        from app.db.session import get_engine
+        from app.observability.metrics import health_check_duration_seconds, health_check_total
+
+        settings = get_settings()
+        # Clamp so a misconfigured value (<= 0) cannot report failure without probing.
+        retries = max(1, settings.health_check_db_retries)
+        timeout = settings.health_check_db_timeout
+        engine = get_engine()
+        start = time.monotonic()
+        last_error: Exception | None = None
+
+        for attempt in range(1, retries + 1):
+            try:
+                async with asyncio.timeout(timeout):
+                    async with engine.connect() as conn:
+                        await conn.execute(text("SELECT 1"))
+                health_check_total.add(1, {"status": "ok"})
+                health_check_duration_seconds.record(time.monotonic() - start, {"status": "ok"})
+                return {"status": "ok", "database": "available"}
+            except Exception as exc:
+                last_error = exc  # logged via repr(): str() of a bare TimeoutError is empty
+                logger.warning(
+                    "health_check_db_attempt_failed",
+                    extra={"attempt": attempt, "max_retries": retries, "error": repr(exc)},
+                )
+                if attempt < retries:
+                    await asyncio.sleep(0.1 * attempt)  # linear backoff between attempts
+
+        health_check_total.add(1, {"status": "error"})
+        health_check_duration_seconds.record(time.monotonic() - start, {"status": "error"})
+        logger.error(
+            "health_check_database_failure",
+            extra={"retries_exhausted": retries, "error": repr(last_error)},
+        )
+        return JSONResponse(
+            status_code=503,
+            content={"status": "error", "database": "unavailable"},
+        )
 
     return app
 
diff --git a/app/observability/metrics.py b/app/observability/metrics.py
index 1b54f8f..815bed3 100644
--- a/app/observability/metrics.py
+++ b/app/observability/metrics.py
@@ -3,7 +3,7 @@
 from opentelemetry import metrics
 from opentelemetry.metrics import Counter, Histogram, UpDownCounter
 
-_meter = metrics.get_meter("commerce-system-demo-observability", version="0.1.2")
+_meter = metrics.get_meter("commerce-system-demo-observability", version="0.1.3")
 
 http_request_duration_seconds: Histogram = _meter.create_histogram(
     name="commerce_http_request_duration_seconds",
@@ -100,3 +100,15 @@
     unit="1",
     description="Total number of category validation failures",
 )
+
+health_check_total: Counter = _meter.create_counter(
+    name="commerce_health_check_total",
+    unit="1",
+    description="Total number of health check requests",
+)
+
+health_check_duration_seconds: Histogram = _meter.create_histogram(
+    name="commerce_health_check_duration_seconds",
+    unit="s",
+    description="Duration of health check requests including database probe",
+)
diff --git a/app/observability/setup.py b/app/observability/setup.py
index 47311e2..3b35f6c 100644
--- a/app/observability/setup.py
+++ b/app/observability/setup.py
@@ -76,7 +76,7 @@ def _build_resource(settings: Settings) -> Resource:
     """Build OpenTelemetry resource attributes from runtime settings."""
     attributes = {
         "service.name": settings.otel_service_name,
-        "service.version": "0.1.2",
+        "service.version": "0.1.3",
         "deployment.environment": settings.otel_environment,
     }
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 0323d5f..b5ab359 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -52,6 +52,12 @@ services:
       OTEL_TRACE_EXCLUDED_URLS: /metrics,/health
     ports:
       - "8000:8000"
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
+      interval: 10s
+      timeout: 10s  # /health may take ~6.3s with default settings (3 probes x 2s + backoff)
+      retries: 5
+      start_period: 10s
     depends_on:
       db:
         condition: service_healthy
@@ -112,7 +118,8 @@ services:
       - ./observability/prometheus.yml:/etc/prometheus/prometheus.yml:ro
       - ./observability/prometheus-alerts:/etc/prometheus/alerts:ro
     depends_on:
-      - app
+      app:
+        condition: service_healthy
     ports:
       - "9090:9090"
 
diff --git a/observability/grafana/dashboards/commerce-observability.json b/observability/grafana/dashboards/commerce-observability.json
index c32ced8..cb11177 100644
--- a/observability/grafana/dashboards/commerce-observability.json
+++ b/observability/grafana/dashboards/commerce-observability.json
@@ -166,6 +166,56 @@
           "legendFormat": "{{le}}"
         }
       ]
+    },
+    {
+      "id": 11,
+      "type": "timeseries",
+      "title": "Health Check Rate by Status (req/s)",
+      "gridPos": {"h": 8, "w": 8, "x": 0, "y": 32},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "sum(rate(commerce_health_check_total[5m])) by (status)",
+          "legendFormat": "{{status}}"
+        }
+      ]
+    },
+    {
+      "id": 12,
+      "type": "timeseries",
+      "title": "Health Check Failure Ratio (%)",
+      "gridPos": {"h": 8, "w": 8, "x": 8, "y": 32},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {
+        "defaults": {"unit": "percentunit", "min": 0, "max": 1},
+        "overrides": []
+      },
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "sum(rate(commerce_health_check_total{status=\"error\"}[5m])) / sum(rate(commerce_health_check_total[5m]))",
+          "legendFormat": "failure ratio"
+        }
+      ]
+    },
+    {
+      "id": 13,
+      "type": "timeseries",
+      "title": "P95 Health Check Duration (ms)",
+      "gridPos": {"h": 8, "w": 8, "x": 16, "y": 32},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {
+        "defaults": {"unit": "ms"},
+        "overrides": []
+      },
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "histogram_quantile(0.95, sum(rate(commerce_health_check_duration_seconds_bucket[5m])) by (le)) * 1000",
+          "legendFormat": "p95"
+        }
+      ]
     }
   ]
 }
diff --git a/observability/prometheus-alerts/commerce-alerts.yml b/observability/prometheus-alerts/commerce-alerts.yml
index fb3859d..2dc7dae 100644
--- a/observability/prometheus-alerts/commerce-alerts.yml
+++ b/observability/prometheus-alerts/commerce-alerts.yml
@@ -63,3 +63,18 @@ groups:
         annotations:
           summary: "Database pool pressure"
           description: "DB pool in-use connections are above 12 for at least 10 minutes."
+
+      - alert: CommerceHealthCheckFailures
+        expr: |
+          (
+            sum(rate(commerce_health_check_total{status="error"}[5m]))
+            /
+            clamp_min(sum(rate(commerce_health_check_total[5m])), 0.001)
+          ) > 0.5
+        for: 5m
+        labels:
+          severity: critical
+          priority: p1
+        annotations:
+          summary: "Health check database failures"
+          description: "More than 50% of health checks are failing for at least 5 minutes."
diff --git a/pyproject.toml b/pyproject.toml
index 09d2c4d..3ad2108 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "commerce-system-demo"
-version = "0.1.2"
+version = "0.1.3"
 description = "FastAPI commerce service demo"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/tests/test_api.py b/tests/test_api.py
index 743edf0..dd73b45 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -35,10 +35,148 @@ async def override_get_session():
 
 @pytest.mark.asyncio
 async def test_health_endpoint(client: AsyncClient):
-    """Test the health check endpoint."""
+    """Verify /health returns 200 with status "ok" and database "available"."""
     response = await client.get("/health")
     assert response.status_code == 200
-    assert response.json() == {"status": "ok"}
+    data = response.json()
+    assert data["status"] == "ok"
+    assert data["database"] == "available"
+
+
+@pytest.mark.asyncio
+async def test_health_endpoint_database_unavailable(db_session: AsyncSession):
+    """Test the health check returns 503 after every probe attempt has failed."""
+    from unittest.mock import MagicMock, patch
+
+    app = create_app()
+
+    async def override_get_session():
+        yield db_session
+
+    app.dependency_overrides[get_session] = override_get_session
+
+    # A sync MagicMock raises on call; AsyncMock would only raise once its coroutine is awaited.
+    mock_engine = MagicMock(connect=MagicMock(side_effect=Exception("connection refused")))
+
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as ac:
+        with patch("app.db.session.get_engine", return_value=mock_engine):
+            response = await ac.get("/health")
+
+    app.dependency_overrides.clear()
+
+    assert response.status_code == 503
+    data = response.json()
+    assert data["status"] == "error"
+    assert data["database"] == "unavailable"
+    # Default retries is 3 — engine.connect should be called 3 times
+    assert mock_engine.connect.call_count == 3
+
+
+@pytest.mark.asyncio
+async def test_health_endpoint_database_recovers_on_retry(db_session: AsyncSession):
+    """Test that /health returns 200 when the first probe fails and the second succeeds."""
+    from unittest.mock import AsyncMock, MagicMock, patch
+
+    app = create_app()
+
+    async def override_get_session():
+        yield db_session
+
+    app.dependency_overrides[get_session] = override_get_session
+
+    # First connect() call raises; the second returns an async context manager yielding mock_conn
+    mock_conn = AsyncMock()
+    mock_conn.execute = AsyncMock(return_value=None)
+    mock_conn.__aenter__ = AsyncMock(return_value=mock_conn)
+    mock_conn.__aexit__ = AsyncMock(return_value=False)
+
+    mock_engine = AsyncMock()
+    mock_engine.connect = MagicMock(
+        side_effect=[Exception("transient error"), MagicMock(
+            __aenter__=AsyncMock(return_value=mock_conn),
+            __aexit__=AsyncMock(return_value=False),
+        )]
+    )
+
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as ac:
+        with patch("app.db.session.get_engine", return_value=mock_engine):
+            response = await ac.get("/health")
+
+    app.dependency_overrides.clear()
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["status"] == "ok"
+    assert data["database"] == "available"
+    assert mock_engine.connect.call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_health_endpoint_metrics_recorded_on_success(db_session: AsyncSession):
+    """Test that a successful check records the counter and histogram once with status "ok"."""
+    from unittest.mock import MagicMock, patch
+
+    app = create_app()
+
+    async def override_get_session():
+        yield db_session
+
+    app.dependency_overrides[get_session] = override_get_session
+
+    mock_counter = MagicMock()
+    mock_histogram = MagicMock()
+    # health() imports the instruments per request, so patching the module attributes works.
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as ac:
+        with patch("app.observability.metrics.health_check_total", mock_counter), \
+             patch("app.observability.metrics.health_check_duration_seconds", mock_histogram):
+            response = await ac.get("/health")
+
+    app.dependency_overrides.clear()
+
+    assert response.status_code == 200
+    assert response.json()["status"] == "ok"
+    mock_counter.add.assert_called_once_with(1, {"status": "ok"})
+    mock_histogram.record.assert_called_once()
+    record_args = mock_histogram.record.call_args
+    assert record_args[0][1] == {"status": "ok"}
+
+
+@pytest.mark.asyncio
+async def test_health_endpoint_metrics_recorded_on_failure(db_session: AsyncSession):
+    """Test that a failed check records the counter and histogram once with status "error"."""
+    from unittest.mock import MagicMock, patch
+
+    app = create_app()
+
+    async def override_get_session():
+        yield db_session
+
+    app.dependency_overrides[get_session] = override_get_session
+
+    # A sync MagicMock raises on call; AsyncMock would only raise once its coroutine is awaited.
+    mock_engine = MagicMock(connect=MagicMock(side_effect=Exception("connection refused")))
+
+    mock_counter = MagicMock()
+    mock_histogram = MagicMock()
+
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as ac:
+        with patch("app.db.session.get_engine", return_value=mock_engine), \
+             patch("app.observability.metrics.health_check_total", mock_counter), \
+             patch("app.observability.metrics.health_check_duration_seconds", mock_histogram):
+            response = await ac.get("/health")
+
+    app.dependency_overrides.clear()
+
+    assert response.status_code == 503
+    assert response.json()["status"] == "error"
+    mock_counter.add.assert_called_once_with(1, {"status": "error"})
+    mock_histogram.record.assert_called_once()
+    recorded_attributes = mock_histogram.record.call_args.args[1]
+    assert recorded_attributes == {"status": "error"}
 
 
 @pytest.mark.asyncio