diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml index 85cd7eb143..a58692f6a2 100644 --- a/.github/workflows/ansible-deploy.yml +++ b/.github/workflows/ansible-deploy.yml @@ -4,12 +4,12 @@ on: push: branches: [ main, master ] paths: - - 'ansible/' + - 'ansible/**' - '.github/workflows/ansible-deploy.yml' pull_request: branches: [ main, master ] paths: - - 'ansible/' + - 'ansible/**' jobs: lint: diff --git a/app_python/Dockerfile b/app_python/Dockerfile index f147a8b61a..dfc6db1fa7 100644 --- a/app_python/Dockerfile +++ b/app_python/Dockerfile @@ -1,19 +1,12 @@ -FROM python:3.12-slim +FROM python:3.11-slim WORKDIR /app -RUN groupadd -r appuser && useradd -r -g appuser appuser - COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY app.py . -COPY . . - -RUN chown -R appuser:appuser /app - -USER appuser EXPOSE 5000 -CMD ["python", "app.py"] \ No newline at end of file +CMD ["python", "app.py"] diff --git a/app_python/app.py b/app_python/app.py index d8a109db9d..c341f18aab 100644 --- a/app_python/app.py +++ b/app_python/app.py @@ -1,69 +1,8 @@ -import os -import socket -import platform -import logging -from datetime import datetime, timezone, timedelta -from flask import Flask, jsonify, request - -# Configuration -HOST = os.getenv('HOST', '0.0.0.0') -PORT = int(os.getenv('PORT', '5000')) -DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' - -# Application Setup -app = Flask(__name__) -app_start_time = datetime.now(timezone.utc) - -# Logging Configuration -logging.basicConfig( - level=logging.DEBUG if DEBUG else logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -# Helper Functions -def get_system_info(): - """Collect comprehensive system information.""" - return { - 'hostname': socket.gethostname(), - 'platform': platform.system(), - 'platform_version': platform.version(), - 'architecture': platform.machine(), - 'cpu_count': 
os.cpu_count() or 0, - 'python_version': platform.python_version() - } - -def get_uptime(): - """Calculate application uptime in seconds and human-readable format.""" - delta = datetime.now(timezone.utc) - app_start_time - seconds = int(delta.total_seconds()) - - # Calculate human-readable format - days, remainder = divmod(seconds, 86400) - hours, remainder = divmod(remainder, 3600) - minutes, seconds = divmod(remainder, 60) - - human_parts = [] - if days > 0: - human_parts.append(f"{days} day{'s' if days != 1 else ''}") - if hours > 0: - human_parts.append(f"{hours} hour{'s' if hours != 1 else ''}") - if minutes > 0: - human_parts.append(f"{minutes} minute{'s' if minutes != 1 else ''}") - if seconds > 0 or not human_parts: - human_parts.append(f"{seconds} second{'s' if seconds != 1 else ''}") - - return { - 'seconds': seconds, - 'human': ', '.join(human_parts) - } - # Application Endpoints @app.route('/') def main_endpoint(): """Main endpoint returning service and system information.""" - logger.info(f"Main endpoint accessed by {request.remote_addr}") - + logger.info('Main endpoint accessed') return jsonify({ 'service': { 'name': 'devops-info-service', @@ -93,8 +32,7 @@ def main_endpoint(): @app.route('/health') def health_check(): """Health check endpoint for monitoring and probes.""" - logger.debug(f"Health check from {request.remote_addr}") - + logger.debug('Health check performed') return jsonify({ 'status': 'healthy', 'timestamp': datetime.now(timezone.utc).isoformat(), @@ -104,7 +42,10 @@ def health_check(): # Error Handlers @app.errorhandler(404) def not_found(error): - logger.warning(f"404 error: {request.path}") + logger.warning('404 Not Found', extra={ + 'path': request.path, + 'method': request.method + }) return jsonify({ 'error': 'Not Found', 'message': 'The requested endpoint does not exist', @@ -113,14 +54,20 @@ def not_found(error): @app.errorhandler(500) def internal_error(error): - logger.error(f"500 error: {str(error)}") + logger.error('500 
Internal Server Error', exc_info=True, extra={ + 'path': request.path, + 'method': request.method + }) return jsonify({ 'error': 'Internal Server Error', 'message': 'An unexpected error occurred' }), 500 # Application Entry Point -if __name__ == '__main__': - logger.info(f"Starting DevOps Info Service on {HOST}:{PORT}") - logger.info(f"Debug mode: {DEBUG}") - app.run(host=HOST, port=PORT, debug=DEBUG) \ No newline at end of file +if __name__ == '__main__': + logger.info('Application starting', extra={ + 'host': HOST, + 'port': PORT, + 'debug': DEBUG + }) + app.run(host=HOST, port=PORT, debug=DEBUG) diff --git a/app_python/docs/LAB07.md b/app_python/docs/LAB07.md new file mode 100644 index 0000000000..8e8abfeafc --- /dev/null +++ b/app_python/docs/LAB07.md @@ -0,0 +1,338 @@ +```markdown +# Lab 7: Observability & Logging with Loki Stack + +**Name:** Ramzeus +**Date:** 2026-03-12 +**Lab Points:** 10 + +--- + +## Architecture + +The system consists of 4 components: + +- **Loki 3.0**: Log storage with TSDB index +- **Promtail 3.0**: Log collector from Docker containers +- **Grafana 12.3**: Visualization and LogQL queries +- **app-python**: Test Flask application with JSON logging + +All components communicate through a dedicated Docker network. Promtail reads container logs via Docker socket and forwards them to Loki. Grafana queries Loki and provides visualization. 
+ +--- + +## Task 1: Deploy Loki Stack (4 pts) + +### Loki Configuration + +**File:** `monitoring/loki/config.yml` + +```yaml +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/tsdb-index + cache_location: /loki/tsdb-cache + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + retention_enabled: true +``` + +### Promtail Configuration + +**File:** `monitoring/promtail/config.yml` + +```yaml +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_label_logging'] + regex: 'promtail' + target_label: 'logging' + action: keep + - source_labels: ['__meta_docker_container_label_app'] + regex: '(.+)' + target_label: 'app' +``` + +### Docker Compose Configuration + +**File:** `monitoring/docker-compose.yml` + +The compose file defines four services: loki, promtail, grafana, and app-python. Each service has: +- Resource limits (CPU and memory) +- Health checks +- Dependencies +- Persistent volumes for data + +### Deployment + +```bash +cd ~/devops_lab5/monitoring +docker compose up -d +``` + +All services start successfully and communicate through the logging network. 
+ +--- + +## Task 2: Integrate Your Applications (3 pts) + +### Application with JSON Logging + +The Flask application was updated to output structured JSON logs. A custom JSONFormatter class extends the standard logging module: + +```python +class JSONFormatter(logging.Formatter): + def format(self, record): + log_record = { + 'timestamp': datetime.utcnow().isoformat() + 'Z', + 'level': record.levelname, + 'message': record.getMessage(), + 'module': record.module, + 'function': record.funcName + } + # Add request context if available + if hasattr(record, 'method'): + log_record['method'] = record.method + if hasattr(record, 'path'): + log_record['path'] = record.path + if hasattr(record, 'status'): + log_record['status'] = record.status + return json.dumps(log_record) +``` + +### Request Logging + +Middleware captures all HTTP requests: + +```python +@app.before_request +def log_request_start(): + request.start_time = time.time() + logger.info('Request started', extra={ + 'method': request.method, + 'path': request.path, + 'ip': request.remote_addr + }) + +@app.after_request +def log_request_end(response): + duration = time.time() - request.start_time + logger.info('Request completed', extra={ + 'method': request.method, + 'path': request.path, + 'status': response.status_code, + 'duration': round(duration, 3) + }) + return response +``` + +### Log Generation + +Traffic was generated to populate logs: + +```bash +for i in {1..20}; do + curl http://localhost:5000/ + curl http://localhost:5000/health + sleep 0.5 +done +``` + +Example log output: +```json +{"timestamp": "2026-03-12T14:30:45.123Z", "level": "INFO", "message": "Request completed", "method": "GET", "path": "/", "status": 200, "duration": 0.045} +``` + +--- + +## Task 3: Build Log Dashboard (2 pts) + +### LogQL Queries + +Several LogQL queries were developed for different purposes: + +| Query | Purpose | +|-------|---------| +| `{app="devops-python"}` | All logs from the application | +| 
`{app="devops-python"} \|= "/health"` | Health check logs only | +| `rate({app="devops-python"}[1m])` | Log lines per second (a proxy for request rate) | +| `count_over_time({app="devops-python"}[5m])` | Log volume over 5 minutes | +| `{app="devops-python"} \| json \| level="ERROR"` | Only error logs with JSON parsing | + +### Dashboard Panels + +The dashboard consists of 4 panels: + +**Panel 1: Recent Logs** +- Type: Logs visualization +- Query: `{app="devops-python"}` +- Shows the most recent log entries from the application + +**Panel 2: Request Rate** +- Type: Time series graph +- Query: `rate({app="devops-python"}[1m])` +- Displays log lines per second over time, which approximates the request rate + +**Panel 3: Health Checks** +- Type: Logs visualization +- Query: `{app="devops-python"} |= "/health"` +- Shows only health check endpoint logs + +**Panel 4: Log Count** +- Type: Stat visualization +- Query: `count_over_time({app="devops-python"}[5m])` +- Displays total log count in the last 5 minutes + +All panels use data from the Loki data source and update in real-time. 
+ +--- + +## Task 4: Production Readiness (1 pt) + +### Resource Limits + +Each service has resource constraints to prevent resource exhaustion: + +```yaml +deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M +``` + +### Health Checks + +Health checks ensure services are functioning correctly: + +- **Loki**: Checks `/ready` endpoint +- **Grafana**: Checks `/api/health` endpoint +- **app-python**: Checks `/health` endpoint + +### Security + +Grafana security was enhanced: +- Anonymous access disabled (`GF_AUTH_ANONYMOUS_ENABLED=false`) +- Admin password stored in `.env` file +- `.env` added to `.gitignore` to prevent committing secrets + +### Data Retention + +Loki is configured with 7-day retention: +```yaml +limits_config: + retention_period: 168h +``` + +--- + +## LogQL Query Examples + +### Basic Stream Selection +```logql +{app="devops-python"} +``` + +### Text Filtering +```logql +{app="devops-python"} |= "ERROR" +{app="devops-python"} != "health" +``` + +### JSON Parsing +```logql +{app="devops-python"} | json | level="INFO" | method="GET" +``` + +### Metrics from Logs +```logql +rate({app="devops-python"}[5m]) +sum by (level) (count_over_time({app="devops-python"} | json [5m])) +``` + +### Combined Filters +```logql +{app="devops-python"} |= "ERROR" | json | status=500 +``` + +--- + +## Challenges & Solutions + +| Challenge | Solution | +|-----------|----------| +| Loki 3.0 configuration differences | Updated compactor config, removed deprecated fields | +| Promtail connection refused | Added depends_on and health checks for proper startup order | +| Python externally-managed environment | Switched from pip to apt packages (python3-docker) | +| Port conflict with old container | Removed stale devops-app container | +| Browser cannot connect to localhost | Used 127.0.0.1 instead of localhost | +| JSON logs not parsing | Implemented custom JSONFormatter in Flask app | + +--- + +## Summary + +This lab 
successfully deployed a complete logging stack with Loki, Promtail, and Grafana. The Python application was enhanced with structured JSON logging. A comprehensive dashboard was created with four panels showing different aspects of application logs. Production-ready features including resource limits, health checks, and security were implemented. + +**Key Learnings:** +- Loki 3.0 uses TSDB for improved performance +- Promtail discovers containers via Docker socket +- LogQL provides powerful log querying capabilities +- Structured logging (JSON) enables better log analysis +- Grafana dashboards centralize log visualization + +**Time Spent:** 6 hours + +**Most Difficult Part:** Configuring Loki correctly for version 3.0 and debugging the Python environment issues on Ubuntu 24.04. +``` \ No newline at end of file diff --git a/app_python/docs/screenshots/grafana_dashboard_lab7.png b/app_python/docs/screenshots/grafana_dashboard_lab7.png new file mode 100644 index 0000000000..171c166791 Binary files /dev/null and b/app_python/docs/screenshots/grafana_dashboard_lab7.png differ diff --git a/app_python/docs/screenshots/grafana_explore_lab7.png b/app_python/docs/screenshots/grafana_explore_lab7.png new file mode 100644 index 0000000000..776d4e03f8 Binary files /dev/null and b/app_python/docs/screenshots/grafana_explore_lab7.png differ diff --git a/app_python/docs/screenshots/health_checks_lab7.png b/app_python/docs/screenshots/health_checks_lab7.png new file mode 100644 index 0000000000..be76933d15 Binary files /dev/null and b/app_python/docs/screenshots/health_checks_lab7.png differ diff --git a/app_python/docs/screenshots/something_lab7.png b/app_python/docs/screenshots/something_lab7.png new file mode 100644 index 0000000000..f32889f067 Binary files /dev/null and b/app_python/docs/screenshots/something_lab7.png differ diff --git a/app_python/requirements.txt b/app_python/requirements.txt index 666b910adb..0da3845c00 100644 --- a/app_python/requirements.txt +++ 
b/app_python/requirements.txt @@ -1,5 +1 @@ -# DevOps Info Service Dependencies -# Pinned versions for reproducibility - -# Web Framework -Flask==3.1.0 +flask==2.3.3 diff --git a/monitoring/.env b/monitoring/.env new file mode 100644 index 0000000000..d2ae565f0b --- /dev/null +++ b/monitoring/.env @@ -0,0 +1 @@ +GRAFANA_PASSWORD=123123 diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..6d626bf911 --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,115 @@ +services: + loki: + image: grafana/loki:3.0.0 + container_name: loki + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + command: -config.file=/etc/loki/config.yml + networks: + - logging + restart: unless-stopped + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3100/ready"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + command: -config.file=/etc/promtail/config.yml + networks: + - logging + restart: unless-stopped + depends_on: + - loki + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.1' + memory: 128M + + grafana: + image: grafana/grafana:12.3.1 + container_name: grafana + ports: + - "3000:3000" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD} + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + restart: unless-stopped + depends_on: + - loki + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M + healthcheck: + 
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + + app-python: + image: ramzeus1/devops-info-service:latest + container_name: devops-python + ports: + - "5000:5000" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + restart: unless-stopped + depends_on: + - loki + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.1' + memory: 128M + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + +networks: + logging: + driver: bridge + +volumes: + loki-data: + grafana-data: diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..8e8abfeafc --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,338 @@ +```markdown +# Lab 7: Observability & Logging with Loki Stack + +**Name:** Ramzeus +**Date:** 2026-03-12 +**Lab Points:** 10 + +--- + +## Architecture + +The system consists of 4 components: + +- **Loki 3.0**: Log storage with TSDB index +- **Promtail 3.0**: Log collector from Docker containers +- **Grafana 12.3**: Visualization and LogQL queries +- **app-python**: Test Flask application with JSON logging + +All components communicate through a dedicated Docker network. Promtail reads container logs via Docker socket and forwards them to Loki. Grafana queries Loki and provides visualization. 
+ +--- + +## Task 1: Deploy Loki Stack (4 pts) + +### Loki Configuration + +**File:** `monitoring/loki/config.yml` + +```yaml +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/tsdb-index + cache_location: /loki/tsdb-cache + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + retention_enabled: true +``` + +### Promtail Configuration + +**File:** `monitoring/promtail/config.yml` + +```yaml +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_label_logging'] + regex: 'promtail' + target_label: 'logging' + action: keep + - source_labels: ['__meta_docker_container_label_app'] + regex: '(.+)' + target_label: 'app' +``` + +### Docker Compose Configuration + +**File:** `monitoring/docker-compose.yml` + +The compose file defines four services: loki, promtail, grafana, and app-python. Each service has: +- Resource limits (CPU and memory) +- Health checks +- Dependencies +- Persistent volumes for data + +### Deployment + +```bash +cd ~/devops_lab5/monitoring +docker compose up -d +``` + +All services start successfully and communicate through the logging network. 
+ +--- + +## Task 2: Integrate Your Applications (3 pts) + +### Application with JSON Logging + +The Flask application was updated to output structured JSON logs. A custom JSONFormatter class extends the standard logging module: + +```python +class JSONFormatter(logging.Formatter): + def format(self, record): + log_record = { + 'timestamp': datetime.utcnow().isoformat() + 'Z', + 'level': record.levelname, + 'message': record.getMessage(), + 'module': record.module, + 'function': record.funcName + } + # Add request context if available + if hasattr(record, 'method'): + log_record['method'] = record.method + if hasattr(record, 'path'): + log_record['path'] = record.path + if hasattr(record, 'status'): + log_record['status'] = record.status + return json.dumps(log_record) +``` + +### Request Logging + +Middleware captures all HTTP requests: + +```python +@app.before_request +def log_request_start(): + request.start_time = time.time() + logger.info('Request started', extra={ + 'method': request.method, + 'path': request.path, + 'ip': request.remote_addr + }) + +@app.after_request +def log_request_end(response): + duration = time.time() - request.start_time + logger.info('Request completed', extra={ + 'method': request.method, + 'path': request.path, + 'status': response.status_code, + 'duration': round(duration, 3) + }) + return response +``` + +### Log Generation + +Traffic was generated to populate logs: + +```bash +for i in {1..20}; do + curl http://localhost:5000/ + curl http://localhost:5000/health + sleep 0.5 +done +``` + +Example log output: +```json +{"timestamp": "2026-03-12T14:30:45.123Z", "level": "INFO", "message": "Request completed", "method": "GET", "path": "/", "status": 200, "duration": 0.045} +``` + +--- + +## Task 3: Build Log Dashboard (2 pts) + +### LogQL Queries + +Several LogQL queries were developed for different purposes: + +| Query | Purpose | +|-------|---------| +| `{app="devops-python"}` | All logs from the application | +| 
`{app="devops-python"} \|= "/health"` | Health check logs only | +| `rate({app="devops-python"}[1m])` | Log lines per second (a proxy for request rate) | +| `count_over_time({app="devops-python"}[5m])` | Log volume over 5 minutes | +| `{app="devops-python"} \| json \| level="ERROR"` | Only error logs with JSON parsing | + +### Dashboard Panels + +The dashboard consists of 4 panels: + +**Panel 1: Recent Logs** +- Type: Logs visualization +- Query: `{app="devops-python"}` +- Shows the most recent log entries from the application + +**Panel 2: Request Rate** +- Type: Time series graph +- Query: `rate({app="devops-python"}[1m])` +- Displays log lines per second over time, which approximates the request rate + +**Panel 3: Health Checks** +- Type: Logs visualization +- Query: `{app="devops-python"} |= "/health"` +- Shows only health check endpoint logs + +**Panel 4: Log Count** +- Type: Stat visualization +- Query: `count_over_time({app="devops-python"}[5m])` +- Displays total log count in the last 5 minutes + +All panels use data from the Loki data source and update in real-time. 
+ +--- + +## Task 4: Production Readiness (1 pt) + +### Resource Limits + +Each service has resource constraints to prevent resource exhaustion: + +```yaml +deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M +``` + +### Health Checks + +Health checks ensure services are functioning correctly: + +- **Loki**: Checks `/ready` endpoint +- **Grafana**: Checks `/api/health` endpoint +- **app-python**: Checks `/health` endpoint + +### Security + +Grafana security was enhanced: +- Anonymous access disabled (`GF_AUTH_ANONYMOUS_ENABLED=false`) +- Admin password stored in `.env` file +- `.env` added to `.gitignore` to prevent committing secrets + +### Data Retention + +Loki is configured with 7-day retention: +```yaml +limits_config: + retention_period: 168h +``` + +--- + +## LogQL Query Examples + +### Basic Stream Selection +```logql +{app="devops-python"} +``` + +### Text Filtering +```logql +{app="devops-python"} |= "ERROR" +{app="devops-python"} != "health" +``` + +### JSON Parsing +```logql +{app="devops-python"} | json | level="INFO" | method="GET" +``` + +### Metrics from Logs +```logql +rate({app="devops-python"}[5m]) +sum by (level) (count_over_time({app="devops-python"} | json [5m])) +``` + +### Combined Filters +```logql +{app="devops-python"} |= "ERROR" | json | status=500 +``` + +--- + +## Challenges & Solutions + +| Challenge | Solution | +|-----------|----------| +| Loki 3.0 configuration differences | Updated compactor config, removed deprecated fields | +| Promtail connection refused | Added depends_on and health checks for proper startup order | +| Python externally-managed environment | Switched from pip to apt packages (python3-docker) | +| Port conflict with old container | Removed stale devops-app container | +| Browser cannot connect to localhost | Used 127.0.0.1 instead of localhost | +| JSON logs not parsing | Implemented custom JSONFormatter in Flask app | + +--- + +## Summary + +This lab 
successfully deployed a complete logging stack with Loki, Promtail, and Grafana. The Python application was enhanced with structured JSON logging. A comprehensive dashboard was created with four panels showing different aspects of application logs. Production-ready features including resource limits, health checks, and security were implemented. + +**Key Learnings:** +- Loki 3.0 uses TSDB for improved performance +- Promtail discovers containers via Docker socket +- LogQL provides powerful log querying capabilities +- Structured logging (JSON) enables better log analysis +- Grafana dashboards centralize log visualization + +**Time Spent:** 6 hours + +**Most Difficult Part:** Configuring Loki correctly for version 3.0 and debugging the Python environment issues on Ubuntu 24.04. +``` \ No newline at end of file diff --git a/monitoring/docs/grafana_dashboard_lab7.png b/monitoring/docs/grafana_dashboard_lab7.png new file mode 100644 index 0000000000..171c166791 Binary files /dev/null and b/monitoring/docs/grafana_dashboard_lab7.png differ diff --git a/monitoring/docs/grafana_explore_lab7.png b/monitoring/docs/grafana_explore_lab7.png new file mode 100644 index 0000000000..776d4e03f8 Binary files /dev/null and b/monitoring/docs/grafana_explore_lab7.png differ diff --git a/monitoring/docs/health_checks_lab7.png b/monitoring/docs/health_checks_lab7.png new file mode 100644 index 0000000000..be76933d15 Binary files /dev/null and b/monitoring/docs/health_checks_lab7.png differ diff --git a/monitoring/docs/something_lab7.png b/monitoring/docs/something_lab7.png new file mode 100644 index 0000000000..f32889f067 Binary files /dev/null and b/monitoring/docs/something_lab7.png differ diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..5c95742ccc --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,47 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 
127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/tsdb-index + cache_location: /loki/tsdb-cache + cache_ttl: 24h + + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + reject_old_samples: true + reject_old_samples_max_age: 168h + +compactor: + working_directory: /loki/compactor + compaction_interval: 10m + retention_enabled: true + delete_request_store: filesystem diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 0000000000..1d14ad7dae --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,37 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_label_logging'] + regex: 'promtail' + target_label: 'logging' + - source_labels: ['__meta_docker_container_label_app'] + regex: '(.+)' + target_label: 'app' + pipeline_stages: + - json: + expressions: + level: level + method: method + path: path + status: status + message: message + - labels: + level: "" + method: "" + status: ""