Skip to content

Commit b74206d

Browse files
committed
feat: stream EC2 build logs to CloudWatch Logs
- Add /${name_prefix}/ec2-builds CloudWatch Log Group (30d retention)
- Grant ec2_builder IAM role logs:CreateLogGroup/CreateLogStream/PutLogEvents/DescribeLogStreams on the group
- Add EC2_BUILD_LOG_GROUP env var to process_build Lambda
- Install amazon-cloudwatch-agent in user-data, configure it to tail /var/log/build.log and stream to CW with build_id as log stream name
1 parent ef446ab commit b74206d

File tree

4 files changed

+59
-4
lines changed

4 files changed

+59
-4
lines changed

infrastructure/lambdas/process_build/index.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
EC2_INSTANCE_TYPE = os.environ.get("EC2_INSTANCE_TYPE", "c5.xlarge")
3333
MAX_BUILD_MINUTES = int(os.environ.get("MAX_BUILD_MINUTES", "30"))
3434
PROJECT_NAME = os.environ.get("PROJECT_NAME", "lambda-layer-builder")
35+
LOG_GROUP_NAME = os.environ.get("EC2_BUILD_LOG_GROUP", "/lambda-layer-builder/prod/ec2-builds")
3536

3637

3738
def handler(event, context):
@@ -67,6 +68,7 @@ def _process_build(message):
6768
architectures=architectures,
6869
requirements=requirements,
6970
single_file=single_file,
71+
log_group_name=LOG_GROUP_NAME,
7072
)
7173

7274
# Pick a random subnet for AZ diversity
@@ -149,7 +151,7 @@ def _update_status(build_id, status, error=None):
149151
print(f"Failed to update status for {build_id}: {e}")
150152

151153

152-
def _generate_user_data(build_id, python_version, architectures, requirements, single_file):
154+
def _generate_user_data(build_id, python_version, architectures, requirements, single_file, log_group_name):
153155
"""Generate the EC2 user-data bash script for the build."""
154156
req_escaped = requirements.replace("\\", "\\\\").replace("'", "'\\''")
155157
arches_str = " ".join(architectures)
@@ -208,12 +210,43 @@ def _generate_user_data(build_id, python_version, architectures, requirements, s
208210
}}
209211
trap cleanup EXIT
210212
211-
# --- Install Docker ---
212-
echo "$(date): Installing Docker..."
213-
dnf install -y docker git aws-cli 2>/dev/null || yum install -y docker git aws-cli
213+
# --- Install Docker and CloudWatch Agent ---
214+
echo "$(date): Installing Docker and CloudWatch Agent..."
215+
dnf install -y docker git aws-cli amazon-cloudwatch-agent 2>/dev/null || yum install -y docker git aws-cli
214216
systemctl start docker
215217
systemctl enable docker
216218
219+
# --- Configure CloudWatch Logs streaming ---
220+
# Stream /var/log/build.log to CloudWatch; each build gets its own log stream.
221+
echo "$(date): Configuring CloudWatch Logs streaming..."
222+
mkdir -p /opt/aws/amazon-cloudwatch-agent/etc
223+
cat > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json << 'CWEOF'
224+
{{
225+
"logs": {{
226+
"logs_collected": {{
227+
"files": {{
228+
"collect_list": [
229+
{{
230+
"file_path": "/var/log/build.log",
231+
"log_group_name": "{log_group_name}",
232+
"log_stream_name": "{build_id}",
233+
"timezone": "UTC",
234+
"timestamp_format": "%Y-%m-%dT%H:%M:%S"
235+
}}
236+
]
237+
}}
238+
}}
239+
}}
240+
}}
241+
CWEOF
242+
243+
/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl \
244+
-a fetch-config -m ec2 \
245+
-c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json \
246+
-s 2>/dev/null \
247+
&& echo "$(date): CloudWatch streaming active → {log_group_name}/{build_id}" \
248+
|| echo "$(date): WARNING: CloudWatch agent failed to start"
249+
217250
# Enable QEMU for cross-architecture builds
218251
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 2>/dev/null || true
219252

infrastructure/terraform/ec2.tf

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,17 @@
66
# Each instance self-terminates after build completion or timeout.
77
# =============================================================================
88

9+
# CloudWatch Log Group for EC2 build output
10+
# Each build writes to its own log stream (stream name = build_id).
11+
resource "aws_cloudwatch_log_group" "ec2_builds" {
12+
name = "/${local.name_prefix}/ec2-builds"
13+
retention_in_days = 30
14+
15+
tags = {
16+
Name = "${local.name_prefix}-ec2-builds"
17+
}
18+
}
19+
920
# Pre-create the EC2 Spot service-linked role so Lambda doesn't need to create it at runtime.
1021
# This role is account-global and only needs to exist once.
1122
# Terraform will silently import it if it already exists.

infrastructure/terraform/iam.tf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,16 @@ resource "aws_iam_role_policy" "ec2_builder" {
246246
"ec2:DescribeInstances",
247247
]
248248
Resource = "*"
249+
},
250+
{
251+
Effect = "Allow"
252+
Action = [
253+
"logs:CreateLogGroup",
254+
"logs:CreateLogStream",
255+
"logs:PutLogEvents",
256+
"logs:DescribeLogStreams",
257+
]
258+
Resource = "${aws_cloudwatch_log_group.ec2_builds.arn}:*"
249259
}
250260
]
251261
})

infrastructure/terraform/lambda.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ resource "aws_lambda_function" "process_build" {
8282
EC2_INSTANCE_TYPE = var.ec2_instance_type
8383
MAX_BUILD_MINUTES = tostring(var.ec2_max_build_time_minutes)
8484
PROJECT_NAME = var.project_name
85+
EC2_BUILD_LOG_GROUP = aws_cloudwatch_log_group.ec2_builds.name
8586
}
8687
}
8788

0 commit comments

Comments
 (0)