From f5212d0a066c1fae005521d638fec6778f3b2d9b Mon Sep 17 00:00:00 2001
From: Mahima Singh <105724608+smahima27@users.noreply.github.com>
Date: Tue, 26 May 2026 15:06:03 +0530
Subject: [PATCH 1/4] feat: add 9 new waste detectors, extended CSV data, and
 AI dashboard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New detection rules in detection_engine.py:
- detect_nat_idle: flag NAT Gateways with <1 GB/week traffic
- detect_idle_load_balancers: flag ALBs/NLBs with 0 target groups or requests
- detect_old_gen_instances: flag t2/m4/c4/r4 families → recommend t3/m5/c5/r5
- detect_orphan_snapshots: flag EBS snapshots >90 days old with no source volume
- detect_gp2_volumes: flag gp2 volumes for migration to gp3 (20% cheaper)
- detect_ondemand_no_coverage: flag EC2 on-demand >30d without RI/SP (40% saving)
- detect_infinite_log_retention: flag CloudWatch log groups with infinite retention (70% saving)
- detect_stopped_ec2_with_ebs: flag stopped EC2 still paying for attached EBS
- detect_underutilized_cache_redshift: flag ElastiCache/Redshift at <10% CPU

Also:
- Extended aws_cost_data.csv with 37 new sample rows covering all new resource types
- Fixed detect_idle_ec2 to exclude stopped instances (prevents double-counting)
- Added dashboard_AI.py: enhanced Streamlit dashboard with embedded Claude AI chatbot,
  Slack webhook alerts, dark-themed findings cards, and executive summary panel

Results: 77 findings · $7,686/mo · $92,238/yr
---
 aws_cost_data.csv   | 159 +++++++++++--------
 dashboard_AI.py     | 370 ++++++++++++++++++++++++++++++++++++++++++++
 detection_engine.py | 292 +++++++++++++++++++++++++++++++++-
 3 files changed, 758 insertions(+), 63 deletions(-)
 create mode 100644 dashboard_AI.py

diff --git a/aws_cost_data.csv b/aws_cost_data.csv
index 8e227fa..f105813 100644
--- a/aws_cost_data.csv
+++ b/aws_cost_data.csv
@@ -1,61 +1,98 @@
-resource_id,resource_name,service,resource_type,region,team,environment,cpu_avg_7d,memory_avg_7d,daily_cost_usd,monthly_cost_usd,days_running,last_accessed,status,tags
-i-09963334018,platform-dev-ec2-01,EC2,t3.large,us-east-1,platform,dev,3.22,7.86,2.0,60.0,34,2026-03-09,running,"team:platform,env:dev"
-i-02801823908,payments-staging-ec2-02,EC2,t3.large,us-east-1,payments,staging,49.04,58.03,2.0,60.0,67,2026-03-02,running,"team:payments,env:staging"
-i-01438989805,payments-sandbox-ec2-03,EC2,t3.medium,us-east-1,payments,sandbox,1.3,13.7,1.0,30.0,46,2026-04-04,running,"team:payments,env:sandbox"
-i-02625792787,frontend-dev-ec2-04,EC2,r5.2xlarge,eu-west-1,frontend,dev,2.2,31.39,12.1,363.0,106,2026-05-02,running,"team:frontend,env:dev"
-i-05231494220,frontend-staging-ec2-05,EC2,r5.2xlarge,ap-south-1,frontend,staging,2.95,82.98,12.1,363.0,93,2026-04-10,running,"team:frontend,env:staging"
-i-03783290795,data-eng-sandbox-ec2-06,EC2,m5.4xlarge,eu-west-1,data-eng,sandbox,22.74,62.15,18.43,552.9,84,2026-04-09,running,"team:data-eng,env:sandbox"
-i-01240251661,payments-sandbox-ec2-07,EC2,m5.2xlarge,ap-south-1,payments,sandbox,3.92,77.45,9.22,276.6,51,2026-03-07,running,"team:payments,env:sandbox"
-i-07439149233,data-eng-prod-ec2-08,EC2,m5.4xlarge,eu-west-1,data-eng,prod,49.43,46.3,18.43,552.9,30,2026-04-14,running,"team:data-eng,env:prod"
-i-07801222128,payments-staging-ec2-09,EC2,c5.4xlarge,us-east-1,payments,staging,29.34,24.74,16.32,489.6,98,2026-03-11,running,"team:payments,env:staging"
-i-03694860228,frontend-sandbox-ec2-10,EC2,m5.xlarge,eu-west-1,frontend,sandbox,68.83,42.99,4.61,138.3,14,2026-05-05,running,"team:frontend,env:sandbox"
-i-09639245200,ml-ops-sandbox-ec2-11,EC2,t3.large,eu-west-1,ml-ops,sandbox,46.92,15.31,2.0,60.0,79,2026-03-15,running,"team:ml-ops,env:sandbox"
-i-04272602734,data-eng-prod-ec2-12,EC2,c5.4xlarge,eu-west-1,data-eng,prod,2.18,87.14,16.32,489.6,40,2026-04-14,running,"team:data-eng,env:prod"
-i-04944899549,frontend-staging-ec2-13,EC2,c5.4xlarge,eu-west-1,frontend,staging,1.1,34.17,16.32,489.6,22,2026-03-11,running,"team:frontend,env:staging"
-i-01248786714,platform-dev-ec2-14,EC2,t3.medium,eu-west-1,platform,dev,65.25,90.23,1.0,30.0,115,2026-04-24,running,"team:platform,env:dev"
-i-03361388464,ml-ops-prod-ec2-15,EC2,t3.large,us-east-1,ml-ops,prod,20.3,52.49,2.0,60.0,100,2026-03-01,running,"team:ml-ops,env:prod"
-i-07517938612,payments-staging-ec2-16,EC2,m5.4xlarge,ap-south-1,payments,staging,4.18,27.31,18.43,552.9,94,2026-03-31,running,"team:payments,env:staging"
-i-09894847574,data-eng-prod-ec2-17,EC2,t3.large,eu-west-1,data-eng,prod,50.84,10.3,2.0,60.0,78,2026-05-24,running,"team:data-eng,env:prod"
-i-03030106617,platform-dev-ec2-18,EC2,t3.large,us-east-1,platform,dev,30.38,77.67,2.0,60.0,12,2026-03-23,running,"team:platform,env:dev"
-i-09822598054,platform-prod-ec2-19,EC2,m5.2xlarge,eu-west-1,platform,prod,2.07,93.57,9.22,276.6,87,2026-03-19,running,"team:platform,env:prod"
-i-01822873088,platform-sandbox-ec2-20,EC2,t3.large,eu-west-1,platform,sandbox,3.69,45.38,2.0,60.0,96,2026-04-16,running,"team:platform,env:sandbox"
-i-01420514789,payments-dev-ec2-21,EC2,m5.4xlarge,eu-west-1,payments,dev,3.92,94.94,18.43,552.9,62,2026-03-22,running,"team:payments,env:dev"
-i-04875962612,platform-staging-ec2-22,EC2,t3.medium,ap-south-1,platform,staging,2.28,39.11,1.0,30.0,24,2026-03-27,running,"team:platform,env:staging"
-i-03090237817,data-eng-sandbox-ec2-23,EC2,m5.4xlarge,ap-south-1,data-eng,sandbox,4.04,31.7,18.43,552.9,39,2026-03-07,running,"team:data-eng,env:sandbox"
-i-01676168421,frontend-prod-ec2-24,EC2,t3.medium,eu-west-1,frontend,prod,0.69,12.21,1.0,30.0,98,2026-03-17,running,"team:frontend,env:prod"
-i-03553440342,platform-prod-ec2-25,EC2,m5.xlarge,eu-west-1,platform,prod,38.65,12.38,4.61,138.3,89,2026-03-13,running,"team:platform,env:prod"
-vol-08370987661,frontend-dev-ebs-01,EBS,gp3-1000GB,ap-south-1,frontend,dev,0,0,3.33,100.0,71,2026-03-15,available (unattached),"team:frontend,env:dev"
-vol-08067372072,ml-ops-staging-ebs-02,EBS,gp3-200GB,us-east-1,ml-ops,staging,0,0,0.67,20.0,176,2025-11-30,available (unattached),"team:ml-ops,env:staging"
-vol-01429416213,platform-prod-ebs-03,EBS,gp3-200GB,eu-west-1,platform,prod,0,0,0.67,20.0,122,2026-01-23,available (unattached),"team:platform,env:prod"
-vol-09303285822,frontend-dev-ebs-04,EBS,gp3-100GB,ap-south-1,frontend,dev,0,0,0.33,10.0,38,2026-04-17,available (unattached),"team:frontend,env:dev"
-vol-03271782991,data-eng-dev-ebs-05,EBS,gp3-100GB,eu-west-1,data-eng,dev,0,0,0.33,10.0,45,2026-04-10,available (unattached),"team:data-eng,env:dev"
-vol-02135872495,data-eng-prod-ebs-06,EBS,gp3-1000GB,ap-south-1,data-eng,prod,0,0,3.33,100.0,39,2026-04-16,available (unattached),"team:data-eng,env:prod"
-vol-03597724331,payments-dev-ebs-07,EBS,gp3-1000GB,eu-west-1,payments,dev,0,0,3.33,100.0,77,2026-03-09,available (unattached),"team:payments,env:dev"
-vol-07393195616,payments-dev-ebs-08,EBS,gp3-200GB,ap-south-1,payments,dev,0,0,0.67,20.0,134,2026-01-11,available (unattached),"team:payments,env:dev"
-vol-05310195918,ml-ops-dev-ebs-09,EBS,gp3-50GB,ap-south-1,ml-ops,dev,0,0,0.17,5.0,16,2026-05-09,available (unattached),"team:ml-ops,env:dev"
-vol-08325785916,payments-sandbox-ebs-10,EBS,gp3-200GB,eu-west-1,payments,sandbox,0,0,0.67,20.0,146,2025-12-30,available (unattached),"team:payments,env:sandbox"
-vol-03343292475,platform-prod-ebs-11,EBS,gp3-50GB,us-east-1,platform,prod,0,0,0.17,5.0,43,2026-04-12,available (unattached),"team:platform,env:prod"
-vol-01547374338,frontend-staging-ebs-12,EBS,gp3-1000GB,us-east-1,frontend,staging,0,0,3.33,100.0,115,2026-01-30,available (unattached),"team:frontend,env:staging"
-vol-03929454134,platform-dev-ebs-13,EBS,gp3-200GB,eu-west-1,platform,dev,0,0,0.67,20.0,58,2026-03-28,available (unattached),"team:platform,env:dev"
-db-DAT4878,data-eng-sandbox-rds-01,RDS,db.t3.medium,ap-south-1,data-eng,sandbox,43.73,78.33,1.63,48.9,55,2026-04-29,available,"team:data-eng,env:sandbox"
-db-PAY5065,payments-dev-rds-02,RDS,db.t3.medium,us-east-1,payments,dev,28.53,21.14,1.63,48.9,189,2026-05-19,available,"team:payments,env:dev"
-db-PLA8541,platform-sandbox-rds-03,RDS,db.r5.2xlarge,us-east-1,platform,sandbox,2.91,31.36,23.04,691.2,68,2026-05-11,available,"team:platform,env:sandbox"
-db-PAY5573,payments-sandbox-rds-04,RDS,db.t3.medium,us-east-1,payments,sandbox,3.07,54.9,1.63,48.9,112,2026-04-12,available,"team:payments,env:sandbox"
-db-PLA5349,platform-prod-rds-05,RDS,db.m5.xlarge,ap-south-1,platform,prod,21.75,17.59,8.21,246.3,121,2026-05-03,available,"team:platform,env:prod"
-db-ML-5173,ml-ops-prod-rds-06,RDS,db.m5.xlarge,ap-south-1,ml-ops,prod,35.95,59.61,8.21,246.3,10,2026-04-22,available,"team:ml-ops,env:prod"
-db-DAT6409,data-eng-sandbox-rds-07,RDS,db.m5.large,eu-west-1,data-eng,sandbox,2.43,31.97,4.1,123.0,41,2026-05-06,available,"team:data-eng,env:sandbox"
-db-ML-3085,ml-ops-dev-rds-08,RDS,db.m5.xlarge,ap-south-1,ml-ops,dev,23.3,39.43,8.21,246.3,107,2026-04-12,available,"team:ml-ops,env:dev"
-s3-frontend-dev-bucket-01,frontend-dev-s3-01,S3,Standard-1625GB,us-east-1,frontend,dev,0,0,1.25,37.38,310,2026-04-04,active,"team:frontend,env:dev"
-s3-data-eng-dev-bucket-02,data-eng-dev-s3-02,S3,Standard-203GB,us-east-1,data-eng,dev,0,0,0.16,4.67,250,2026-04-29,active,"team:data-eng,env:dev"
-s3-frontend-dev-bucket-03,frontend-dev-s3-03,S3,Standard-4951GB,us-east-1,frontend,dev,0,0,3.8,113.87,256,2026-03-27,active,"team:frontend,env:dev"
-s3-payments-sandbox-bucket-04,payments-sandbox-s3-04,S3,Standard-3822GB,us-east-1,payments,sandbox,0,0,2.93,87.91,116,2026-02-13,active,"team:payments,env:sandbox"
-s3-data-eng-dev-bucket-05,data-eng-dev-s3-05,S3,Standard-894GB,us-east-1,data-eng,dev,0,0,0.69,20.56,150,2026-05-14,active,"team:data-eng,env:dev"
-s3-payments-staging-bucket-06,payments-staging-s3-06,S3,Standard-2742GB,us-east-1,payments,staging,0,0,2.1,63.07,42,2026-05-07,active,"team:payments,env:staging"
-s3-payments-sandbox-bucket-07,payments-sandbox-s3-07,S3,Standard-578GB,us-east-1,payments,sandbox,0,0,0.44,13.29,67,2026-03-08,active,"team:payments,env:sandbox"
-s3-ml-ops-staging-bucket-08,ml-ops-staging-s3-08,S3,Standard-3930GB,us-east-1,ml-ops,staging,0,0,3.01,90.39,226,2026-02-23,active,"team:ml-ops,env:staging"
-eipalloc-063640499,ml-ops-unused-eip-01,Elastic IP,eip-unassociated,ap-south-1,ml-ops,staging,0,0,0.12,3.6,88,2026-05-15,unassociated,team:ml-ops
-eipalloc-067062156,platform-unused-eip-02,Elastic IP,eip-unassociated,ap-south-1,platform,staging,0,0,0.12,3.6,71,2026-03-17,unassociated,team:platform
-eipalloc-084813739,platform-unused-eip-03,Elastic IP,eip-unassociated,ap-south-1,platform,prod,0,0,0.12,3.6,63,2026-04-28,unassociated,team:platform
-eipalloc-099600766,ml-ops-unused-eip-04,Elastic IP,eip-unassociated,eu-west-1,ml-ops,dev,0,0,0.12,3.6,61,2026-02-26,unassociated,team:ml-ops
-eipalloc-067276174,frontend-unused-eip-05,Elastic IP,eip-unassociated,eu-west-1,frontend,sandbox,0,0,0.12,3.6,25,2026-03-16,unassociated,team:frontend
-eipalloc-044788100,ml-ops-unused-eip-06,Elastic IP,eip-unassociated,ap-south-1,ml-ops,dev,0,0,0.12,3.6,71,2026-03-14,unassociated,team:ml-ops
+resource_id,resource_name,service,resource_type,region,team,environment,cpu_avg_7d,memory_avg_7d,daily_cost_usd,monthly_cost_usd,days_running,last_accessed,status,tags
+i-09963334018,platform-dev-ec2-01,EC2,t3.large,us-east-1,platform,dev,3.22,7.86,2.0,60.0,34,2026-03-09,running,"team:platform,env:dev"
+i-02801823908,payments-staging-ec2-02,EC2,t3.large,us-east-1,payments,staging,49.04,58.03,2.0,60.0,67,2026-03-02,running,"team:payments,env:staging"
+i-01438989805,payments-sandbox-ec2-03,EC2,t3.medium,us-east-1,payments,sandbox,1.3,13.7,1.0,30.0,46,2026-04-04,running,"team:payments,env:sandbox"
+i-02625792787,frontend-dev-ec2-04,EC2,r5.2xlarge,eu-west-1,frontend,dev,2.2,31.39,12.1,363.0,106,2026-05-02,running,"team:frontend,env:dev"
+i-05231494220,frontend-staging-ec2-05,EC2,r5.2xlarge,ap-south-1,frontend,staging,2.95,82.98,12.1,363.0,93,2026-04-10,running,"team:frontend,env:staging"
+i-03783290795,data-eng-sandbox-ec2-06,EC2,m5.4xlarge,eu-west-1,data-eng,sandbox,22.74,62.15,18.43,552.9,84,2026-04-09,running,"team:data-eng,env:sandbox"
+i-01240251661,payments-sandbox-ec2-07,EC2,m5.2xlarge,ap-south-1,payments,sandbox,3.92,77.45,9.22,276.6,51,2026-03-07,running,"team:payments,env:sandbox"
+i-07439149233,data-eng-prod-ec2-08,EC2,m5.4xlarge,eu-west-1,data-eng,prod,49.43,46.3,18.43,552.9,30,2026-04-14,running,"team:data-eng,env:prod"
+i-07801222128,payments-staging-ec2-09,EC2,c5.4xlarge,us-east-1,payments,staging,29.34,24.74,16.32,489.6,98,2026-03-11,running,"team:payments,env:staging"
+i-03694860228,frontend-sandbox-ec2-10,EC2,m5.xlarge,eu-west-1,frontend,sandbox,68.83,42.99,4.61,138.3,14,2026-05-05,running,"team:frontend,env:sandbox"
+i-09639245200,ml-ops-sandbox-ec2-11,EC2,t3.large,eu-west-1,ml-ops,sandbox,46.92,15.31,2.0,60.0,79,2026-03-15,running,"team:ml-ops,env:sandbox"
+i-04272602734,data-eng-prod-ec2-12,EC2,c5.4xlarge,eu-west-1,data-eng,prod,2.18,87.14,16.32,489.6,40,2026-04-14,running,"team:data-eng,env:prod"
+i-04944899549,frontend-staging-ec2-13,EC2,c5.4xlarge,eu-west-1,frontend,staging,1.1,34.17,16.32,489.6,22,2026-03-11,running,"team:frontend,env:staging"
+i-01248786714,platform-dev-ec2-14,EC2,t3.medium,eu-west-1,platform,dev,65.25,90.23,1.0,30.0,115,2026-04-24,running,"team:platform,env:dev"
+i-03361388464,ml-ops-prod-ec2-15,EC2,t3.large,us-east-1,ml-ops,prod,20.3,52.49,2.0,60.0,100,2026-03-01,running,"team:ml-ops,env:prod"
+i-07517938612,payments-staging-ec2-16,EC2,m5.4xlarge,ap-south-1,payments,staging,4.18,27.31,18.43,552.9,94,2026-03-31,running,"team:payments,env:staging"
+i-09894847574,data-eng-prod-ec2-17,EC2,t3.large,eu-west-1,data-eng,prod,50.84,10.3,2.0,60.0,78,2026-05-24,running,"team:data-eng,env:prod"
+i-03030106617,platform-dev-ec2-18,EC2,t3.large,us-east-1,platform,dev,30.38,77.67,2.0,60.0,12,2026-03-23,running,"team:platform,env:dev"
+i-09822598054,platform-prod-ec2-19,EC2,m5.2xlarge,eu-west-1,platform,prod,2.07,93.57,9.22,276.6,87,2026-03-19,running,"team:platform,env:prod"
+i-01822873088,platform-sandbox-ec2-20,EC2,t3.large,eu-west-1,platform,sandbox,3.69,45.38,2.0,60.0,96,2026-04-16,running,"team:platform,env:sandbox"
+i-01420514789,payments-dev-ec2-21,EC2,m5.4xlarge,eu-west-1,payments,dev,3.92,94.94,18.43,552.9,62,2026-03-22,running,"team:payments,env:dev"
+i-04875962612,platform-staging-ec2-22,EC2,t3.medium,ap-south-1,platform,staging,2.28,39.11,1.0,30.0,24,2026-03-27,running,"team:platform,env:staging"
+i-03090237817,data-eng-sandbox-ec2-23,EC2,m5.4xlarge,ap-south-1,data-eng,sandbox,4.04,31.7,18.43,552.9,39,2026-03-07,running,"team:data-eng,env:sandbox"
+i-01676168421,frontend-prod-ec2-24,EC2,t3.medium,eu-west-1,frontend,prod,0.69,12.21,1.0,30.0,98,2026-03-17,running,"team:frontend,env:prod"
+i-03553440342,platform-prod-ec2-25,EC2,m5.xlarge,eu-west-1,platform,prod,38.65,12.38,4.61,138.3,89,2026-03-13,running,"team:platform,env:prod"
+vol-08370987661,frontend-dev-ebs-01,EBS,gp3-1000GB,ap-south-1,frontend,dev,0,0,3.33,100.0,71,2026-03-15,available (unattached),"team:frontend,env:dev"
+vol-08067372072,ml-ops-staging-ebs-02,EBS,gp3-200GB,us-east-1,ml-ops,staging,0,0,0.67,20.0,176,2025-11-30,available (unattached),"team:ml-ops,env:staging"
+vol-01429416213,platform-prod-ebs-03,EBS,gp3-200GB,eu-west-1,platform,prod,0,0,0.67,20.0,122,2026-01-23,available (unattached),"team:platform,env:prod"
+vol-09303285822,frontend-dev-ebs-04,EBS,gp3-100GB,ap-south-1,frontend,dev,0,0,0.33,10.0,38,2026-04-17,available (unattached),"team:frontend,env:dev"
+vol-03271782991,data-eng-dev-ebs-05,EBS,gp3-100GB,eu-west-1,data-eng,dev,0,0,0.33,10.0,45,2026-04-10,available (unattached),"team:data-eng,env:dev"
+vol-02135872495,data-eng-prod-ebs-06,EBS,gp3-1000GB,ap-south-1,data-eng,prod,0,0,3.33,100.0,39,2026-04-16,available (unattached),"team:data-eng,env:prod"
+vol-03597724331,payments-dev-ebs-07,EBS,gp3-1000GB,eu-west-1,payments,dev,0,0,3.33,100.0,77,2026-03-09,available (unattached),"team:payments,env:dev"
+vol-07393195616,payments-dev-ebs-08,EBS,gp3-200GB,ap-south-1,payments,dev,0,0,0.67,20.0,134,2026-01-11,available (unattached),"team:payments,env:dev"
+vol-05310195918,ml-ops-dev-ebs-09,EBS,gp3-50GB,ap-south-1,ml-ops,dev,0,0,0.17,5.0,16,2026-05-09,available (unattached),"team:ml-ops,env:dev"
+vol-08325785916,payments-sandbox-ebs-10,EBS,gp3-200GB,eu-west-1,payments,sandbox,0,0,0.67,20.0,146,2025-12-30,available (unattached),"team:payments,env:sandbox"
+vol-03343292475,platform-prod-ebs-11,EBS,gp3-50GB,us-east-1,platform,prod,0,0,0.17,5.0,43,2026-04-12,available (unattached),"team:platform,env:prod"
+vol-01547374338,frontend-staging-ebs-12,EBS,gp3-1000GB,us-east-1,frontend,staging,0,0,3.33,100.0,115,2026-01-30,available (unattached),"team:frontend,env:staging"
+vol-03929454134,platform-dev-ebs-13,EBS,gp3-200GB,eu-west-1,platform,dev,0,0,0.67,20.0,58,2026-03-28,available (unattached),"team:platform,env:dev"
+db-DAT4878,data-eng-sandbox-rds-01,RDS,db.t3.medium,ap-south-1,data-eng,sandbox,43.73,78.33,1.63,48.9,55,2026-04-29,available,"team:data-eng,env:sandbox"
+db-PAY5065,payments-dev-rds-02,RDS,db.t3.medium,us-east-1,payments,dev,28.53,21.14,1.63,48.9,189,2026-05-19,available,"team:payments,env:dev"
+db-PLA8541,platform-sandbox-rds-03,RDS,db.r5.2xlarge,us-east-1,platform,sandbox,2.91,31.36,23.04,691.2,68,2026-05-11,available,"team:platform,env:sandbox"
+db-PAY5573,payments-sandbox-rds-04,RDS,db.t3.medium,us-east-1,payments,sandbox,3.07,54.9,1.63,48.9,112,2026-04-12,available,"team:payments,env:sandbox"
+db-PLA5349,platform-prod-rds-05,RDS,db.m5.xlarge,ap-south-1,platform,prod,21.75,17.59,8.21,246.3,121,2026-05-03,available,"team:platform,env:prod"
+db-ML-5173,ml-ops-prod-rds-06,RDS,db.m5.xlarge,ap-south-1,ml-ops,prod,35.95,59.61,8.21,246.3,10,2026-04-22,available,"team:ml-ops,env:prod"
+db-DAT6409,data-eng-sandbox-rds-07,RDS,db.m5.large,eu-west-1,data-eng,sandbox,2.43,31.97,4.1,123.0,41,2026-05-06,available,"team:data-eng,env:sandbox"
+db-ML-3085,ml-ops-dev-rds-08,RDS,db.m5.xlarge,ap-south-1,ml-ops,dev,23.3,39.43,8.21,246.3,107,2026-04-12,available,"team:ml-ops,env:dev"
+s3-frontend-dev-bucket-01,frontend-dev-s3-01,S3,Standard-1625GB,us-east-1,frontend,dev,0,0,1.25,37.38,310,2026-04-04,active,"team:frontend,env:dev"
+s3-data-eng-dev-bucket-02,data-eng-dev-s3-02,S3,Standard-203GB,us-east-1,data-eng,dev,0,0,0.16,4.67,250,2026-04-29,active,"team:data-eng,env:dev"
+s3-frontend-dev-bucket-03,frontend-dev-s3-03,S3,Standard-4951GB,us-east-1,frontend,dev,0,0,3.8,113.87,256,2026-03-27,active,"team:frontend,env:dev"
+s3-payments-sandbox-bucket-04,payments-sandbox-s3-04,S3,Standard-3822GB,us-east-1,payments,sandbox,0,0,2.93,87.91,116,2026-02-13,active,"team:payments,env:sandbox"
+s3-data-eng-dev-bucket-05,data-eng-dev-s3-05,S3,Standard-894GB,us-east-1,data-eng,dev,0,0,0.69,20.56,150,2026-05-14,active,"team:data-eng,env:dev"
+s3-payments-staging-bucket-06,payments-staging-s3-06,S3,Standard-2742GB,us-east-1,payments,staging,0,0,2.1,63.07,42,2026-05-07,active,"team:payments,env:staging"
+s3-payments-sandbox-bucket-07,payments-sandbox-s3-07,S3,Standard-578GB,us-east-1,payments,sandbox,0,0,0.44,13.29,67,2026-03-08,active,"team:payments,env:sandbox"
+s3-ml-ops-staging-bucket-08,ml-ops-staging-s3-08,S3,Standard-3930GB,us-east-1,ml-ops,staging,0,0,3.01,90.39,226,2026-02-23,active,"team:ml-ops,env:staging"
+eipalloc-063640499,ml-ops-unused-eip-01,Elastic IP,eip-unassociated,ap-south-1,ml-ops,staging,0,0,0.12,3.6,88,2026-05-15,unassociated,team:ml-ops
+eipalloc-067062156,platform-unused-eip-02,Elastic IP,eip-unassociated,ap-south-1,platform,staging,0,0,0.12,3.6,71,2026-03-17,unassociated,team:platform
+eipalloc-084813739,platform-unused-eip-03,Elastic IP,eip-unassociated,ap-south-1,platform,prod,0,0,0.12,3.6,63,2026-04-28,unassociated,team:platform
+eipalloc-099600766,ml-ops-unused-eip-04,Elastic IP,eip-unassociated,eu-west-1,ml-ops,dev,0,0,0.12,3.6,61,2026-02-26,unassociated,team:ml-ops
+eipalloc-067276174,frontend-unused-eip-05,Elastic IP,eip-unassociated,eu-west-1,frontend,sandbox,0,0,0.12,3.6,25,2026-03-16,unassociated,team:frontend
+eipalloc-044788100,ml-ops-unused-eip-06,Elastic IP,eip-unassociated,ap-south-1,ml-ops,dev,0,0,0.12,3.6,71,2026-03-14,unassociated,team:ml-ops
+nat-0abc123456,platform-prod-nat-01,NAT Gateway,nat-gateway,us-east-1,platform,prod,0.3,0,1.44,43.2,180,2026-01-15,active,"team:platform,env:prod"
+nat-0def234567,data-eng-dev-nat-02,NAT Gateway,nat-gateway,eu-west-1,data-eng,dev,0.05,0,1.08,32.4,90,2026-02-20,active,"team:data-eng,env:dev"
+nat-0ghi345678,frontend-staging-nat-03,NAT Gateway,nat-gateway,ap-south-1,frontend,staging,0.12,0,1.08,32.4,65,2026-03-10,active,"team:frontend,env:staging"
+alb-0abc123456,platform-dev-alb-01,ALB,application,us-east-1,platform,dev,0,0,0.53,16.0,45,2026-04-10,active,"team:platform,env:dev"
+alb-0def234567,payments-staging-alb-02,ALB,application,eu-west-1,payments,staging,0,1,0.53,16.0,72,2026-03-20,active,"team:payments,env:staging"
+nlb-0ghi345678,ml-ops-dev-nlb-03,NLB,network,ap-south-1,ml-ops,dev,0,0,0.53,16.0,38,2026-04-25,active,"team:ml-ops,env:dev"
+i-old1234abc,platform-dev-ec2-oldgen-01,EC2,t2.large,us-east-1,platform,dev,45.2,52.3,2.23,66.82,180,2026-05-01,running,"team:platform,env:dev"
+i-old2345bcd,data-eng-prod-ec2-oldgen-02,EC2,m4.xlarge,eu-west-1,data-eng,prod,38.7,45.1,4.8,144.0,365,2026-05-10,running,"team:data-eng,env:prod"
+i-old3456cde,payments-staging-ec2-oldgen-03,EC2,c4.4xlarge,us-east-1,payments,staging,52.1,38.9,19.1,573.12,220,2026-04-15,running,"team:payments,env:staging"
+i-old4567def,frontend-prod-ec2-oldgen-04,EC2,r4.2xlarge,ap-south-1,frontend,prod,28.4,62.3,12.77,383.04,150,2026-05-05,running,"team:frontend,env:prod"
+snap-0abc123456,platform-dev-snap-01,EBS Snapshot,snap-500GB,us-east-1,platform,dev,0,0,0.83,25.0,120,2026-01-25,orphaned,"team:platform,env:dev"
+snap-0def234567,data-eng-prod-snap-02,EBS Snapshot,snap-1000GB,eu-west-1,data-eng,prod,0,0,1.67,50.0,200,2025-11-04,orphaned,"team:data-eng,env:prod"
+snap-0ghi345678,payments-staging-snap-03,EBS Snapshot,snap-2000GB,us-east-1,payments,staging,0,0,3.33,100.0,95,2026-02-19,orphaned,"team:payments,env:staging"
+snap-0jkl456789,ml-ops-sandbox-snap-04,EBS Snapshot,snap-200GB,ap-south-1,ml-ops,sandbox,0,0,0.33,10.0,150,2026-01-26,orphaned,"team:ml-ops,env:sandbox"
+snap-0mno567890,frontend-dev-snap-05,EBS Snapshot,snap-300GB,eu-west-1,frontend,dev,0,0,0.5,15.0,110,2026-02-04,orphaned,"team:frontend,env:dev"
+vol-gp2-123456,frontend-prod-gp2-01,EBS,gp2-500GB,us-east-1,frontend,prod,0,0,1.67,50.0,365,2025-05-26,in-use,"team:frontend,env:prod"
+vol-gp2-234567,data-eng-staging-gp2-02,EBS,gp2-1000GB,eu-west-1,data-eng,staging,0,0,3.33,100.0,180,2025-11-26,in-use,"team:data-eng,env:staging"
+vol-gp2-345678,payments-dev-gp2-03,EBS,gp2-200GB,us-east-1,payments,dev,0,0,0.67,20.0,90,2026-02-24,in-use,"team:payments,env:dev"
+vol-gp2-456789,ml-ops-prod-gp2-04,EBS,gp2-300GB,ap-south-1,ml-ops,prod,0,0,1.0,30.0,270,2025-09-28,in-use,"team:ml-ops,env:prod"
+vol-gp2-567890,platform-staging-gp2-05,EBS,gp2-100GB,eu-west-1,platform,staging,0,0,0.33,10.0,120,2026-01-26,in-use,"team:platform,env:staging"
+i-od1234abc,platform-prod-ec2-od-01,EC2,m5.xlarge,us-east-1,platform,prod,65.4,42.1,4.61,138.3,240,2025-09-28,running-ondemand,"team:platform,env:prod"
+i-od2345bcd,payments-prod-ec2-od-02,EC2,m5.2xlarge,us-east-1,payments,prod,72.3,55.8,9.22,276.6,180,2025-11-27,running-ondemand,"team:payments,env:prod"
+i-od3456cde,data-eng-prod-ec2-od-03,EC2,c5.4xlarge,us-east-1,data-eng,prod,58.7,38.4,16.32,489.6,365,2025-05-26,running-ondemand,"team:data-eng,env:prod"
+i-od4567def,frontend-prod-ec2-od-04,EC2,r5.2xlarge,us-east-1,frontend,prod,45.2,68.9,12.1,363.0,270,2025-09-28,running-ondemand,"team:frontend,env:prod"
+lg-platform-001,/aws/lambda/platform-api,CloudWatch Logs,log-group-infinite,us-east-1,platform,prod,0,0,0.5,15.0,365,2025-05-26,active,"team:platform,env:prod"
+lg-payments-002,/aws/ecs/payments-service,CloudWatch Logs,log-group-infinite,eu-west-1,payments,prod,0,0,1.2,36.0,270,2025-09-28,active,"team:payments,env:prod"
+lg-data-eng-003,/aws/glue/data-pipeline,CloudWatch Logs,log-group-infinite,us-east-1,data-eng,prod,0,0,0.8,24.0,180,2025-11-27,active,"team:data-eng,env:prod"
+lg-ml-ops-004,/aws/sagemaker/training,CloudWatch Logs,log-group-infinite,ap-south-1,ml-ops,prod,0,0,0.3,9.0,120,2026-01-26,active,"team:ml-ops,env:prod"
+lg-frontend-005,/aws/cloudfront/access,CloudWatch Logs,log-group-infinite,us-east-1,frontend,prod,0,0,0.6,18.0,200,2025-11-07,active,"team:frontend,env:prod"
+i-stop123abc,platform-dev-ec2-stopped-01,EC2,m5.xlarge,us-east-1,platform,dev,0,0,0.5,15.0,60,2026-03-27,stopped,"team:platform,env:dev"
+i-stop234bcd,data-eng-sandbox-ec2-stopped-02,EC2,m5.2xlarge,eu-west-1,data-eng,sandbox,0,0,1.0,30.0,90,2026-02-25,stopped,"team:data-eng,env:sandbox"
+i-stop345cde,payments-dev-ec2-stopped-03,EC2,c5.4xlarge,us-east-1,payments,dev,0,0,1.67,50.0,45,2026-04-11,stopped,"team:payments,env:dev"
+i-stop456def,ml-ops-staging-ec2-stopped-04,EC2,t3.large,ap-south-1,ml-ops,staging,0,0,0.33,10.0,30,2026-04-26,stopped,"team:ml-ops,env:staging"
+cache-prod-001,data-eng-redis-prod-01,ElastiCache,cache.r6g.large,us-east-1,data-eng,prod,4.2,15.3,3.98,119.5,90,2026-02-25,available,"team:data-eng,env:prod"
+cache-dev-002,platform-dev-redis-02,ElastiCache,cache.r6g.large,eu-west-1,platform,dev,1.8,8.7,3.98,119.5,60,2026-03-27,available,"team:platform,env:dev"
+rs-staging-001,data-eng-redshift-staging,Redshift,dc2.large,us-east-1,data-eng,staging,3.5,12.4,6.0,180.0,180,2025-11-27,available,"team:data-eng,env:staging"
+rs-prod-001,ml-ops-redshift-prod,Redshift,dc2.xlarge,ap-south-1,ml-ops,prod,2.1,9.8,12.0,360.0,120,2026-01-26,available,"team:ml-ops,env:prod"
diff --git a/dashboard_AI.py b/dashboard_AI.py
new file mode 100644
index 0000000..38bdefc
--- /dev/null
+++ b/dashboard_AI.py
@@ -0,0 +1,370 @@
+import json, os, urllib.request
+import streamlit as st
+import plotly.express as px
+import pandas as pd
+import requests
+
+st.set_page_config(page_title="Cloud Cost Waste Hunter", page_icon="",  # page_icon is passed as an empty string
+    layout="wide", initial_sidebar_state="expanded")  # wide layout, sidebar expanded initially
+
+st.markdown("""
+<style>
+.main-header{font-size:1.8rem;font-weight:700;color:var(--text-color)}
+.sub-header{color:#64748B;font-size:0.88rem;margin-top:-6px;margin-bottom:16px}
+.metric-card{background:white;border-radius:12px;padding:16px 20px;border:1px solid #e8e8e8;box-shadow:0 1px 4px rgba(0,0,0,0.05)}
+.metric-label{font-size:0.72rem;color:#888;font-weight:500;text-transform:uppercase;letter-spacing:.05em;margin-bottom:3px}
+.metric-value{font-size:1.7rem;font-weight:700;color:#1a1a2e;line-height:1.1}
+.metric-sub{font-size:0.78rem;color:#e05252;margin-top:3px;font-weight:500}
+.exec-summary{background:#f0f7ff;border-left:4px solid #3b82f6;border-radius:0 8px 8px 0;padding:14px 18px;font-size:0.88rem;color:#1e3a5f;line-height:1.6;margin-bottom:20px}
+.finding-card{background:white;border-radius:10px;padding:14px 18px;border:1px solid #e8e8e8;margin-bottom:10px;border-left:4px solid #e05252}
+.finding-card.medium{border-left-color:#f59e0b}
+.finding-card.low{border-left-color:#6b7280}
+.finding-rank{font-size:0.72rem;color:#888;font-weight:600}
+.finding-name{font-size:0.95rem;font-weight:600;color:#1a1a2e;margin:2px 0}
+.finding-plain{font-size:0.84rem;color:#444;line-height:1.5;margin:5px 0}
+.finding-meta{display:flex;gap:10px;flex-wrap:wrap;margin-top:6px}
+.badge{font-size:0.7rem;font-weight:600;padding:2px 8px;border-radius:99px;display:inline-block}
+.badge-high{background:#fee2e2;color:#b91c1c}
+.badge-medium{background:#fef3c7;color:#92400e}
+.badge-low{background:#f3f4f6;color:#374151}
+.badge-category{background:#ede9fe;color:#5b21b6}
+.saving-tag{font-size:0.84rem;font-weight:700;color:#e05252}
+.cli-box{background:#1e1e2e;color:#a6e3a1;font-family:monospace;font-size:0.76rem;padding:8px 12px;border-radius:6px;margin-top:6px;overflow-x:auto;white-space:nowrap}
+.quick-win{background:#f0fdf4;border:1px solid #bbf7d0;border-radius:8px;padding:10px 14px;margin-bottom:6px;font-size:0.84rem;color:#166534}
+.source-badge{background:#f1f5f9;border:1px solid #e2e8f0;border-radius:6px;padding:4px 10px;font-size:0.78rem;color:#475569;margin-bottom:12px;display:inline-block}
+.chat-panel{background:white;border:1px solid #e2e8f0;border-radius:12px;padding:14px;height:100%;display:flex;flex-direction:column}
+.chat-panel-header{font-size:0.95rem;font-weight:600;color:#1a1a2e;margin-bottom:4px;display:flex;align-items:center;gap:8px}
+.chat-panel-sub{font-size:0.76rem;color:#888;margin-bottom:12px}
+.chat-bubble-user{background:#EFF6FF;border-radius:10px;padding:8px 12px;margin:4px 0;font-size:0.84rem;color:#1E3A5F}
+.chat-bubble-ai{background:#F8FAFC;border:1px solid #E2E8F0;border-radius:10px;padding:8px 12px;margin:4px 0;font-size:0.84rem;color:#1E293B}
+.sug-btn{font-size:0.75rem}
+</style>
+""", unsafe_allow_html=True)
+
+# ── Load report ────────────────────────────────────────────────────────────────
+@st.cache_data  # Streamlit caches the return value per argument set
+def load_report(path="llm_report.json"):  # returns the parsed JSON content of `path`
+    with open(path, encoding="utf-8") as f:  # explicit UTF-8 so decoding does not depend on the platform locale
+        return json.load(f)
+
+report     = load_report()  # uses the default path, llm_report.json
+quick_wins = report.get("quick_wins", [])  # empty list when the report has no such key
+raw_findings = report.get("findings", [])  # finding dicts as stored in the report, before normalise()
+
+def normalise(f):  # map one raw finding dict onto a fixed set of keys, accepting either field naming
+    return {
+        "rank":            f.get("rank", 0),
+        "name":            f.get("service", f.get("resource_name", "Unknown")),  # "service" takes precedence
+        "category":        f.get("category", f.get("flag", "—")),  # falls back to "flag", then an em dash
+        "plain_english":   f.get("plain_english", ""),
+        "business_impact": f.get("business_impact", ""),
+        "monthly_saving":  f.get("monthly_opportunity", f.get("monthly_saving", 0.0)),  # either key; 0.0 if neither
+        "priority_action": f.get("priority_action", ""),
+        "aws_action":      f.get("aws_action", f.get("cli_fix", "")),
+        "severity":        f.get("severity", "HIGH" if f.get("monthly_opportunity", f.get("monthly_saving", 0)) > 100 else "MEDIUM"),  # default: HIGH when the monthly figure exceeds 100, else MEDIUM
+    }
+
+findings      = [normalise(f) for f in raw_findings]
+total_monthly = report.get("total_monthly_opportunity", report.get("total_monthly_waste", 0))  # either key; 0 if neither
+total_annual  = report.get("total_annual_waste", total_monthly * 12)  # defaults to 12x the monthly figure
+total_spend   = report.get("total_monthly_spend", 0)  # a falsy value makes build_context omit the spend figure
+raw_services  = report.get("raw_data", {}).get("services", [])
+all_f_legacy  = report.get("all_findings", [])
+
+# ── Claude chatbot helpers ─────────────────────────────────────────────────────
+def build_context():  # builds the system prompt that call_claude sends with each request
+    lines = [
+        "You are a senior FinOps engineer assistant in the Ghost Busters Cloud Cost Waste Hunter dashboard.",
+        "Answer clearly and concisely, grounding every response in the actual account data below.",
+        "Keep answers to 3-5 sentences unless the user asks for detail.",
+        "",
+        f"Data source: {report.get('source', 'AWS Cost Explorer')}",
+        f"Monthly spend: ${total_spend:,.2f}" if total_spend else None,  # None (not "") so the filter in the return drops it
+        f"Monthly opportunity: ${total_monthly:,.2f}",
+        f"Executive summary: {report.get('executive_summary', '')}",
+        "",
+        "FINDINGS:",
+    ]
+    for fi in raw_findings:  # one pipe-separated line per finding; free-text fields are truncated
+        lines.append(
+            f"#{fi.get('rank','')} {fi.get('service', fi.get('resource_name',''))} | "
+            f"${fi.get('monthly_opportunity', fi.get('monthly_saving', 0)):,.2f}/mo | "
+            f"{fi.get('plain_english','')[:120]} | "
+            f"Action: {fi.get('priority_action','')[:80]}"
+        )
+    lines += ["", "QUICK WINS:"] + [f"- {w}" for w in quick_wins]
+    sb = report.get("service_breakdown", {})
+    if sb:  # breakdown lines are added only when the report carries a non-empty breakdown
+        lines += [
+            f"Biggest concern: {sb.get('biggest_concern','')}",
+            f"Watch list: {', '.join(sb.get('watch_list',[]))}",
+        ]
+    lines.append(f"Recommendation: {report.get('closing_recommendation','')}")
+    return "\n".join(l for l in lines if l is not None)  # drops only None; "" entries stay as blank separator lines
+
+def call_claude(messages):  # POSTs `messages` to the Anthropic Messages endpoint; returns the reply text or an error string
+    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if not api_key:  # no key: return a setup hint without attempting the request
+        return "⚠️ ANTHROPIC_API_KEY not set. Run `export ANTHROPIC_API_KEY='sk-ant-...'` then restart Streamlit."
+    try:
+        payload = json.dumps({
+            "model": "claude-sonnet-4-20250514",
+            "max_tokens": 800,
+            "system": build_context(),  # prompt is rebuilt on every call
+            "messages": messages
+        }).encode()
+        req = urllib.request.Request(
+            "https://api.anthropic.com/v1/messages",
+            data=payload,
+            headers={"Content-Type":"application/json",
+                     "x-api-key":api_key,
+                     "anthropic-version":"2023-06-01"},
+            method="POST"
+        )
+        with urllib.request.urlopen(req, timeout=30) as resp:  # timeout is in seconds
+            data = json.loads(resp.read().decode())
+        return data["content"][0]["text"]  # "text" field of the first item in the response's "content" list
+    except Exception as e:  # any failure is returned as a message string rather than raised
+        return f"❌ Error: {e}"
+
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+
+# ── Sidebar ────────────────────────────────────────────────────────────────────
+with st.sidebar:
+    st.markdown("## Ghost Busters")
+    st.markdown("*Cloud Cost Waste Hunter*")
+    st.markdown("---")
+    categories   = sorted(set(f["category"] for f in findings))
+    selected_cats = st.multiselect("Filter by category", categories, default=categories)
+    selected_sev  = st.multiselect("Filter by severity", ["HIGH","MEDIUM","LOW"], default=["HIGH","MEDIUM","LOW"])
+    st.markdown("---")
+    st.markdown("**Slack webhook alert**")
+    slack_url = st.text_input("Webhook URL", placeholder="https://hooks.slack.com/...")
+    if st.button("🔔 Fire top finding alert", use_container_width=True):
+        if slack_url and findings:
+            top = findings[0]
+            payload = {"blocks":[
+                {"type":"header","text":{"type":"plain_text","text":" Cloud Cost Waste Hunter Alert"}},
+                {"type":"section","text":{"type":"mrkdwn","text":f"*#{top['rank']} — {top['name']}*\n{top['plain_english']}"}},
+                {"type":"section","fields":[
+                    {"type":"mrkdwn","text":f"*Opportunity*\n${top['monthly_saving']:,.2f}/mo"},
+                    {"type":"mrkdwn","text":f"*Action*\n{top['priority_action'][:80]}..."}
+                ]},
+                {"type":"section","text":{"type":"mrkdwn","text":f"*Total opportunity:* ${total_monthly:,.2f}/mo"}}
+            ]}
+            try:
+                r = requests.post(slack_url, json=payload, timeout=5)
+                st.success("✅ Sent!") if r.status_code==200 else st.error(f"Failed: {r.status_code}")
+            except Exception as e:
+                st.error(str(e))
+        else:
+            st.warning("Enter a Slack webhook URL first")
+    st.markdown("---")
+    st.caption(f"Generated: {report.get('generated_at','—')}")
+    if report.get("source"): st.caption(f"Source: {report['source']}")
+
+# ── Page header ───────────────────────────────────────────────────────────────
+st.markdown('<div class="main-header"> Cloud Cost Waste Hunter</div>', unsafe_allow_html=True)
+st.markdown('<div class="sub-header">AI-powered AWS cost analysis · Perforce Global Jam 2026</div>', unsafe_allow_html=True)
+if report.get("source"):
+    st.markdown(f'<span class="source-badge">📊 {report["source"]}</span>', unsafe_allow_html=True)
+
+# ── MAIN LAYOUT: left 62% content | right 38% chatbot ─────────────────────────
+main_col, chat_col = st.columns([0.62, 0.38])
+
+with main_col:
+    # Metric cards
+    c1, c2, c3, c4 = st.columns(4)
+    with c1:
+        st.markdown(f"""<div class="metric-card">
+            <div class="metric-label">Monthly opportunity</div>
+            <div class="metric-value">${total_monthly:,.0f}</div>
+            <div class="metric-sub">recoverable now</div>
+        </div>""", unsafe_allow_html=True)
+    with c2:
+        st.markdown(f"""<div class="metric-card">
+            <div class="metric-label">Annual opportunity</div>
+            <div class="metric-value">${total_annual:,.0f}</div>
+            <div class="metric-sub">if unaddressed</div>
+        </div>""", unsafe_allow_html=True)
+    with c3:
+        st.markdown(f"""<div class="metric-card">
+            <div class="metric-label">Findings</div>
+            <div class="metric-value">{len(findings)}</div>
+            <div class="metric-sub">services flagged</div>
+        </div>""", unsafe_allow_html=True)
+    with c4:
+        if total_spend > 0:
+            pct = round((total_monthly / total_spend) * 100, 1)
+            st.markdown(f"""<div class="metric-card">
+                <div class="metric-label">Total spend</div>
+                <div class="metric-value">${total_spend:,.0f}</div>
+                <div class="metric-sub">{pct}% recoverable</div>
+            </div>""", unsafe_allow_html=True)
+        else:
+            top_f = findings[0] if findings else {}
+            st.markdown(f"""<div class="metric-card">
+                <div class="metric-label">Top finding</div>
+                <div class="metric-value">{top_f.get('name','—')[:12]}</div>
+                <div class="metric-sub">${top_f.get('monthly_saving',0):,.0f}/mo</div>
+            </div>""", unsafe_allow_html=True)
+
+    st.markdown("<br>", unsafe_allow_html=True)
+
+    # AI summary (.get: a report without an executive summary must not crash the page)
+    st.markdown(f'<div class="exec-summary">🤖 <strong>AI Summary</strong><br>{report.get("executive_summary", "")}</div>',
+        unsafe_allow_html=True)
+
+    # Charts
+    chart_l, chart_r = st.columns(2)
+    with chart_l:
+        st.markdown("#### Cost by service")
+        src = raw_services or []
+        if src:
+            svc_df = pd.DataFrame([
+                {"Service": s["service"][:22], "April ($)": s["apr_2026"]}
+                for s in sorted(src, key=lambda x: -x["apr_2026"])[:8]
+            ])
+            fig = px.bar(svc_df, x="April ($)", y="Service", orientation="h",
+                color="April ($)", color_continuous_scale=["#fde8e8","#e05252"], text="April ($)")
+            fig.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
+            fig.update_layout(showlegend=False, coloraxis_showscale=False,
+                plot_bgcolor="white", paper_bgcolor="white",
+                margin=dict(l=0,r=60,t=10,b=0), height=260,
+                yaxis=dict(showgrid=False), xaxis=dict(showgrid=True,gridcolor="#f0f0f0"))
+            st.plotly_chart(fig, use_container_width=True)
+        elif all_f_legacy:
+            svc_t = {}
+            for f in all_f_legacy:
+                svc_t[f.get("service","Other")] = svc_t.get(f.get("service","Other"),0)+f.get("monthly_waste_usd",0)
+            sdf = pd.DataFrame([{"Service":k,"Waste ($)":round(v,2)} for k,v in sorted(svc_t.items(),key=lambda x:-x[1])])
+            fig = px.bar(sdf,x="Waste ($)",y="Service",orientation="h",
+                color="Waste ($)",color_continuous_scale=["#fde8e8","#e05252"],text="Waste ($)")
+            fig.update_traces(texttemplate="$%{text:,.0f}",textposition="outside")
+            fig.update_layout(showlegend=False,coloraxis_showscale=False,
+                plot_bgcolor="white",paper_bgcolor="white",
+                margin=dict(l=0,r=60,t=10,b=0),height=260,
+                yaxis=dict(showgrid=False),xaxis=dict(showgrid=True,gridcolor="#f0f0f0"))
+            st.plotly_chart(fig, use_container_width=True)
+
+    with chart_r:
+        st.markdown("#### Opportunity by category")
+        cat_t = {}
+        for f in findings:
+            cat_t[f["category"]] = cat_t.get(f["category"],0) + f["monthly_saving"]
+        cat_rows = [{"Category":k,"Opp ($)":round(v,2)} for k,v in sorted(cat_t.items(),key=lambda x:-x[1]) if v>0]
+        if cat_rows:  # guard on the filtered rows: all-zero savings would give px.pie a column-less frame
+            cdf = pd.DataFrame(cat_rows)
+            fig2 = px.pie(cdf,values="Opp ($)",names="Category",
+                color_discrete_sequence=["#e05252","#f59e0b","#3b82f6","#8b5cf6","#10b981"],hole=0.45)
+            fig2.update_traces(textposition="outside",textinfo="label+percent")
+            fig2.update_layout(showlegend=False,paper_bgcolor="white",margin=dict(l=0,r=0,t=10,b=0),height=260)
+            st.plotly_chart(fig2, use_container_width=True)
+
+    # Quick wins
+    if quick_wins:
+        st.markdown("#### ⚡ Quick wins")
+        for w in quick_wins[:3]:
+            st.markdown(f'<div class="quick-win">✅ {w}</div>', unsafe_allow_html=True)
+
+    st.markdown("<br>", unsafe_allow_html=True)
+
+    # Findings
+    st.markdown("#### 🔍 Flagged services")
+    filtered = [f for f in findings if f["category"] in selected_cats and f["severity"] in selected_sev]
+    if not filtered:
+        st.info("No findings match filters.")
+    else:
+        show_action = st.toggle("Show AWS remediation actions", value=False)
+        for f in filtered:
+            sev = f["severity"].lower()
+            action_html = f'<div class="cli-box">$ {f["aws_action"]}</div>' if show_action and f["aws_action"] else ""
+            saving = f"${f['monthly_saving']:,.2f}/mo opportunity" if f["monthly_saving"] > 0 else "Investigate"
+            st.markdown(f"""
+            <div class="finding-card {sev}">
+                <div class="finding-rank">FINDING #{f['rank']}</div>
+                <div class="finding-name">{f['name']}</div>
+                <div class="finding-plain">{f['plain_english']}</div>
+                <div style="font-size:0.8rem;color:#666;margin:4px 0"><em>Impact: {f['business_impact']}</em></div>
+                <div class="finding-meta">
+                    <span class="badge badge-{sev}">{f['severity']}</span>
+                    <span class="badge badge-category">🏷 {f['category']}</span>
+                    <span class="saving-tag">💰 {saving}</span>
+                </div>
+                <div style="font-size:0.8rem;color:#555;margin-top:6px">🔧 {f['priority_action']}</div>
+                {action_html}
+            </div>""", unsafe_allow_html=True)
+
+    # Service insights
+    sb = report.get("service_breakdown", {})
+    if sb:
+        st.markdown("---")
+        st.markdown("#### 📊 Service insights")
+        si1, si2 = st.columns(2)
+        with si1:
+            if sb.get("biggest_concern"): st.error(f"🚨 **Biggest concern:** {sb['biggest_concern']}")
+            if sb.get("most_improved"):   st.success(f"✅ **Most improved:** {sb['most_improved']}")
+        with si2:
+            if sb.get("watch_list"):      st.warning(f"👀 **Watch list:** {', '.join(sb['watch_list'])}")
+
+    st.markdown("---")
+    st.markdown("#### 📋 Leadership recommendation")
+    st.info(report.get("closing_recommendation", ""))
+    st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter")
+
+# ── RIGHT PANEL: FinOps AI Chatbot ────────────────────────────────────────────
+with chat_col:
+    st.markdown("""
+    <div style="background:white;border:1px solid #e2e8f0;border-radius:12px;padding:16px;position:sticky;top:0">
+        <div style="font-size:1rem;font-weight:600;color:#1a1a2e;margin-bottom:2px">🤖 FinOps AI</div>
+        <div style="font-size:0.76rem;color:#888;margin-bottom:12px;border-bottom:1px solid #f1f5f9;padding-bottom:10px">
+            Ask anything about your AWS costs
+        </div>
+    </div>
+    """, unsafe_allow_html=True)
+
+    # Suggested questions
+    suggestions = [
+        "Which service should I fix first?",
+        "Why did EC2-Other spike?",
+        "How much can we save on Neptune?",
+        "What is the DevOpsAgent charge?",
+        "Give me a 3-step action plan",
+    ]
+    st.markdown("<p style='font-size:0.76rem;color:#888;margin:10px 0 6px'>💡 Suggested questions:</p>",
+        unsafe_allow_html=True)
+    for i, sug in enumerate(suggestions):
+        if st.button(sug, key=f"sug_{i}", use_container_width=True):
+            st.session_state.chat_history.append({"role":"user","content":sug})
+            with st.spinner("Thinking..."):
+                ans = call_claude(st.session_state.chat_history)
+            st.session_state.chat_history.append({"role":"assistant","content":ans})
+
+    st.markdown("<div style='margin-top:10px'>", unsafe_allow_html=True)
+
+    # Chat history. Message text is user/LLM-supplied and rendered with
+    # unsafe_allow_html, so escape it to stop markup injection into the page.
+    import html as _html  # local import: keeps this fix self-contained
+    for msg in st.session_state.chat_history:
+        is_user = msg["role"] == "user"
+        bubble_class = "chat-bubble-user" if is_user else "chat-bubble-ai"
+        speaker = "You:" if is_user else "🤖 FinOps AI:"
+        st.markdown(
+            f"<div class='{bubble_class}'><strong>{speaker}</strong> {_html.escape(msg['content'])}</div>",
+            unsafe_allow_html=True)
+
+    st.markdown("</div>", unsafe_allow_html=True)
+
+    # Input
+    if prompt_input := st.chat_input("Ask about your AWS costs..."):
+        st.session_state.chat_history.append({"role":"user","content":prompt_input})
+        with st.spinner("Thinking..."):
+            ans = call_claude(st.session_state.chat_history)
+        st.session_state.chat_history.append({"role":"assistant","content":ans})
+        st.rerun()
+
+    if st.session_state.chat_history:
+        if st.button("🗑️ Clear chat", use_container_width=True):
+            st.session_state.chat_history = []
+            st.rerun()
diff --git a/detection_engine.py b/detection_engine.py
index 4883b55..0612223 100644
--- a/detection_engine.py
+++ b/detection_engine.py
@@ -26,6 +26,37 @@
     "db.r5.2xlarge":("db.r5.xlarge",0.48),
 }
 
+# Thresholds and saving-rate assumptions used by the newer waste detectors
+NAT_IDLE_TRAFFIC_GB = 1.0   # GB/week — below this = idle NAT Gateway
+SNAPSHOT_AGE_THRESHOLD  = 90    # days — orphaned snapshots older than this = waste
+GP2_TO_GP3_SAVING_PCT   = 0.20  # gp3 is 20% cheaper than gp2 per GB
+RI_SP_DAYS_THRESHOLD    = 30    # days on-demand before flagging for RI/SP purchase
+RI_SP_SAVING_PCT        = 0.40  # estimated saving with Reserved Instance or Savings Plan
+CACHE_CPU_THRESHOLD     = 10.0  # % — ElastiCache/Redshift below this = underutilized
+LOG_RETENTION_SAVING_PCT = 0.70 # saving from applying 30-day retention to infinite log groups
+
+# Old-generation → new-generation map: "old_type": ("new_type", old_hourly_usd, new_hourly_usd)
+OLD_GEN_MAP = {
+    "t2.micro":    ("t3.micro",    0.0116, 0.0104),
+    "t2.small":    ("t3.small",    0.023,  0.0208),
+    "t2.medium":   ("t3.medium",   0.0464, 0.0416),
+    "t2.large":    ("t3.large",    0.0928, 0.0832),
+    "t2.xlarge":   ("t3.xlarge",   0.1856, 0.1664),
+    "t2.2xlarge":  ("t3.2xlarge",  0.3712, 0.3328),
+    "m4.large":    ("m5.large",    0.1,    0.096),
+    "m4.xlarge":   ("m5.xlarge",   0.2,    0.192),
+    "m4.2xlarge":  ("m5.2xlarge",  0.4,    0.384),
+    "m4.4xlarge":  ("m5.4xlarge",  0.8,    0.768),
+    "c4.large":    ("c5.large",    0.1,    0.085),
+    "c4.xlarge":   ("c5.xlarge",   0.199,  0.17),
+    "c4.2xlarge":  ("c5.2xlarge",  0.398,  0.34),
+    "c4.4xlarge":  ("c5.4xlarge",  0.796,  0.68),
+    "r4.large":    ("r5.large",    0.133,  0.126),
+    "r4.xlarge":   ("r5.xlarge",   0.266,  0.252),
+    "r4.2xlarge":  ("r5.2xlarge",  0.532,  0.504),
+    "r4.4xlarge":  ("r5.4xlarge",  1.064,  1.008),
+}
+
 # ─── Load data ────────────────────────────────────────────────────────────────
 def load_data(filepath="aws_cost_data.csv"):
     df = pd.read_csv(filepath, parse_dates=["last_accessed"])
@@ -39,7 +70,8 @@ def detect_idle_ec2(df):
     ec2 = df[df["service"] == "EC2"].copy()
     idle = ec2[
         (ec2["cpu_avg_7d"] < IDLE_CPU_THRESHOLD) &
-        (ec2["days_running"] >= IDLE_DAYS_THRESHOLD)
+        (ec2["days_running"] >= IDLE_DAYS_THRESHOLD) &
+        (~ec2["status"].str.contains("stopped", case=False, na=False))
     ]
     for _, r in idle.iterrows():
         findings.append({
@@ -192,6 +224,253 @@ def detect_idle_rds(df):
     return findings
 
 
+def detect_nat_idle(df):
+    """Flag NAT Gateway rows whose weekly traffic is below NAT_IDLE_TRAFFIC_GB."""
+    # For NAT Gateway rows, cpu_avg_7d encodes network traffic in GB/week,
+    # so the "CPU" column is compared against a traffic threshold here.
+    findings = []
+    is_nat = df["service"] == "NAT Gateway"
+    low_traffic = df["cpu_avg_7d"] < NAT_IDLE_TRAFFIC_GB
+    for _, r in df[is_nat & low_traffic].iterrows():
+        traffic_gb = float(r["cpu_avg_7d"])
+        findings.append({
+            "finding_id":        f"NAT-IDLE-{r['resource_id'][-6:]}",
+            "category":          "Zombie Resource",
+            "severity":          "MEDIUM",
+            "service":           "NAT Gateway",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"NAT Gateway processed only {traffic_gb:.2f} GB in the past 7 days. AWS charges a $32/mo minimum regardless of traffic volume.",
+            "monthly_waste_usd": float(r["monthly_cost_usd"]),
+            "recommendation":    "Delete idle NAT Gateway. Verify no workloads depend on it for outbound internet access before removal.",
+            "cli_fix":           f"aws ec2 delete-nat-gateway --nat-gateway-id {r['resource_id']} --region {r['region']}"
+        })
+    return findings
+
+
+def detect_idle_load_balancers(df):
+    """Flag ALB/NLB rows that have no target groups or served no requests in the past 7 days."""
+    # For ALB/NLB rows: cpu_avg_7d = request_count_7d, memory_avg_7d = target_group_count
+    findings = []
+    lbs = df[
+        (df["service"].isin(["ALB", "NLB"])) &
+        ((df["memory_avg_7d"] == 0) | (df["cpu_avg_7d"] == 0))
+    ]
+    for _, r in lbs.iterrows():
+        # Compare rather than int()-cast: a blank metric loads as NaN and int(NaN) raises ValueError
+        reason = "no registered target groups" if r["memory_avg_7d"] == 0 else "zero requests in the past 7 days"
+        findings.append({
+            "finding_id":        f"LB-IDLE-{r['resource_id'][-6:]}",
+            "category":          "Zombie Resource",
+            "severity":          "MEDIUM",
+            "service":           r["service"],
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"{r['service']} has {reason}. Load balancers cost $16+/mo even with zero traffic.",
+            "monthly_waste_usd": float(r["monthly_cost_usd"]),
+            "recommendation":    "Delete the load balancer and clean up any associated DNS records or ACM certificates.",
+            "cli_fix":           f"aws elbv2 delete-load-balancer --load-balancer-arn {r['resource_id']} --region {r['region']}"
+        })
+    return findings
+
+
+def detect_old_gen_instances(df):
+    """Flag EC2 rows whose resource_type has a cheaper successor listed in OLD_GEN_MAP."""
+    findings = []
+    for _, r in df[df["service"] == "EC2"].iterrows():
+        itype = r["resource_type"]
+        # Types missing from the map fall back to a zero price delta and are skipped below
+        new_type, old_hourly, new_hourly = OLD_GEN_MAP.get(itype, (None, 0.0, 0.0))
+        hourly_delta = old_hourly - new_hourly
+        saving = round(hourly_delta * 24 * 30, 2)
+        if saving <= 0:
+            continue
+        pct = round(hourly_delta / old_hourly * 100)
+        findings.append({
+            "finding_id":        f"OLDGEN-{r['resource_id'][-6:]}",
+            "category":          "Old Generation",
+            "severity":          "LOW",
+            "service":           "EC2",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"Running deprecated {itype}. Upgrading to {new_type} saves ${saving}/mo ({pct}% cheaper) with better CPU performance and no architectural changes.",
+            "monthly_waste_usd": saving,
+            "recommendation":    f"Stop instance, change type to {new_type}, restart. Schedule during next maintenance window.",
+            "cli_fix":           f"aws ec2 stop-instances --instance-ids {r['resource_id']} --region {r['region']} && aws ec2 modify-instance-attribute --instance-id {r['resource_id']} --instance-type {{Value={new_type}}} --region {r['region']}"
+        })
+    return findings
+
+
+def detect_orphan_snapshots(df):
+    """Flag EBS snapshots marked orphaned that are at least SNAPSHOT_AGE_THRESHOLD days old."""
+    is_snapshot = df["service"] == "EBS Snapshot"
+    is_orphaned = df["status"].str.contains("orphaned", case=False, na=False)
+    is_old = df["days_running"] >= SNAPSHOT_AGE_THRESHOLD
+    snaps = df[is_snapshot & is_orphaned & is_old]
+    return [
+        {
+            "finding_id":        f"SNAP-ORPHAN-{r['resource_id'][-6:]}",
+            "category":          "Zombie Resource",
+            "severity":          "MEDIUM" if r["monthly_cost_usd"] > 30 else "LOW",
+            "service":           "EBS Snapshot",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"Orphaned snapshot {r['days_running']} days old — source volume no longer exists. Accruing ${r['monthly_cost_usd']}/mo in S3 snapshot storage.",
+            "monthly_waste_usd": float(r["monthly_cost_usd"]),
+            "recommendation":    "Delete orphaned snapshot after confirming the data is no longer needed for recovery.",
+            "cli_fix":           f"aws ec2 delete-snapshot --snapshot-id {r['resource_id']} --region {r['region']}"
+        }
+        for _, r in snaps.iterrows()
+    ]
+
+
+def detect_gp2_volumes(df):
+    """Flag EBS rows whose resource_type starts with "gp2" as candidates for gp3 migration."""
+    findings = []
+    is_ebs = df["service"] == "EBS"
+    is_gp2 = df["resource_type"].str.startswith("gp2", na=False)
+    for _, r in df[is_ebs & is_gp2].iterrows():
+        # Saving is a flat GP2_TO_GP3_SAVING_PCT share of the volume's current monthly cost
+        saving = round(float(r["monthly_cost_usd"]) * GP2_TO_GP3_SAVING_PCT, 2)
+        findings.append({
+            "finding_id":        f"GP2-VOLUME-{r['resource_id'][-6:]}",
+            "category":          "Storage Optimisation",
+            "severity":          "LOW",
+            "service":           "EBS",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"gp2 volume costs ${r['monthly_cost_usd']}/mo. Migrating to gp3 saves 20% (${saving}/mo) and delivers 3x baseline IOPS with 125 MB/s throughput at no extra cost.",
+            "monthly_waste_usd": saving,
+            "recommendation":    "Modify volume type from gp2 to gp3. Zero downtime — change takes effect within minutes.",
+            "cli_fix":           f"aws ec2 modify-volume --volume-id {r['resource_id']} --volume-type gp3 --region {r['region']}"
+        })
+    return findings
+
+
+def detect_ondemand_no_coverage(df):
+    """Flag EC2 rows on on-demand pricing that have run for at least RI_SP_DAYS_THRESHOLD days."""
+    findings = []
+    # status "running-ondemand" flags EC2 instances confirmed without RI/SP coverage
+    is_ec2 = df["service"] == "EC2"
+    lacks_coverage = df["status"].str.contains("ondemand", case=False, na=False)
+    long_running = df["days_running"] >= RI_SP_DAYS_THRESHOLD
+    for _, r in df[is_ec2 & lacks_coverage & long_running].iterrows():
+        # Saving is estimated as RI_SP_SAVING_PCT of the current monthly cost
+        saving = round(float(r["monthly_cost_usd"]) * RI_SP_SAVING_PCT, 2)
+        findings.append({
+            "finding_id":        f"RI-MISSING-{r['resource_id'][-6:]}",
+            "category":          "RI/SP Optimisation",
+            "severity":          "MEDIUM" if r["monthly_cost_usd"] > 200 else "LOW",
+            "service":           "EC2",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"Instance has run on-demand for {r['days_running']} days with no Reserved Instance or Savings Plan. On-demand is 30-60% more expensive than committed pricing.",
+            "monthly_waste_usd": saving,
+            "recommendation":    "Purchase a 1-year Compute Savings Plan or Reserved Instance. Break-even in ~7 months vs on-demand pricing.",
+            "cli_fix":           f"aws ce get-reservation-purchase-recommendation --service 'Amazon EC2' --region {r['region']}"
+        })
+    return findings
+
+
+def detect_infinite_log_retention(df):
+    """Flag CloudWatch Logs rows whose resource_type mentions "infinite" retention."""
+    findings = []
+    is_log_group = df["service"] == "CloudWatch Logs"
+    never_expires = df["resource_type"].str.contains("infinite", case=False, na=False)
+    for _, r in df[is_log_group & never_expires].iterrows():
+        # Saving is a flat LOG_RETENTION_SAVING_PCT share of the group's current monthly cost
+        saving = round(float(r["monthly_cost_usd"]) * LOG_RETENTION_SAVING_PCT, 2)
+        findings.append({
+            "finding_id":        f"LOG-INFINITE-{r['resource_id'][-7:]}",
+            "category":          "Log Retention",
+            "severity":          "LOW",
+            "service":           "CloudWatch Logs",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"Log group has infinite retention — logs never expire and accumulate at ${r['monthly_cost_usd']}/mo. Setting 30-day retention reduces storage cost by ~70%.",
+            "monthly_waste_usd": saving,
+            "recommendation":    "Set retention policy to 30, 60, or 90 days depending on compliance requirements. Zero downtime.",
+            "cli_fix":           f"aws logs put-retention-policy --log-group-name \"{r['resource_name']}\" --retention-in-days 30 --region {r['region']}"
+        })
+    return findings
+
+
+def detect_stopped_ec2_with_ebs(df):
+    """Flag EC2 rows whose status is exactly "stopped" (case-insensitive); the row's cost is reported as waste."""
+    is_ec2 = df["service"] == "EC2"
+    is_stopped = df["status"].str.contains("^stopped$", case=False, na=False, regex=True)
+    # The backup AMI name embeds the instance id: AMI names must be unique, so a fixed name breaks the 2nd fix
+    return [
+        {
+            "finding_id":        f"STOPPED-EC2-{r['resource_id'][-6:]}",
+            "category":          "Zombie Resource",
+            "severity":          "MEDIUM" if r["monthly_cost_usd"] > 20 else "LOW",
+            "service":           "EC2",
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"Instance stopped for {r['days_running']} days but attached EBS volumes still incur ${r['monthly_cost_usd']}/mo in storage charges with zero utilization.",
+            "monthly_waste_usd": float(r["monthly_cost_usd"]),
+            "recommendation":    "Snapshot and terminate the instance, or detach and delete unused EBS volumes to eliminate storage costs.",
+            "cli_fix":           f"aws ec2 create-image --instance-id {r['resource_id']} --name 'backup-{r['resource_id']}-before-terminate' --region {r['region']} && aws ec2 terminate-instances --instance-ids {r['resource_id']} --region {r['region']}"
+        }
+        for _, r in df[is_ec2 & is_stopped].iterrows()
+    ]
+
+
+def detect_underutilized_cache_redshift(df):
+    """Flag ElastiCache/Redshift rows under CACHE_CPU_THRESHOLD % CPU that ran IDLE_DAYS_THRESHOLD+ days."""
+    findings = []
+    is_target = df["service"].isin(["ElastiCache", "Redshift"])
+    is_quiet = (df["cpu_avg_7d"] < CACHE_CPU_THRESHOLD) & (df["days_running"] >= IDLE_DAYS_THRESHOLD)
+    for _, r in df[is_target & is_quiet].iterrows():
+        if r["service"] == "ElastiCache":
+            cli = f"aws elasticache delete-cache-cluster --cache-cluster-id {r['resource_id']} --region {r['region']}"
+        else:
+            # Take a final snapshot instead of --skip-final-cluster-snapshot, so a
+            # copy-pasted fix cannot irreversibly destroy warehouse data.
+            cli = f"aws redshift delete-cluster --cluster-identifier {r['resource_id']} --final-cluster-snapshot-identifier {r['resource_id']}-final --region {r['region']}"
+        findings.append({
+            "finding_id":        f"IDLE-{r['service'].upper()[:5]}-{r['resource_id'][-6:]}",
+            "category":          "Idle Resource",
+            "severity":          "HIGH" if r["monthly_cost_usd"] > 100 else "MEDIUM",
+            "service":           r["service"],
+            "resource_id":       r["resource_id"],
+            "resource_name":     r["resource_name"],
+            "region":            r["region"],
+            "team":              r["team"],
+            "environment":       r["environment"],
+            "detail":            f"{r['service']} running at only {r['cpu_avg_7d']}% CPU for {r['days_running']} days — well below {CACHE_CPU_THRESHOLD}% utilization threshold.",
+            "monthly_waste_usd": float(r["monthly_cost_usd"]),
+            "recommendation":    "Delete or downsize. For ElastiCache consider Serverless (scales to zero). For Redshift use pause/resume scheduling.",
+            "cli_fix":           cli
+        })
+    return findings
+
+
 # ─── Scoring & ranking ────────────────────────────────────────────────────────
 
 SEVERITY_MULTIPLIER = {"HIGH": 1.5, "MEDIUM": 1.0, "LOW": 0.6}
@@ -239,7 +518,16 @@ def run_detection(filepath="aws_cost_data.csv"):
         detect_unattached_ebs(df) +
         detect_unassociated_eips(df) +
         detect_cold_s3(df) +
-        detect_rightsizing(df)
+        detect_rightsizing(df) +
+        detect_nat_idle(df) +
+        detect_idle_load_balancers(df) +
+        detect_old_gen_instances(df) +
+        detect_orphan_snapshots(df) +
+        detect_gp2_volumes(df) +
+        detect_ondemand_no_coverage(df) +
+        detect_infinite_log_retention(df) +
+        detect_stopped_ec2_with_ebs(df) +
+        detect_underutilized_cache_redshift(df)
     )
 
     print(f"Total findings: {len(all_findings)}")

From de4be1dcaa14ff352d74ef8ce1de30a15e0af559 Mon Sep 17 00:00:00 2001
From: Mahima Singh <mahima.singh@perforce.com>
Date: Tue, 26 May 2026 15:14:11 +0530
Subject: [PATCH 2/4] docs: add README with setup guide, CSV download steps,
 and detection table

---
 README.md | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..428be2d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,190 @@
+# 👻 Ghost Busters — Cloud Cost Waste Hunter
+
+> AI-powered AWS cloud cost waste detection tool  
+> Built for **Perforce Global Jam 2026**
+
+---
+
+## What it does
+
+Ghost Busters scans your AWS resource data and automatically identifies wasted spend across 15 detection categories. It combines rule-based detection with Claude AI analysis to produce a prioritised, plain-English report — plus an interactive Streamlit dashboard with an embedded FinOps AI chatbot.
+
+**Current results on sample data: 77 findings · $7,686/mo · $92,238/yr in recoverable waste**
+
+---
+
+## Architecture
+
+```
+aws_cost_data.csv
+       │
+       ▼
+detection_engine.py   ←── 15 rule-based detectors
+       │
+       ▼
+findings.json         ←── structured findings (77 items)
+       │
+       ▼
+llm_analyzer.py       ←── Claude AI plain-English analysis
+       │
+       ▼
+llm_report.json       ←── AI-enriched report
+       │
+       ▼
+dashboard_AI.py       ←── Streamlit dashboard + AI chatbot
+```
+
+---
+
+## Quick start
+
+### 1. Clone the repo
+
+```bash
+git clone https://github.com/smahima27/ghost-buster.git
+cd ghost-buster
+```
+
+### 2. Install dependencies
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install streamlit plotly pandas requests
+```
+
+### 3. Run the detection engine
+
+```bash
+python3 detection_engine.py
+```
+
+Output: `findings.json` with all flagged resources ranked by waste score.
+
+### 4. Run the AI analyser (requires Anthropic API key)
+
+```bash
+export ANTHROPIC_API_KEY='sk-ant-...'
+python3 llm_analyzer.py
+```
+
+Output: `llm_report.json` with plain-English explanations and business impact.
+
+### 5. Launch the dashboard
+
+```bash
+export ANTHROPIC_API_KEY='sk-ant-...'
+streamlit run dashboard_AI.py
+```
+
+Opens at http://localhost:8501
+
+---
+
+## Download the sample CSV
+
+The sample dataset (`aws_cost_data.csv`) is included in the repo with **97 simulated AWS resources** across EC2, RDS, EBS, S3, NAT Gateway, ALB/NLB, ElastiCache, Redshift, CloudWatch Logs, and Elastic IPs.
+
+**Option 1 — via Git (recommended):**
+```bash
+git clone https://github.com/smahima27/ghost-buster.git
+# CSV is at ghost-buster/aws_cost_data.csv
+```
+
+**Option 2 — direct download (raw file):**
+```
+https://raw.githubusercontent.com/smahima27/ghost-buster/feature/new-detectors/aws_cost_data.csv
+```
+
+**Option 3 — GitHub UI:**
+1. Go to https://github.com/smahima27/ghost-buster
+2. Click `aws_cost_data.csv`
+3. Click **Download raw file** (top right)
+
+---
+
+## CSV schema
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `resource_id` | string | AWS resource ID (e.g. `i-0abc123`) |
+| `resource_name` | string | Human-readable name |
+| `service` | string | AWS service (EC2, RDS, EBS, S3, etc.) |
+| `resource_type` | string | Instance type or volume type |
+| `region` | string | AWS region |
+| `team` | string | Owning team |
+| `environment` | string | dev / staging / prod / sandbox |
+| `cpu_avg_7d` | float | 7-day average CPU % (or traffic GB/week for NAT GW; request count for ALB/NLB) |
+| `memory_avg_7d` | float | 7-day average memory % (or target group count for ALB/NLB) |
+| `daily_cost_usd` | float | Daily cost in USD |
+| `monthly_cost_usd` | float | Monthly cost in USD |
+| `days_running` | int | Days the resource has been running |
+| `last_accessed` | date | Last access date (YYYY-MM-DD) |
+| `status` | string | running / stopped / unattached / orphaned / etc. |
+| `tags` | string | Key:value tag pairs |
+
+---
+
+## Detection categories
+
+| # | Detector | Trigger condition | Category |
+|---|----------|------------------|----------|
+| 1 | Idle EC2 | CPU < 5% for 7+ days | Idle Resource |
+| 2 | Idle RDS | CPU < 5% for 7+ days | Idle Resource |
+| 3 | Unattached EBS | Status contains "unattached" | Zombie Resource |
+| 4 | Unassociated EIP | Service = Elastic IP | Zombie Resource |
+| 5 | Cold S3 | Not accessed in 60+ days | Storage Optimisation |
+| 6 | Rightsizing | CPU 5–20%, known instance type map | Rightsizing |
+| 7 | Idle NAT Gateway | Traffic < 1 GB/week | Zombie Resource |
+| 8 | Idle ALB/NLB | 0 target groups or 0 requests | Zombie Resource |
+| 9 | Old-gen instances | t2/m4/c4/r4 families | Old Generation |
+| 10 | Orphaned snapshots | EBS snapshot > 90 days, no source volume | Zombie Resource |
+| 11 | gp2 → gp3 migration | EBS volume type starts with "gp2" | Storage Optimisation |
+| 12 | On-demand no RI/SP | Running 30+ days without Reserved Instance/Savings Plan | RI/SP Optimisation |
+| 13 | Infinite log retention | CloudWatch log group with no expiry | Log Retention |
+| 14 | Stopped EC2 with EBS | Status = "stopped", paying for attached volumes | Zombie Resource |
+| 15 | Underutilised cache/DW | ElastiCache or Redshift CPU < 10% | Idle Resource |
+
+---
+
+## Dashboard features
+
+- **Metric cards** — monthly/annual opportunity, finding count
+- **AI executive summary** — Claude-generated plain-English overview
+- **Cost by service bar chart** + **opportunity by category donut chart**
+- **Quick wins** — top 3 actionable items
+- **Filterable findings** — by category and severity with remediation CLI toggle
+- **FinOps AI chatbot** — ask anything about your AWS costs (powered by Claude)
+- **Slack webhook** — fire a top-finding alert to any Slack channel
+
+---
+
+## Project structure
+
+```
+ghost-buster/
+├── aws_cost_data.csv      # Sample AWS resource data (97 rows)
+├── detection_engine.py    # 15 rule-based waste detectors
+├── llm_analyzer.py        # Claude AI report generator
+├── dashboard.py           # Basic Streamlit dashboard
+├── dashboard_AI.py        # Enhanced dashboard with AI chatbot
+├── findings.json          # Output of detection_engine.py
+├── llm_report.json        # Output of llm_analyzer.py
+└── README.md
+```
+
+---
+
+## Environment variables
+
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `ANTHROPIC_API_KEY` | Yes (for AI steps) | Anthropic API key for Claude |
+
+**Never commit API keys to source control.** Use `export ANTHROPIC_API_KEY='sk-ant-...'` in your shell before running.
+
+---
+
+## Team
+
+Built by **Team Ghost Busters** for Perforce Global Jam 2026.

From ce4185426024feb4c91a1d65c1fd50a31b7abd8a Mon Sep 17 00:00:00 2001
From: Mahima Singh <mahima.singh@perforce.com>
Date: Tue, 26 May 2026 15:19:12 +0530
Subject: [PATCH 3/4] feat: make CSV and findings paths configurable via env
 vars (GHOSTBUSTERS_CSV, GHOSTBUSTERS_FINDINGS)

---
 detection_engine.py | 4 +++-
 llm_analyzer.py     | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/detection_engine.py b/detection_engine.py
index 0612223..d5f96f3 100644
--- a/detection_engine.py
+++ b/detection_engine.py
@@ -561,4 +561,6 @@ def run_detection(filepath="aws_cost_data.csv"):
     return output
 
 if __name__ == "__main__":
-    run_detection("aws_cost_data.csv")
+    import os
+    csv_path = os.environ.get("GHOSTBUSTERS_CSV", "aws_cost_data.csv")
+    run_detection(csv_path)
diff --git a/llm_analyzer.py b/llm_analyzer.py
index 5b7790a..e4e4ed2 100644
--- a/llm_analyzer.py
+++ b/llm_analyzer.py
@@ -143,4 +143,6 @@ def analyze(findings_path="findings.json"):
     return report
 
 if __name__ == "__main__":
-    analyze("findings.json")
+    import os
+    findings_path = os.environ.get("GHOSTBUSTERS_FINDINGS", "findings.json")
+    analyze(findings_path)

From 0e2d42ff880a492ef97a382a430f33078d240285 Mon Sep 17 00:00:00 2001
From: Mahima Singh <mahima.singh@perforce.com>
Date: Tue, 26 May 2026 15:32:28 +0530
Subject: [PATCH 4/4] feat: add Untagged Resources panel to dashboard with
 metrics, charts, table and CLI fix

---
 dashboard_AI.py | 180 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 179 insertions(+), 1 deletion(-)

diff --git a/dashboard_AI.py b/dashboard_AI.py
index 38bdefc..3fdf7cc 100644
--- a/dashboard_AI.py
+++ b/dashboard_AI.py
@@ -1,4 +1,4 @@
-import json, os, urllib.request
+import json, os, urllib.request, glob
 import streamlit as st
 import plotly.express as px
 import pandas as pd
@@ -38,6 +38,12 @@
 .chat-bubble-user{background:#EFF6FF;border-radius:10px;padding:8px 12px;margin:4px 0;font-size:0.84rem;color:#1E3A5F}
 .chat-bubble-ai{background:#F8FAFC;border:1px solid #E2E8F0;border-radius:10px;padding:8px 12px;margin:4px 0;font-size:0.84rem;color:#1E293B}
 .sug-btn{font-size:0.75rem}
+.untagged-banner{background:#fff7ed;border-left:4px solid #f59e0b;border-radius:0 8px 8px 0;padding:12px 16px;font-size:0.86rem;color:#92400e;margin-bottom:16px}
+.untagged-row{background:white;border-radius:8px;padding:10px 14px;border:1px solid #fde68a;margin-bottom:6px;display:flex;justify-content:space-between;align-items:center}
+.untagged-id{font-family:monospace;font-size:0.8rem;color:#1a1a2e;font-weight:600}
+.untagged-cost{font-size:0.84rem;font-weight:700;color:#e05252}
+.untagged-svc{font-size:0.72rem;background:#fef3c7;color:#92400e;padding:2px 8px;border-radius:99px;font-weight:600}
+.tag-cli{background:#1e1e2e;color:#fbbf24;font-family:monospace;font-size:0.74rem;padding:6px 10px;border-radius:6px;margin-top:4px;overflow-x:auto;white-space:nowrap}
 </style>
 """, unsafe_allow_html=True)
 
@@ -311,6 +317,178 @@ def call_claude(messages):
     st.markdown("---")
     st.markdown("#### 📋 Leadership recommendation")
     st.info(report.get("closing_recommendation", ""))
+
+    # ── Untagged Resources Panel ───────────────────────────────────────────────
+    st.markdown("---")
+    st.markdown("#### 🏷️ Untagged Resources")
+
+    @st.cache_data
+    def load_resource_csv():
+        """Load $GHOSTBUSTERS_CSV when set; otherwise converted_costs.csv, then aws_cost_data.csv."""
+        override = os.environ.get("GHOSTBUSTERS_CSV")  # read once; an empty value counts as unset
+        for path in ([override] if override else ["converted_costs.csv", "aws_cost_data.csv"]):
+            if os.path.exists(path):
+                try:
+                    return pd.read_csv(path)
+                except Exception:  # best-effort: an unreadable/malformed CSV just moves on to the next candidate
+                    continue
+        return pd.DataFrame()  # empty frame tells the caller that nothing could be loaded
+
+    rdf = load_resource_csv()
+
+    if rdf.empty:
+        st.info("No resource CSV loaded. Run the detection pipeline first.")
+    else:
+        # Identify untagged rows: team, environment, or tags is missing or holds a placeholder value
+        def is_untagged(row):
+            team = str(row.get("team", "")).strip().lower()  # str() of a NaN/None cell becomes "nan"/"none" after lower()
+            env  = str(row.get("environment", "")).strip().lower()
+            tags = str(row.get("tags", "")).strip().lower()
+            return (  # a single placeholder field is enough to flag the row
+                team in ("untagged", "", "nan", "none") or
+                env  in ("unknown", "", "nan", "none") or
+                tags in ("source:cost-explorer", "", "nan", "none")  # presumably the converter's default tag — TODO confirm
+            )
+
+        untagged_mask = rdf.apply(is_untagged, axis=1)  # classify every row once, then reuse the boolean mask
+        untagged_df = rdf[untagged_mask].copy()
+        tagged_df   = rdf[~untagged_mask].copy()
+        total_resources  = len(rdf)
+        untagged_count   = len(untagged_df)
+        untagged_cost    = untagged_df["monthly_cost_usd"].sum() if "monthly_cost_usd" in untagged_df.columns else 0
+        total_cost       = rdf["monthly_cost_usd"].sum() if "monthly_cost_usd" in rdf.columns else 0
+        untagged_pct     = round(untagged_count / total_resources * 100, 1) if total_resources else 0
+        untagged_cost_pct= round(untagged_cost / total_cost * 100, 1) if total_cost else 0
+
+        # Metric cards row
+        ut1, ut2, ut3, ut4 = st.columns(4)
+        with ut1:
+            st.markdown(f"""<div class="metric-card">
+                <div class="metric-label">Untagged resources</div>
+                <div class="metric-value" style="color:#f59e0b">{untagged_count}</div>
+                <div class="metric-sub" style="color:#f59e0b">{untagged_pct}% of total</div>
+            </div>""", unsafe_allow_html=True)
+        with ut2:
+            st.markdown(f"""<div class="metric-card">
+                <div class="metric-label">Untagged monthly spend</div>
+                <div class="metric-value" style="color:#e05252">${untagged_cost:,.0f}</div>
+                <div class="metric-sub">{untagged_cost_pct}% of total spend</div>
+            </div>""", unsafe_allow_html=True)
+        with ut3:  # coverage is re-rounded because 100 - x.y can print float noise (e.g. 29.900000000000006)
+            st.markdown(f"""<div class="metric-card">
+                <div class="metric-label">Tagged resources</div>
+                <div class="metric-value" style="color:#10b981">{len(tagged_df)}</div>
+                <div class="metric-sub">{round(100 - untagged_pct, 1)}% coverage</div>
+            </div>""", unsafe_allow_html=True)
+        with ut4:
+            st.markdown(f"""<div class="metric-card">
+                <div class="metric-label">Untagged annual cost</div>
+                <div class="metric-value" style="color:#e05252">${untagged_cost*12:,.0f}</div>
+                <div class="metric-sub">no ownership visibility</div>
+            </div>""", unsafe_allow_html=True)
+
+        st.markdown("<br>", unsafe_allow_html=True)
+
+        if untagged_count == 0:
+            st.success("✅ All resources are tagged. Great governance!")
+        else:
+            st.markdown(
+                f'<div class="untagged-banner">⚠️ <strong>{untagged_count} resources ({untagged_pct}%) have no team/environment tags</strong> — '
+                f'accounting for <strong>${untagged_cost:,.2f}/mo</strong> of spend with no ownership visibility. '
+                f'Without tags you cannot chargeback costs, enforce policies, or identify owners when issues arise.</div>',
+                unsafe_allow_html=True
+            )
+
+            # Donut: tagged vs untagged by cost
+            ut_chart_l, ut_chart_r = st.columns(2)
+            with ut_chart_l:
+                st.markdown("**Tagging coverage by spend**")
+                tag_pie = pd.DataFrame([
+                    {"Status": "Untagged", "Cost": round(untagged_cost, 2)},
+                    {"Status": "Tagged",   "Cost": round(total_cost - untagged_cost, 2)},
+                ])
+                fig_tag = px.pie(tag_pie, values="Cost", names="Status",
+                    color_discrete_map={"Untagged": "#f59e0b", "Tagged": "#10b981"}, hole=0.5)
+                fig_tag.update_traces(textposition="outside", textinfo="label+percent")
+                fig_tag.update_layout(showlegend=False, paper_bgcolor="white",
+                    margin=dict(l=0,r=0,t=10,b=0), height=220)
+                st.plotly_chart(fig_tag, use_container_width=True)
+
+            with ut_chart_r:
+                st.markdown("**Untagged spend by service**")
+                if "service" in untagged_df.columns:
+                    svc_untagged = (
+                        untagged_df.groupby("service")["monthly_cost_usd"]
+                        .sum().reset_index()
+                        .sort_values("monthly_cost_usd", ascending=True)
+                        .tail(8)
+                    )
+                    svc_untagged.columns = ["Service", "Cost"]
+                    fig_svc = px.bar(svc_untagged, x="Cost", y="Service", orientation="h",
+                        color="Cost", color_continuous_scale=["#fef3c7", "#f59e0b"], text="Cost")
+                    fig_svc.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
+                    fig_svc.update_layout(showlegend=False, coloraxis_showscale=False,
+                        plot_bgcolor="white", paper_bgcolor="white",
+                        margin=dict(l=0,r=60,t=10,b=0), height=220,
+                        yaxis=dict(showgrid=False), xaxis=dict(showgrid=True, gridcolor="#f0f0f0"))
+                    st.plotly_chart(fig_svc, use_container_width=True)
+
+            # Table of untagged resources
+            st.markdown("<br>", unsafe_allow_html=True)
+            st.markdown("**Resources missing tags** — sorted by monthly cost")
+
+            show_cols = [c for c in ["resource_id","resource_name","service","region","team","environment","monthly_cost_usd","tags"] if c in untagged_df.columns]
+            display_df = (
+                untagged_df[show_cols]
+                .sort_values("monthly_cost_usd", ascending=False)
+                .reset_index(drop=True)
+            )
+            display_df.index += 1
+
+            # Search filter
+            tag_search = st.text_input("🔍 Filter by resource ID or service", placeholder="e.g. vol- or EC2", key="tag_search")
+            if tag_search:
+                mask = display_df.apply(lambda row: tag_search.lower() in str(row).lower(), axis=1)
+                display_df = display_df[mask]
+
+            st.dataframe(
+                display_df,
+                use_container_width=True,
+                height=min(400, 40 + len(display_df) * 35),
+                column_config={
+                    "monthly_cost_usd": st.column_config.NumberColumn("Monthly Cost ($)", format="$%.2f"),
+                    "resource_id":      st.column_config.TextColumn("Resource ID"),
+                    "resource_name":    st.column_config.TextColumn("Name"),
+                    "service":          st.column_config.TextColumn("Service"),
+                    "region":           st.column_config.TextColumn("Region"),
+                    "team":             st.column_config.TextColumn("Team"),
+                    "environment":      st.column_config.TextColumn("Environment"),
+                    "tags":             st.column_config.TextColumn("Tags"),
+                }
+            )
+
+            # Tagging CLI helper
+            st.markdown("<br>", unsafe_allow_html=True)
+            st.markdown("**Fix it — bulk tag via AWS CLI:**")
+            top_untagged = untagged_df.sort_values("monthly_cost_usd", ascending=False).head(3)  # three costliest untagged rows
+            for _, row in top_untagged.iterrows():
+                rid = str(row.get("resource_id", ""))  # coerce: a NaN or numeric id has no .startswith()
+                region = row.get("region", "us-east-1")
+                svc = str(row.get("service", "")).lower()
+                if "ec2" in svc or rid.startswith(("i-", "vol-", "snap-")):  # EC2-family ids share the ec2 tagging API
+                    cli = f"aws ec2 create-tags --resources {rid} --tags Key=team,Value=your-team Key=environment,Value=prod Key=owner,Value=your-name --region {region}"
+                elif "rds" in svc:
+                    cli = f"aws rds add-tags-to-resource --resource-name {rid} --tags Key=team,Value=your-team Key=environment,Value=prod --region {region}"
+                elif "s3" in svc:
+                    cli = f"aws s3api put-bucket-tagging --bucket {rid} --tagging 'TagSet=[{{Key=team,Value=your-team}},{{Key=environment,Value=prod}}]'"
+                else:  # any other service falls back to the generic tagging API
+                    cli = f"aws resourcegroupstaggingapi tag-resources --resource-arn-list {rid} --tags team=your-team,environment=prod,owner=your-name --region {region}"
+                st.markdown(
+                    f'<div class="tag-cli">$ {cli}</div>',
+                    unsafe_allow_html=True
+                )
+
+    st.markdown("---")
     st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter")
 
 # ── RIGHT PANEL: FinOps AI Chatbot ────────────────────────────────────────────