diff --git a/.gitignore b/.gitignore
index c09b568..145b9e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,30 +1,51 @@
-# API keys and secrets — NEVER commit these
-SM_api_key
-*.key
-.env
-.env.*
-secrets/
+# Local .terraform directories
+**/.terraform/*
+
+# .tfstate files
+*.tfstate
+*.tfstate.*
+
+# Crash log files
+crash.log
+
+# Ignore any .tfvars files that are generated automatically for each Terraform run. Most
+# .tfvars files are managed as part of configuration and so should be included in
+# version control.
+#
+# example.tfvars
+
+# Ignore override files as they are usually used to override resources locally and so
+# are not checked in
+override.tf
+override.tf.json
+*_override.tf
+*_override.tf.json
+
+# Include override files you do wish to add to version control using negated pattern
+#
+# !example_override.tf
-# Python virtual environment
-.venv/
+# Ignore saved plan files written by: terraform plan -out=tfplan
+# example pattern: *tfplan*
+plan.out
+.terraform.lock.hcl
+.DS_Store
+
+# Python
venv/
-env/
__pycache__/
*.pyc
*.pyo
-*.pyd
-.Python
-# Generated output files (re-created by pipeline)
+# Generated analysis outputs
findings.json
+s3_analysis.json
+billing_report.json
llm_report.json
-# macOS
-.DS_Store
+# Real AWS billing exports (contain account data — do not commit)
+costs.csv
+costs_with_s3.csv
-# IDE
-.vscode/
-.idea/
-
-# Zip archives
-*.zip
+# Sensitive credentials
+SM_api_key
diff --git a/dashboard.py b/dashboard.py
index e42634f..af8e529 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -1,281 +1,686 @@
-import json
+import json, os, urllib.request
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import requests
-from datetime import datetime
-# ─── Page config ─────────────────────────────────────────────────────────────
-st.set_page_config(
- page_title="Cloud Cost Waste Hunter",
- page_icon="👻",
- layout="wide",
- initial_sidebar_state="expanded"
-)
+st.set_page_config(page_title="Cloud Cost Waste Hunter", page_icon="👻",
+ layout="wide", initial_sidebar_state="expanded")
-# ─── Custom CSS ───────────────────────────────────────────────────────────────
st.markdown("""
""", unsafe_allow_html=True)
-# ─── Load report ─────────────────────────────────────────────────────────────
+# ── Load report ────────────────────────────────────────────────────────────────
@st.cache_data
def load_report(path="llm_report.json"):
with open(path) as f:
return json.load(f)
-report = load_report()
-findings = report["findings"]
-all_f = report.get("all_findings", [])
-team_data = report.get("team_breakdown", {})
-quick_wins = report.get("quick_wins", [])
+@st.cache_data
+def load_s3_analysis(path="s3_analysis.json"):
+    """Load the optional S3 analysis JSON file.
+
+    Args:
+        path: Location of the analysis file.
+
+    Returns:
+        The decoded JSON document, or None when no file exists at ``path``.
+    """
+    # EAFP: open directly instead of testing os.path.exists() first, so the
+    # file cannot disappear between the check and the open.
+    try:
+        # JSON is UTF-8 by specification; do not depend on the locale default.
+        with open(path, encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return None
+
+# Module-level data read by the rest of the script.
+report = load_report()
+# None when the S3 analysis file does not exist (see load_s3_analysis).
+s3_data = load_s3_analysis()
+# Both keys are optional in the report; default to empty lists.
+quick_wins = report.get("quick_wins", [])
+raw_findings = report.get("findings", [])
+
+def normalise(f):
+    """Map a raw report finding onto the flat dict the dashboard renders.
+
+    Several fields accept two key spellings; the first one listed wins when
+    both are present: ``service``/``resource_name``, ``category``/``flag``,
+    ``monthly_opportunity``/``monthly_saving`` and ``aws_action``/``cli_fix``.
+
+    Args:
+        f: One finding dict from the report's ``findings`` list.
+
+    Returns:
+        A dict with the keys ``rank``, ``name``, ``category``,
+        ``plain_english``, ``business_impact``, ``monthly_saving``,
+        ``priority_action``, ``aws_action`` and ``severity``.
+    """
+    saving = f.get("monthly_opportunity", f.get("monthly_saving", 0.0))
+    if saving is None:
+        # A JSON null would raise TypeError in the comparison below (which is
+        # evaluated even when "severity" is present) and in numeric formatting.
+        saving = 0.0
+    return {
+        "rank": f.get("rank", 0),
+        "name": f.get("service", f.get("resource_name", "Unknown")),
+        "category": f.get("category", f.get("flag", "—")),
+        "plain_english": f.get("plain_english", ""),
+        "business_impact": f.get("business_impact", ""),
+        "monthly_saving": saving,
+        "priority_action": f.get("priority_action", ""),
+        "aws_action": f.get("aws_action", f.get("cli_fix", "")),
+        # Without an explicit severity, a saving above 100 counts as HIGH.
+        "severity": f.get("severity", "HIGH" if saving > 100 else "MEDIUM"),
+    }
-# ─── Sidebar ─────────────────────────────────────────────────────────────────
+# Findings in the flat shape produced by normalise().
+findings = [normalise(f) for f in raw_findings]
+# Prefer "total_monthly_opportunity"; fall back to "total_monthly_waste", then 0.
+total_monthly = report.get("total_monthly_opportunity", report.get("total_monthly_waste", 0))
+# When the report carries no annual figure, use twelve times the monthly one.
+total_annual = report.get("total_annual_waste", total_monthly * 12)
+total_spend = report.get("total_monthly_spend", 0)
+# Per-service rows for the "Cost by service" chart.
+raw_services = report.get("raw_data", {}).get("services", [])
+# Used by the "Cost by service" chart only when raw_services is empty.
+all_f_legacy = report.get("all_findings", [])
+
+# ── Claude chatbot helpers ─────────────────────────────────────────────────────
+def build_context():
+    """Build the system prompt sent with every chatbot request.
+
+    Reads the module-level ``report``, ``raw_findings``, ``quick_wins``,
+    ``total_spend`` and ``total_monthly`` values.
+
+    Returns:
+        A newline-joined string holding the assistant instructions, the
+        headline totals, one line per finding, the quick wins, the
+        service-breakdown notes when the report has them, and the closing
+        recommendation.
+    """
+    lines = [
+        "You are a senior FinOps engineer assistant in the Ghost Busters Cloud Cost Waste Hunter dashboard.",
+        "Answer clearly and concisely, grounding every response in the actual account data below.",
+        "Keep answers to 3-5 sentences unless the user asks for detail.",
+        "",
+        f"Data source: {report.get('source', 'AWS Cost Explorer')}",
+        # None (not "") marks this line as omitted so the final filter drops
+        # it; the "" entries are deliberate blank separator lines.
+        f"Monthly spend: ${total_spend:,.2f}" if total_spend else None,
+        f"Monthly opportunity: ${total_monthly:,.2f}",
+        f"Executive summary: {report.get('executive_summary', '')}",
+        "",
+        "FINDINGS:",
+    ]
+    for fi in raw_findings:
+        # Free-text fields are truncated to keep the prompt compact.
+        lines.append(
+            f"#{fi.get('rank','')} {fi.get('service', fi.get('resource_name',''))} | "
+            f"${fi.get('monthly_opportunity', fi.get('monthly_saving', 0)):,.2f}/mo | "
+            f"{fi.get('plain_english','')[:120]} | "
+            f"Action: {fi.get('priority_action','')[:80]}"
+        )
+    lines += ["", "QUICK WINS:"] + [f"- {w}" for w in quick_wins]
+    sb = report.get("service_breakdown", {})
+    if sb:
+        lines += [
+            f"Biggest concern: {sb.get('biggest_concern','')}",
+            f"Watch list: {', '.join(sb.get('watch_list',[]))}",
+        ]
+    lines.append(f"Recommendation: {report.get('closing_recommendation','')}")
+    return "\n".join(line for line in lines if line is not None)
+
+def call_claude(messages):
+    """Send the chat transcript to the Anthropic Messages API.
+
+    Args:
+        messages: List of ``{"role": ..., "content": ...}`` dicts, as kept in
+            ``st.session_state.chat_history``.
+
+    Returns:
+        The text of the first content block of the reply. Failures are not
+        raised: a missing ``ANTHROPIC_API_KEY`` yields a setup hint and any
+        other failure yields a string starting with "❌ Error:", so the caller
+        can display the result directly.
+    """
+    import urllib.error  # local import: HTTPError is referenced explicitly below
+
+    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if not api_key:
+        return "⚠️ ANTHROPIC_API_KEY not set. Run `export ANTHROPIC_API_KEY='sk-ant-...'` then restart Streamlit."
+    try:
+        payload = json.dumps({
+            "model": "claude-sonnet-4-20250514",
+            "max_tokens": 800,
+            "system": build_context(),
+            "messages": messages
+        }).encode()
+        req = urllib.request.Request(
+            "https://api.anthropic.com/v1/messages",
+            data=payload,
+            headers={"Content-Type":"application/json",
+                     "x-api-key":api_key,
+                     "anthropic-version":"2023-06-01"},
+            method="POST"
+        )
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            data = json.loads(resp.read().decode())
+        return data["content"][0]["text"]
+    except urllib.error.HTTPError as e:
+        # The API describes rejections (bad key, rate limit, invalid request)
+        # in a JSON body; show that message rather than only the status line.
+        try:
+            detail = json.loads(e.read().decode())["error"]["message"]
+        except (ValueError, KeyError, TypeError):
+            detail = e.reason
+        return f"❌ Error: HTTP {e.code}: {detail}"
+    except Exception as e:
+        # UI boundary: report every other failure as text instead of raising.
+        return f"❌ Error: {e}"
+
+# Create the chat transcript only when session state does not already hold
+# one, so messages already stored there are kept.
+if "chat_history" not in st.session_state:
+ st.session_state.chat_history = []
+
+# ── Sidebar ────────────────────────────────────────────────────────────────────
with st.sidebar:
st.markdown("## 👻 Ghost Busters")
st.markdown("*Cloud Cost Waste Hunter*")
st.markdown("---")
-
- all_teams = sorted(set(f["team"] for f in findings))
- selected_teams = st.multiselect(
- "Filter by team", all_teams, default=all_teams
- )
-
- all_severities = ["HIGH", "MEDIUM", "LOW"]
- selected_sev = st.multiselect(
- "Filter by severity", all_severities, default=all_severities
- )
-
+ categories = sorted(set(f["category"] for f in findings))
+ selected_cats = st.multiselect("Filter by category", categories, default=categories)
+ selected_sev = st.multiselect("Filter by severity", ["HIGH","MEDIUM","LOW"], default=["HIGH","MEDIUM","LOW"])
st.markdown("---")
st.markdown("**Slack webhook alert**")
slack_url = st.text_input("Webhook URL", placeholder="https://hooks.slack.com/...")
-
if st.button("🔔 Fire top finding alert", use_container_width=True):
if slack_url and findings:
top = findings[0]
- payload = {
- "blocks": [
- {"type": "header", "text": {"type": "plain_text",
- "text": "👻 Cloud Cost Waste Hunter Alert"}},
- {"type": "section", "text": {"type": "mrkdwn",
- "text": f"*#{top['rank']} — {top['resource_name']}*\n{top['plain_english']}"}},
- {"type": "section", "fields": [
- {"type": "mrkdwn", "text": f"*Monthly saving*\n${top['monthly_saving']:,.2f}"},
- {"type": "mrkdwn", "text": f"*Team*\n{top['team']}"},
- {"type": "mrkdwn", "text": f"*Action*\n{top['priority_action'][:80]}..."}
- ]},
- {"type": "divider"},
- {"type": "section", "text": {"type": "mrkdwn",
- "text": f"*Total waste across environment:* ${report['total_monthly_waste']:,.2f}/mo (${report['total_annual_waste']:,.2f}/yr)"}}
- ]
- }
+ payload = {"blocks":[
+ {"type":"header","text":{"type":"plain_text","text":"👻 Cloud Cost Waste Hunter Alert"}},
+ {"type":"section","text":{"type":"mrkdwn","text":f"*#{top['rank']} — {top['name']}*\n{top['plain_english']}"}},
+ {"type":"section","fields":[
+ {"type":"mrkdwn","text":f"*Opportunity*\n${top['monthly_saving']:,.2f}/mo"},
+ {"type":"mrkdwn","text":f"*Action*\n{top['priority_action'][:80]}..."}
+ ]},
+ {"type":"section","text":{"type":"mrkdwn","text":f"*Total opportunity:* ${total_monthly:,.2f}/mo"}}
+ ]}
try:
r = requests.post(slack_url, json=payload, timeout=5)
- if r.status_code == 200:
- st.success("✅ Alert sent!")
- else:
- st.error(f"Failed: {r.status_code}")
+ st.success("✅ Sent!") if r.status_code==200 else st.error(f"Failed: {r.status_code}")
except Exception as e:
- st.error(f"Error: {e}")
+ st.error(str(e))
else:
st.warning("Enter a Slack webhook URL first")
-
st.markdown("---")
- st.caption(f"Report generated: {report.get('generated_at','—')}")
+ st.caption(f"Generated: {report.get('generated_at','—')}")
+ if report.get("source"): st.caption(f"Source: {report['source']}")
-# ─── Header ───────────────────────────────────────────────────────────────────
+# ── Page header ───────────────────────────────────────────────────────────────
st.markdown('
-
FINDING #{f['rank']}
-
{f['resource_name']}
-
{f['plain_english']}
-
- Impact: {f['business_impact']}
-
-
- {sev.upper()}
- 👤 {f['team']}
- ☁️ {raw.get('service','')}
- 💰 ${f['monthly_saving']:,.2f}/mo saving
-
-
- 🔧 {f['priority_action']}
-
- {cli_html}
+st.markdown('', unsafe_allow_html=True)
+if report.get("source"):
+ st.markdown(f'
📊 {report["source"]}', unsafe_allow_html=True)
+
+# ── MAIN LAYOUT: left 62% content | right 38% chatbot ─────────────────────────
+main_col, chat_col = st.columns([0.62, 0.38])
+
+with main_col:
+ tab_overview, tab_s3 = st.tabs(["📊 Overview", "🪣 S3 Deep Dive"])
+
+ # ── TAB 1: Overview ───────────────────────────────────────────────────────
+ with tab_overview:
+ # Metric cards
+ c1, c2, c3, c4 = st.columns(4)
+ with c1:
+ st.markdown(f"""
+
Monthly opportunity
+
${total_monthly:,.0f}
+
recoverable now
+
""", unsafe_allow_html=True)
+ with c2:
+ st.markdown(f"""
+
Annual opportunity
+
${total_annual:,.0f}
+
if unaddressed
+
""", unsafe_allow_html=True)
+ with c3:
+ st.markdown(f"""
+
Findings
+
{len(findings)}
+
services flagged
+
""", unsafe_allow_html=True)
+ with c4:
+ if total_spend > 0:
+ pct = round((total_monthly / total_spend) * 100, 1)
+ st.markdown(f"""
+
Total spend
+
${total_spend:,.0f}
+
{pct}% recoverable
+
""", unsafe_allow_html=True)
+ else:
+ top_f = findings[0] if findings else {}
+ st.markdown(f"""
+
Top finding
+
{top_f.get('name','—')[:12]}
+
${top_f.get('monthly_saving',0):,.0f}/mo
+
""", unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # AI summary
+ st.markdown(f'
🤖 AI Summary
{report["executive_summary"]}
',
+ unsafe_allow_html=True)
+
+ # Charts
+ chart_l, chart_r = st.columns(2)
+ with chart_l:
+ st.markdown("#### Cost by service")
+ src = raw_services or []
+ if src:
+ svc_df = pd.DataFrame([
+ {"Service": s["service"][:22], "April ($)": s["apr_2026"]}
+ for s in sorted(src, key=lambda x: -x["apr_2026"])[:8]
+ ])
+ fig = px.bar(svc_df, x="April ($)", y="Service", orientation="h",
+ color="April ($)", color_continuous_scale=["#fde8e8","#e05252"], text="April ($)")
+ fig.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
+ fig.update_layout(showlegend=False, coloraxis_showscale=False,
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=60,t=10,b=0), height=260,
+ yaxis=dict(showgrid=False), xaxis=dict(showgrid=True,gridcolor="#f0f0f0"))
+ st.plotly_chart(fig, use_container_width=True)
+ elif all_f_legacy:
+ svc_t = {}
+ for f in all_f_legacy:
+ svc_t[f.get("service","Other")] = svc_t.get(f.get("service","Other"),0)+f.get("monthly_waste_usd",0)
+ sdf = pd.DataFrame([{"Service":k,"Waste ($)":round(v,2)} for k,v in sorted(svc_t.items(),key=lambda x:-x[1])])
+ fig = px.bar(sdf,x="Waste ($)",y="Service",orientation="h",
+ color="Waste ($)",color_continuous_scale=["#fde8e8","#e05252"],text="Waste ($)")
+ fig.update_traces(texttemplate="$%{text:,.0f}",textposition="outside")
+ fig.update_layout(showlegend=False,coloraxis_showscale=False,
+ plot_bgcolor="white",paper_bgcolor="white",
+ margin=dict(l=0,r=60,t=10,b=0),height=260,
+ yaxis=dict(showgrid=False),xaxis=dict(showgrid=True,gridcolor="#f0f0f0"))
+ st.plotly_chart(fig, use_container_width=True)
+
+ with chart_r:
+ st.markdown("#### Opportunity by category")
+ cat_t = {}
+ for f in findings:
+ cat_t[f["category"]] = cat_t.get(f["category"],0) + f["monthly_saving"]
+ if cat_t:
+ cdf = pd.DataFrame([{"Category":k,"Opp ($)":round(v,2)} for k,v in sorted(cat_t.items(),key=lambda x:-x[1]) if v>0])
+ fig2 = px.pie(cdf,values="Opp ($)",names="Category",
+ color_discrete_sequence=["#e05252","#f59e0b","#3b82f6","#8b5cf6","#10b981"],hole=0.45)
+ fig2.update_traces(textposition="outside",textinfo="label+percent")
+ fig2.update_layout(showlegend=False,paper_bgcolor="white",
+ margin=dict(l=0,r=0,t=10,b=0),height=260)
+ st.plotly_chart(fig2, use_container_width=True)
+
+ # Quick wins
+ if quick_wins:
+ st.markdown("#### ⚡ Quick wins")
+ for w in quick_wins[:3]:
+ st.markdown(f'
✅ {w}
', unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Findings
+ st.markdown("#### 🔍 Flagged services")
+ filtered = [f for f in findings if f["category"] in selected_cats and f["severity"] in selected_sev]
+ if not filtered:
+ st.info("No findings match filters.")
+ else:
+ show_action = st.toggle("Show AWS remediation actions", value=False)
+ for f in filtered:
+ sev = f["severity"].lower()
+ action_html = f'
$ {f["aws_action"]}
' if show_action and f["aws_action"] else ""
+ saving = f"${f['monthly_saving']:,.2f}/mo opportunity" if f["monthly_saving"] > 0 else "Investigate"
+ st.markdown(f"""
+
+
FINDING #{f['rank']}
+
{f['name']}
+
{f['plain_english']}
+
Impact: {f['business_impact']}
+
+ {f['severity']}
+ 🏷 {f['category']}
+ 💰 {saving}
+
+
🔧 {f['priority_action']}
+ {action_html}
+
""", unsafe_allow_html=True)
+
+ # Service insights
+ sb = report.get("service_breakdown", {})
+ if sb:
+ st.markdown("---")
+ st.markdown("#### 📊 Service insights")
+ si1, si2 = st.columns(2)
+ with si1:
+ if sb.get("biggest_concern"): st.error(f"🚨 **Biggest concern:** {sb['biggest_concern']}")
+ if sb.get("most_improved"): st.success(f"✅ **Most improved:** {sb['most_improved']}")
+ with si2:
+ if sb.get("watch_list"): st.warning(f"👀 **Watch list:** {', '.join(sb['watch_list'])}")
+
+ st.markdown("---")
+ st.markdown("#### 📋 Leadership recommendation")
+ st.info(report.get("closing_recommendation", ""))
+ st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter")
+
+ # ── TAB 2: S3 Deep Dive ───────────────────────────────────────────────────
+ with tab_s3:
+ if s3_data is None:
+ st.warning("Run `python3 detection_engine.py` first to generate s3_analysis.json")
+ else:
+ buckets = s3_data["buckets"]
+ tier_summary = s3_data.get("tier_summary", {})
+ bdf = pd.DataFrame(buckets)
+ TIER_COLOR = {"Active": "#10b981", "Infrequent": "#f59e0b",
+ "Cold": "#f97316", "Frozen": "#e05252"}
+ annual_saving = round(s3_data["potential_saving"] * 12, 0)
+ saving_pct = round(
+ s3_data["potential_saving"] / s3_data["total_monthly_cost"] * 100, 1
+ ) if s3_data["total_monthly_cost"] else 0
+
+ # ── S3 metric cards ────────────────────────────────────────────────
+ m1, m2, m3, m4 = st.columns(4)
+ with m1:
+ st.markdown(f"""
+
Total S3 spend
+
${s3_data['total_monthly_cost']:,.0f}/mo
+
{s3_data['total_buckets']} buckets tracked
+
""", unsafe_allow_html=True)
+ with m2:
+ st.markdown(f"""
+
Potential monthly savings
+
${s3_data['potential_saving']:,.0f}/mo
+
{saving_pct}% of S3 spend recoverable
+
""", unsafe_allow_html=True)
+ with m3:
+ st.markdown(f"""
+
Annual savings opportunity
+
${annual_saving:,.0f}
+
if actioned today
+
""", unsafe_allow_html=True)
+ with m4:
+ st.markdown(f"""
+
Total stored
+
{s3_data['total_size_gb']:,.0f} GB
+
{s3_data['terminate_candidates']} bucket(s) ready for deletion
+
""", unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # ── Charts row ────────────────────────────────────────────────────
+ ch1, ch2 = st.columns(2)
+ with ch1:
+ st.markdown("#### Buckets by access tier")
+ if tier_summary:
+ tier_df = pd.DataFrame([
+ {"Tier": t, "Buckets": v["count"],
+ "Size (GB)": v["size_gb"], "Cost ($)": v["cost"]}
+ for t, v in tier_summary.items()
+ ])
+ tier_order = ["Active", "Infrequent", "Cold", "Frozen"]
+ tier_df["Tier"] = pd.Categorical(
+ tier_df["Tier"], categories=[t for t in tier_order if t in tier_df["Tier"].values], ordered=True
+ )
+ tier_df = tier_df.sort_values("Tier")
+ colors = [TIER_COLOR.get(t, "#888") for t in tier_df["Tier"]]
+ fig_tier = px.bar(
+ tier_df, x="Tier", y="Size (GB)", color="Tier",
+ color_discrete_map=TIER_COLOR,
+ text="Buckets",
+ custom_data=["Cost ($)"],
+ )
+ fig_tier.update_traces(
+ texttemplate="%{text} bucket(s)",
+ textposition="outside",
+ hovertemplate="
%{x}Size: %{y:,.0f} GB
Cost: $%{customdata[0]:,.2f}/mo
"
+ )
+ fig_tier.update_layout(
+ showlegend=False, plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=0,t=10,b=0), height=260,
+ xaxis=dict(showgrid=False), yaxis=dict(showgrid=True, gridcolor="#f0f0f0", title="Storage (GB)")
+ )
+ st.plotly_chart(fig_tier, use_container_width=True)
+
+ with ch2:
+ st.markdown("#### Current cost vs potential saving — top buckets")
+ top_bdf = bdf.nlargest(min(10, len(bdf)), "monthly_cost_usd")
+ fig_grouped = go.Figure()
+ fig_grouped.add_trace(go.Bar(
+ name="Current cost",
+ y=top_bdf["resource_name"],
+ x=top_bdf["monthly_cost_usd"],
+ orientation="h",
+ marker_color="#e05252",
+ text=[f"${v:,.2f}" for v in top_bdf["monthly_cost_usd"]],
+ textposition="outside",
+ ))
+ fig_grouped.add_trace(go.Bar(
+ name="Potential saving",
+ y=top_bdf["resource_name"],
+ x=top_bdf["potential_saving"],
+ orientation="h",
+ marker_color="#10b981",
+ text=[f"${v:,.2f}" if v > 0 else "" for v in top_bdf["potential_saving"]],
+ textposition="outside",
+ ))
+ fig_grouped.update_layout(
+ barmode="group",
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0, r=60, t=10, b=0), height=280,
+ legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+ xaxis=dict(showgrid=True, gridcolor="#f0f0f0", tickprefix="$"),
+ yaxis=dict(showgrid=False, autorange="reversed"),
+ )
+ st.plotly_chart(fig_grouped, use_container_width=True)
+
+ # ── Last-accessed timeline with tier thresholds ───────────────────
+ st.markdown("#### 📅 Days idle per bucket — access tier thresholds")
+ bdf_sorted = bdf.sort_values("days_since_access", ascending=False).reset_index(drop=True)
+ fig_timeline = px.bar(
+ bdf_sorted, x="resource_name", y="days_since_access",
+ color="access_tier", color_discrete_map=TIER_COLOR,
+ text="days_since_access",
+ labels={"resource_name": "Bucket", "days_since_access": "Days idle"},
+ custom_data=["size_gb", "monthly_cost_usd", "last_accessed"],
+ )
+ fig_timeline.update_traces(
+ texttemplate="%{text}d",
+ textposition="outside",
+ hovertemplate="
%{x}Last accessed: %{customdata[2]}
Days idle: %{y}
Size: %{customdata[0]:,.0f} GB
Cost: $%{customdata[1]:,.2f}/mo
"
+ )
+ fig_timeline.add_hline(y=30, line_dash="dash", line_color="#f59e0b", line_width=1.5,
+ annotation_text="IA (30d)", annotation_position="top right",
+ annotation_font=dict(size=10, color="#f59e0b"))
+ fig_timeline.add_hline(y=60, line_dash="dash", line_color="#f97316", line_width=1.5,
+ annotation_text="Cold (60d)", annotation_position="top right",
+ annotation_font=dict(size=10, color="#f97316"))
+ fig_timeline.add_hline(y=90, line_dash="dash", line_color="#e05252", line_width=1.5,
+ annotation_text="Frozen (90d)", annotation_position="top right",
+ annotation_font=dict(size=10, color="#e05252"))
+ fig_timeline.update_layout(
+ showlegend=True, plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0, r=0, t=10, b=80), height=320,
+ xaxis=dict(showgrid=False, tickangle=-30),
+ yaxis=dict(showgrid=True, gridcolor="#f0f0f0", title="Days idle"),
+ legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+ )
+ st.plotly_chart(fig_timeline, use_container_width=True)
+
+ # ── Savings breakdown + Environment cost ───────────────────────────
+ sb1, sb2 = st.columns(2)
+ with sb1:
+ st.markdown("#### 💰 Savings breakdown by action type")
+ action_buckets = [b for b in buckets if b["potential_saving"] > 0]
+ if action_buckets:
+ # Return the remediation label for bucket record b. The terminate flag is
+ # checked first, then the access tier; anything unmatched is "Other".
+ def _action_label(b):
+ if b["terminate_candidate"]: return "Delete (frozen dev/sandbox)"
+ if b["access_tier"] == "Frozen": return "Archive to Glacier"
+ if b["access_tier"] == "Cold": return "Move to Glacier"
+ if b["access_tier"] == "Infrequent": return "Switch to S3-IA"
+ return "Other"
+ action_totals: dict = {}
+ for b in action_buckets:
+ lbl = _action_label(b)
+ action_totals[lbl] = action_totals.get(lbl, 0) + b["potential_saving"]
+ adf = pd.DataFrame([
+ {"Action": k, "Saving ($/mo)": round(v, 2)}
+ for k, v in sorted(action_totals.items(), key=lambda x: -x[1])
+ ])
+ fig_donut = px.pie(
+ adf, values="Saving ($/mo)", names="Action",
+ color_discrete_sequence=["#e05252","#f97316","#f59e0b","#10b981","#3b82f6"],
+ hole=0.52,
+ )
+ fig_donut.update_traces(
+ textposition="outside", textinfo="label+percent",
+ hovertemplate="
%{label}Save $%{value:,.2f}/mo
"
+ )
+ fig_donut.update_layout(
+ showlegend=False, paper_bgcolor="white",
+ margin=dict(l=0, r=0, t=10, b=0), height=260,
+ )
+ st.plotly_chart(fig_donut, use_container_width=True)
+ else:
+ st.info("No savings opportunities identified.")
+ with sb2:
+ st.markdown("#### 🏗️ Cost by environment")
+ env_df = bdf.groupby("environment", as_index=False).agg(
+ monthly_cost=("monthly_cost_usd", "sum"),
+ potential_saving=("potential_saving", "sum"),
+ buckets=("resource_id", "count"),
+ ).sort_values("monthly_cost", ascending=True)
+ fig_env = go.Figure()
+ fig_env.add_trace(go.Bar(
+ name="Current cost",
+ y=env_df["environment"],
+ x=env_df["monthly_cost"],
+ orientation="h",
+ marker_color="#e05252",
+ text=[f"${v:,.2f}" for v in env_df["monthly_cost"]],
+ textposition="outside",
+ ))
+ fig_env.add_trace(go.Bar(
+ name="Potential saving",
+ y=env_df["environment"],
+ x=env_df["potential_saving"],
+ orientation="h",
+ marker_color="#10b981",
+ text=[f"${v:,.2f}" if v > 0 else "" for v in env_df["potential_saving"]],
+ textposition="outside",
+ ))
+ fig_env.update_layout(
+ barmode="group",
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0, r=60, t=10, b=0), height=260,
+ legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+ xaxis=dict(showgrid=True, gridcolor="#f0f0f0", tickprefix="$"),
+ yaxis=dict(showgrid=False),
+ )
+ st.plotly_chart(fig_env, use_container_width=True)
+
+ # ── Full bucket table ──────────────────────────────────────────────
+ st.markdown("#### 🗂️ All S3 buckets — detailed view")
+ s3_filter_col1, s3_filter_col2 = st.columns(2)
+ with s3_filter_col1:
+ tier_filter = st.multiselect(
+ "Filter by access tier",
+ ["Active", "Infrequent", "Cold", "Frozen"],
+ default=["Active", "Infrequent", "Cold", "Frozen"],
+ key="s3_tier_filter"
+ )
+ with s3_filter_col2:
+ env_filter = st.multiselect(
+ "Filter by environment",
+ sorted(bdf["environment"].unique()),
+ default=sorted(bdf["environment"].unique()),
+ key="s3_env_filter"
+ )
+ show_s3_cli = st.toggle("Show termination / remediation CLI", value=False, key="s3_cli_toggle")
+
+ filtered_buckets = [
+ b for b in buckets
+ if b["access_tier"] in tier_filter and b["environment"] in env_filter
+ ]
+
+ for b in filtered_buckets:
+ tier_col = TIER_COLOR.get(b["access_tier"], "#888")
+ terminate_badge = (
+ '
🔴 TERMINATE'
+ if b["terminate_candidate"] else ""
+ )
+ cli_html = (
+ f'
$ {b["cli_fix"]}
'
+ if show_s3_cli else ""
+ )
+ # Pre-compute dollar strings — avoids Streamlit treating $X as LaTeX
+ size_str = f'{b["size_gb"]:,.0f} GB'
+ cost_str = f'USD {b["monthly_cost_usd"]:,.2f}/mo'
+ saving_html = (
+ f'
'
+ f'▲ Save USD {b["potential_saving"]:,.2f}/mo'
+ if b["potential_saving"] > 0 else ""
+ )
+ st.markdown(f"""
+
+
+
+
{b['resource_name']}
+
{b['resource_id']} · {b['region']} · {b['team']} · {b['environment']}
+
+
+ {size_str} | {cost_str}
+
+
+
+
+ {b['access_tier']}
+
+ 📅 Last accessed: {b['last_accessed']}
+ ⏱ {b['days_since_access']}d idle
+ {terminate_badge}
+
+
+ 💡 {b['recommendation']}{saving_html}
+
+ {cli_html}
+
""", unsafe_allow_html=True)
+
+ # ── Terminate candidates summary ───────────────────────────────────
+ terminate_list = [b for b in buckets if b["terminate_candidate"]]
+ if terminate_list:
+ st.markdown("---")
+ st.markdown("#### 🗑️ Termination candidates")
+ st.error(
+ f"**{len(terminate_list)} bucket(s)** in dev/sandbox environments have not been accessed "
+ f"in 90+ days. These are strong candidates for deletion."
+ )
+ total_term_saving = sum(b["monthly_cost_usd"] for b in terminate_list)
+ total_term_gb = sum(b["size_gb"] for b in terminate_list)
+ st.markdown(
+ f"Deleting them would free **{total_term_gb:,.0f} GB** and save "
+ f"**${total_term_saving:,.2f}/mo** (${total_term_saving*12:,.0f}/yr)."
+ )
+ for b in terminate_list:
+ st.markdown(
+ f'
$ {b["cli_fix"]}
',
+ unsafe_allow_html=True
+ )
+
+# ── RIGHT PANEL: FinOps AI Chatbot ────────────────────────────────────────────
+with chat_col:
+ st.markdown("""
+
+
🤖 FinOps AI
+
+ Ask anything about your AWS costs
- """, unsafe_allow_html=True)
+
+ """, unsafe_allow_html=True)
+
+ # Suggested questions
+ suggestions = [
+ "Which service should I fix first?",
+ "Why did EC2-Other spike?",
+ "How much can we save on Neptune?",
+ "What is the DevOpsAgent charge?",
+ "Give me a 3-step action plan",
+ ]
+ st.markdown("
💡 Suggested questions:
",
+ unsafe_allow_html=True)
+ for i, sug in enumerate(suggestions):
+ if st.button(sug, key=f"sug_{i}", use_container_width=True):
+ st.session_state.chat_history.append({"role":"user","content":sug})
+ with st.spinner("Thinking..."):
+ ans = call_claude(st.session_state.chat_history)
+ st.session_state.chat_history.append({"role":"assistant","content":ans})
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Chat history
+ for msg in st.session_state.chat_history:
+ if msg["role"] == "user":
+ st.markdown(
+ f"
You: {msg['content']}
",
+ unsafe_allow_html=True)
+ else:
+ st.markdown(
+ f"
🤖 FinOps AI: {msg['content']}
",
+ unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Input
+ if prompt_input := st.chat_input("Ask about your AWS costs..."):
+ st.session_state.chat_history.append({"role":"user","content":prompt_input})
+ with st.spinner("Thinking..."):
+ ans = call_claude(st.session_state.chat_history)
+ st.session_state.chat_history.append({"role":"assistant","content":ans})
+ st.rerun()
-# ─── Recommendation ───────────────────────────────────────────────────────────
-st.markdown("---")
-st.markdown("#### 📋 Leadership recommendation")
-st.info(report.get("closing_recommendation", ""))
-st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter")
+ if st.session_state.chat_history:
+ if st.button("🗑️ Clear chat", use_container_width=True):
+ st.session_state.chat_history = []
+ st.rerun()
\ No newline at end of file
diff --git a/dashboard_AI.py b/dashboard_AI.py
new file mode 100644
index 0000000..40692a4
--- /dev/null
+++ b/dashboard_AI.py
@@ -0,0 +1,370 @@
+import json, os, urllib.request
+import streamlit as st
+import plotly.express as px
+import pandas as pd
+import requests
+
+st.set_page_config(page_title="Cloud Cost Waste Hunter", page_icon="👻",
+ layout="wide", initial_sidebar_state="expanded")
+
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+# ── Load report ────────────────────────────────────────────────────────────────
+@st.cache_data
+def load_report(path="llm_report.json"):
+    """Load the JSON cost report stored at *path*.
+
+    The function is wrapped in ``st.cache_data``.
+
+    Args:
+        path: Location of the JSON report (default ``llm_report.json``).
+
+    Returns:
+        The decoded JSON document.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+    """
+    # Decode explicitly as UTF-8 instead of relying on the platform default
+    # encoding, which can mis-decode non-ASCII report text on some systems.
+    with open(path, encoding="utf-8") as f:
+        return json.load(f)
+
+# Load the report once at module level; the two lists below default to
+# empty when the report omits the corresponding key.
+report = load_report()
+quick_wins = report.get("quick_wins", [])
+raw_findings = report.get("findings", [])
+
+def normalise(f):
+    """Flatten one raw finding into the dict shape the dashboard renders.
+
+    Each output field reads a primary key of *f*, then an alternate key
+    (where one exists), then a fixed default. When *f* has no ``severity``
+    the value is derived from the saving: above 100 is ``HIGH``, otherwise
+    ``MEDIUM``.
+    """
+    saving = f.get("monthly_opportunity", f.get("monthly_saving", 0.0))
+    # Computed unconditionally, exactly like the default argument it replaces.
+    fallback_severity = "HIGH" if saving > 100 else "MEDIUM"
+
+    out = {
+        "rank": f.get("rank", 0),
+        "name": f.get("service", f.get("resource_name", "Unknown")),
+        "category": f.get("category", f.get("flag", "—")),
+    }
+    for text_key in ("plain_english", "business_impact"):
+        out[text_key] = f.get(text_key, "")
+    out["monthly_saving"] = saving
+    out["priority_action"] = f.get("priority_action", "")
+    out["aws_action"] = f.get("aws_action", f.get("cli_fix", ""))
+    out["severity"] = f.get("severity", fallback_severity)
+    return out
+
+# Normalised findings plus headline totals. The monthly total falls back to
+# "total_monthly_waste", and the annual total to twelve times the monthly one,
+# when the primary report key is absent.
+findings = [normalise(f) for f in raw_findings]
+total_monthly = report.get("total_monthly_opportunity", report.get("total_monthly_waste", 0))
+total_annual = report.get("total_annual_waste", total_monthly * 12)
+total_spend = report.get("total_monthly_spend", 0)
+raw_services = report.get("raw_data", {}).get("services", [])
+all_f_legacy = report.get("all_findings", [])
+
+# ── Claude chatbot helpers ─────────────────────────────────────────────────────
+def build_context():
+    """Return the system prompt text sent with every chat request.
+
+    The prompt is built from the module-level ``report``, ``raw_findings``,
+    ``quick_wins``, ``total_spend`` and ``total_monthly`` values: fixed
+    instructions, headline figures, one line per finding, the quick wins,
+    the optional service breakdown and the closing recommendation.
+    """
+    # Empty string (not omitted) when no spend figure is available.
+    spend_line = f"Monthly spend: ${total_spend:,.2f}" if total_spend else ""
+    parts = [
+        "You are a senior FinOps engineer assistant in the Ghost Busters Cloud Cost Waste Hunter dashboard.",
+        "Answer clearly and concisely, grounding every response in the actual account data below.",
+        "Keep answers to 3-5 sentences unless the user asks for detail.",
+        "",
+        f"Data source: {report.get('source', 'AWS Cost Explorer')}",
+        spend_line,
+        f"Monthly opportunity: ${total_monthly:,.2f}",
+        f"Executive summary: {report.get('executive_summary', '')}",
+        "",
+        "FINDINGS:",
+    ]
+
+    for finding in raw_findings:
+        rank = finding.get('rank','')
+        name = finding.get('service', finding.get('resource_name',''))
+        amount = finding.get('monthly_opportunity', finding.get('monthly_saving', 0))
+        summary = finding.get('plain_english','')[:120]
+        action = finding.get('priority_action','')[:80]
+        parts.append(f"#{rank} {name} | ${amount:,.2f}/mo | {summary} | Action: {action}")
+
+    parts.append("")
+    parts.append("QUICK WINS:")
+    parts.extend([f"- {w}" for w in quick_wins])
+
+    breakdown = report.get("service_breakdown", {})
+    if breakdown:
+        parts.append(f"Biggest concern: {breakdown.get('biggest_concern','')}")
+        parts.append(f"Watch list: {', '.join(breakdown.get('watch_list',[]))}")
+
+    parts.append(f"Recommendation: {report.get('closing_recommendation','')}")
+    return "\n".join(part for part in parts if part is not None)
+
+def call_claude(messages):
+    """Send *messages* to the Anthropic Messages API and return the reply text.
+
+    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.
+    Failures never propagate: a missing key yields a warning string and any
+    exception raised while building, sending or decoding the request is
+    returned as an ``❌ Error: ...`` string.
+    """
+    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if not api_key:
+        return "⚠️ ANTHROPIC_API_KEY not set. Run `export ANTHROPIC_API_KEY='sk-ant-...'` then restart Streamlit."
+
+    endpoint = "https://api.anthropic.com/v1/messages"
+    try:
+        body = {
+            "model": "claude-sonnet-4-20250514",
+            "max_tokens": 800,
+            "system": build_context(),
+            "messages": messages
+        }
+        request_headers = {
+            "Content-Type":"application/json",
+            "x-api-key":api_key,
+            "anthropic-version":"2023-06-01",
+        }
+        req = urllib.request.Request(
+            endpoint,
+            data=json.dumps(body).encode(),
+            headers=request_headers,
+            method="POST",
+        )
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            reply = json.loads(resp.read().decode())
+        # Stays inside the try so an unexpected response shape is reported
+        # as an error string as well.
+        return reply["content"][0]["text"]
+    except Exception as e:
+        return f"❌ Error: {e}"
+
+# Create the chat transcript in session state the first time the script runs.
+if "chat_history" not in st.session_state:
+ st.session_state.chat_history = []
+
+# ── Sidebar ────────────────────────────────────────────────────────────────────
+# Sidebar: category/severity filters, a Slack alert trigger and report metadata.
+with st.sidebar:
+    st.markdown("## 👻 Ghost Busters")
+    st.markdown("*Cloud Cost Waste Hunter*")
+    st.markdown("---")
+    # Both filters start with every option selected.
+    categories = sorted(set(f["category"] for f in findings))
+    selected_cats = st.multiselect("Filter by category", categories, default=categories)
+    selected_sev = st.multiselect("Filter by severity", ["HIGH","MEDIUM","LOW"], default=["HIGH","MEDIUM","LOW"])
+    st.markdown("---")
+    st.markdown("**Slack webhook alert**")
+    slack_url = st.text_input("Webhook URL", placeholder="https://hooks.slack.com/...")
+    if st.button("🔔 Fire top finding alert", use_container_width=True):
+        if not slack_url:
+            st.warning("Enter a Slack webhook URL first")
+        elif not findings:
+            # A URL was supplied but there is nothing to report; say so
+            # instead of asking for a URL again.
+            st.warning("No findings to send")
+        else:
+            top = findings[0]
+            payload = {"blocks":[
+                {"type":"header","text":{"type":"plain_text","text":"👻 Cloud Cost Waste Hunter Alert"}},
+                {"type":"section","text":{"type":"mrkdwn","text":f"*#{top['rank']} — {top['name']}*\n{top['plain_english']}"}},
+                {"type":"section","fields":[
+                    {"type":"mrkdwn","text":f"*Opportunity*\n${top['monthly_saving']:,.2f}/mo"},
+                    {"type":"mrkdwn","text":f"*Action*\n{top['priority_action'][:80]}..."}
+                ]},
+                {"type":"section","text":{"type":"mrkdwn","text":f"*Total opportunity:* ${total_monthly:,.2f}/mo"}}
+            ]}
+            try:
+                r = requests.post(slack_url, json=payload, timeout=5)
+                # Plain if/else rather than a conditional expression used as a
+                # statement: a bare non-call expression is subject to
+                # Streamlit's "magic" rendering, which may echo its value.
+                if r.status_code == 200:
+                    st.success("✅ Sent!")
+                else:
+                    st.error(f"Failed: {r.status_code}")
+            except Exception as e:
+                # Best effort: surface any failure in the sidebar.
+                st.error(str(e))
+    st.markdown("---")
+    st.caption(f"Generated: {report.get('generated_at','—')}")
+    if report.get("source"):
+        st.caption(f"Source: {report['source']}")
+
+# ── Page header ───────────────────────────────────────────────────────────────
+# Page header, followed by the report's data-source label when one is present.
+# NOTE(review): the string literals below are passed with unsafe_allow_html=True
+# but contain no markup and are split across lines; the HTML appears to have
+# been stripped from this copy of the patch — confirm against the original.
+# NOTE(review): report["source"] is interpolated without escaping — confirm the
+# report content is trusted.
+st.markdown('
👻 Cloud Cost Waste Hunter
', unsafe_allow_html=True)
+st.markdown('', unsafe_allow_html=True)
+if report.get("source"):
+ st.markdown(f'
📊 {report["source"]}', unsafe_allow_html=True)
+
+# ── MAIN LAYOUT: left 62% content | right 38% chatbot ─────────────────────────
+main_col, chat_col = st.columns([0.62, 0.38])
+
+with main_col:
+ # NOTE(review): the f-string bodies in this block are rendered with
+ # unsafe_allow_html=True but contain no markup; the HTML appears to have
+ # been stripped from this copy of the patch — confirm against the original.
+ # Metric cards
+ c1, c2, c3, c4 = st.columns(4)
+ with c1:
+ st.markdown(f"""
+
Monthly opportunity
+
${total_monthly:,.0f}
+
recoverable now
+
""", unsafe_allow_html=True)
+ with c2:
+ st.markdown(f"""
+
Annual opportunity
+
${total_annual:,.0f}
+
if unaddressed
+
""", unsafe_allow_html=True)
+ with c3:
+ st.markdown(f"""
+
Findings
+
{len(findings)}
+
services flagged
+
""", unsafe_allow_html=True)
+ with c4:
+ # Fourth card: share of spend that is recoverable when total spend is
+ # known, otherwise the first finding.
+ if total_spend > 0:
+ pct = round((total_monthly / total_spend) * 100, 1)
+ st.markdown(f"""
+
Total spend
+
${total_spend:,.0f}
+
{pct}% recoverable
+
""", unsafe_allow_html=True)
+ else:
+ top_f = findings[0] if findings else {}
+ st.markdown(f"""
+
Top finding
+
{top_f.get('name','—')[:12]}
+
${top_f.get('monthly_saving',0):,.0f}/mo
+
""", unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # AI summary
+ # NOTE(review): report["executive_summary"] is indexed directly and raises
+ # KeyError when the key is absent; other reads of the report use .get().
+ st.markdown(f'
🤖 AI Summary
{report["executive_summary"]}
',
+ unsafe_allow_html=True)
+
+ # Charts
+ chart_l, chart_r = st.columns(2)
+ with chart_l:
+ st.markdown("#### Cost by service")
+ # Prefer raw_services (top 8 by the hard-coded "apr_2026" key); fall back
+ # to summing monthly_waste_usd per service over all_f_legacy.
+ src = raw_services or []
+ if src:
+ svc_df = pd.DataFrame([
+ {"Service": s["service"][:22], "April ($)": s["apr_2026"]}
+ for s in sorted(src, key=lambda x: -x["apr_2026"])[:8]
+ ])
+ fig = px.bar(svc_df, x="April ($)", y="Service", orientation="h",
+ color="April ($)", color_continuous_scale=["#fde8e8","#e05252"], text="April ($)")
+ fig.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
+ fig.update_layout(showlegend=False, coloraxis_showscale=False,
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=60,t=10,b=0), height=260,
+ yaxis=dict(showgrid=False), xaxis=dict(showgrid=True,gridcolor="#f0f0f0"))
+ st.plotly_chart(fig, use_container_width=True)
+ elif all_f_legacy:
+ svc_t = {}
+ for f in all_f_legacy:
+ svc_t[f.get("service","Other")] = svc_t.get(f.get("service","Other"),0)+f.get("monthly_waste_usd",0)
+ sdf = pd.DataFrame([{"Service":k,"Waste ($)":round(v,2)} for k,v in sorted(svc_t.items(),key=lambda x:-x[1])])
+ fig = px.bar(sdf,x="Waste ($)",y="Service",orientation="h",
+ color="Waste ($)",color_continuous_scale=["#fde8e8","#e05252"],text="Waste ($)")
+ fig.update_traces(texttemplate="$%{text:,.0f}",textposition="outside")
+ fig.update_layout(showlegend=False,coloraxis_showscale=False,
+ plot_bgcolor="white",paper_bgcolor="white",
+ margin=dict(l=0,r=60,t=10,b=0),height=260,
+ yaxis=dict(showgrid=False),xaxis=dict(showgrid=True,gridcolor="#f0f0f0"))
+ st.plotly_chart(fig, use_container_width=True)
+
+ with chart_r:
+ st.markdown("#### Opportunity by category")
+ # Sum monthly_saving per category; categories with a non-positive total
+ # are left out of the pie.
+ cat_t = {}
+ for f in findings:
+ cat_t[f["category"]] = cat_t.get(f["category"],0) + f["monthly_saving"]
+ if cat_t:
+ cdf = pd.DataFrame([{"Category":k,"Opp ($)":round(v,2)} for k,v in sorted(cat_t.items(),key=lambda x:-x[1]) if v>0])
+ fig2 = px.pie(cdf,values="Opp ($)",names="Category",
+ color_discrete_sequence=["#e05252","#f59e0b","#3b82f6","#8b5cf6","#10b981"],hole=0.45)
+ fig2.update_traces(textposition="outside",textinfo="label+percent")
+ fig2.update_layout(showlegend=False,paper_bgcolor="white",
+ margin=dict(l=0,r=0,t=10,b=0),height=260)
+ st.plotly_chart(fig2, use_container_width=True)
+
+ # Quick wins
+ if quick_wins:
+ st.markdown("#### ⚡ Quick wins")
+ for w in quick_wins[:3]:
+ st.markdown(f'
✅ {w}
', unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Findings
+ st.markdown("#### 🔍 Flagged services")
+ # Apply the sidebar category and severity filters.
+ filtered = [f for f in findings if f["category"] in selected_cats and f["severity"] in selected_sev]
+ if not filtered:
+ st.info("No findings match filters.")
+ else:
+ show_action = st.toggle("Show AWS remediation actions", value=False)
+ for f in filtered:
+ sev = f["severity"].lower()
+ action_html = f'
$ {f["aws_action"]}
' if show_action and f["aws_action"] else ""
+ saving = f"${f['monthly_saving']:,.2f}/mo opportunity" if f["monthly_saving"] > 0 else "Investigate"
+ st.markdown(f"""
+
+
FINDING #{f['rank']}
+
{f['name']}
+
{f['plain_english']}
+
Impact: {f['business_impact']}
+
+ {f['severity']}
+ 🏷 {f['category']}
+ 💰 {saving}
+
+
🔧 {f['priority_action']}
+ {action_html}
+
""", unsafe_allow_html=True)
+
+ # Service insights
+ sb = report.get("service_breakdown", {})
+ if sb:
+ st.markdown("---")
+ st.markdown("#### 📊 Service insights")
+ si1, si2 = st.columns(2)
+ with si1:
+ if sb.get("biggest_concern"): st.error(f"🚨 **Biggest concern:** {sb['biggest_concern']}")
+ if sb.get("most_improved"): st.success(f"✅ **Most improved:** {sb['most_improved']}")
+ with si2:
+ if sb.get("watch_list"): st.warning(f"👀 **Watch list:** {', '.join(sb['watch_list'])}")
+
+ st.markdown("---")
+ st.markdown("#### 📋 Leadership recommendation")
+ st.info(report.get("closing_recommendation", ""))
+ st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter")
+
+# ── RIGHT PANEL: FinOps AI Chatbot ────────────────────────────────────────────
+with chat_col:
+ # NOTE(review): the string literals in this block are rendered with
+ # unsafe_allow_html=True but contain no markup; the HTML appears to have
+ # been stripped from this copy of the patch — confirm against the original.
+ st.markdown("""
+
+
🤖 FinOps AI
+
+ Ask anything about your AWS costs
+
+
+ """, unsafe_allow_html=True)
+
+ # Suggested questions
+ suggestions = [
+ "Which service should I fix first?",
+ "Why did EC2-Other spike?",
+ "How much can we save on Neptune?",
+ "What is the DevOpsAgent charge?",
+ "Give me a 3-step action plan",
+ ]
+ st.markdown("
💡 Suggested questions:
",
+ unsafe_allow_html=True)
+ # Clicking a suggestion appends it as a user turn, calls the model with the
+ # full transcript and appends the answer.
+ for i, sug in enumerate(suggestions):
+ if st.button(sug, key=f"sug_{i}", use_container_width=True):
+ st.session_state.chat_history.append({"role":"user","content":sug})
+ with st.spinner("Thinking..."):
+ ans = call_claude(st.session_state.chat_history)
+ st.session_state.chat_history.append({"role":"assistant","content":ans})
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Chat history
+ # NOTE(review): msg['content'] (user input and model output) is interpolated
+ # into strings rendered with unsafe_allow_html=True without escaping —
+ # confirm whether it should be HTML-escaped first.
+ for msg in st.session_state.chat_history:
+ if msg["role"] == "user":
+ st.markdown(
+ f"
You: {msg['content']}
",
+ unsafe_allow_html=True)
+ else:
+ st.markdown(
+ f"
🤖 FinOps AI: {msg['content']}
",
+ unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Input
+ # Free-text question: same flow as a suggestion, then rerun the script.
+ if prompt_input := st.chat_input("Ask about your AWS costs..."):
+ st.session_state.chat_history.append({"role":"user","content":prompt_input})
+ with st.spinner("Thinking..."):
+ ans = call_claude(st.session_state.chat_history)
+ st.session_state.chat_history.append({"role":"assistant","content":ans})
+ st.rerun()
+
+ # The clear button is only offered once there is something to clear.
+ if st.session_state.chat_history:
+ if st.button("🗑️ Clear chat", use_container_width=True):
+ st.session_state.chat_history = []
+ st.rerun()
\ No newline at end of file
diff --git a/detection_engine.py b/detection_engine.py
index 4883b55..83d9627 100644
--- a/detection_engine.py
+++ b/detection_engine.py
@@ -1,3 +1,5 @@
+import re
+import os
import pandas as pd
import json
from datetime import datetime, timedelta
@@ -27,11 +29,281 @@
}
# ─── Load data ────────────────────────────────────────────────────────────────
+
+# Column names that load_data() requires in an inventory-format CSV; a file
+# missing any of them is rejected with a ValueError.
+REQUIRED_COLUMNS = {
+ "resource_id", "resource_name", "service", "resource_type",
+ "region", "team", "environment", "cpu_avg_7d", "monthly_cost_usd",
+ "days_running", "last_accessed", "status",
+}
+
+
+def _detect_csv_format(filepath: str) -> str:
+    """Classify the CSV at *filepath* from its first line only.
+
+    Returns:
+        ``"billing_resource"`` when the line contains ``($)`` and, after
+        dropping a leading BOM and quote characters, starts with
+        ``Resource``; ``"billing_service"`` when it contains ``($)`` and
+        ``Service``; ``"inventory"`` otherwise.
+    """
+    with open(filepath, newline="", encoding="utf-8-sig") as handle:
+        header = handle.readline()
+
+    # Both billing layouts label their cost columns with "($)".
+    if "($)" not in header:
+        return "inventory"
+
+    leading_text = header.lstrip("\ufeff").strip('"')
+    if leading_text.startswith("Resource"):
+        return "billing_resource"
+    return "billing_service" if "Service" in header else "inventory"
+
+
def load_data(filepath="aws_cost_data.csv"):
+ """Load the resource-inventory CSV at *filepath* into a DataFrame.
+
+ The file format is detected first. A resource-level billing export is
+ handed to run_billing_analysis() and the process then exits with status 0;
+ a service-level billing export is rejected.
+
+ Returns:
+ The DataFrame with an added ``days_since_access`` column (whole days
+ between today and ``last_accessed``).
+
+ Raises:
+ FileNotFoundError: If *filepath* does not exist.
+ ValueError: For a service-level billing export, or when required
+ columns are missing.
+ SystemExit: After billing analysis of a resource-level export.
+ """
+ if not os.path.exists(filepath):
+ raise FileNotFoundError(
+ f"\n File not found: '{filepath}'\n"
+ f" Check the path and try again."
+ )
+
+ fmt = _detect_csv_format(filepath)
+
+ if fmt == "billing_resource":
+ # Cost Explorer resource-level export — run billing analysis instead
+ print(f" Detected format: AWS Cost Explorer resource-level export")
+ print(f" Running billing analysis mode...\n")
+ run_billing_analysis(filepath)
+ raise SystemExit(0)
+
+ if fmt == "billing_service":
+ raise ValueError(
+ f"\n Detected an AWS Cost Explorer service-level billing export in '{filepath}'.\n"
+ f" This format has service costs grouped by date and lacks per-resource\n"
+ f" metrics (CPU, size, team, etc.) needed for waste detection.\n\n"
+ f" Use 'aws_cost_data.csv' (the included sample) as a template for the\n"
+ f" resource inventory format this engine requires."
+ )
+
+ # Inventory format — validate required columns
+ # Only the header row is read (nrows=0) for the check.
+ # NOTE(review): header names are whitespace-stripped for this check, but the
+ # full read below uses them unstripped — confirm padded headers are handled.
+ preview = pd.read_csv(filepath, nrows=0)
+ actual_cols = set(preview.columns.str.strip())
+ missing = REQUIRED_COLUMNS - actual_cols
+ if missing:
+ raise ValueError(
+ f"\n Incompatible CSV format in '{filepath}'.\n"
+ f" Missing required columns: {sorted(missing)}\n"
+ f" Found columns: {sorted(actual_cols)}\n\n"
+ f" Required columns: {sorted(REQUIRED_COLUMNS)}"
+ )
+
df = pd.read_csv(filepath, parse_dates=["last_accessed"])
df["days_since_access"] = (datetime.today() - df["last_accessed"]).dt.days
return df
+
+# ─── Billing CSV analysis (Cost Explorer resource-level export) ───────────────
+
+# Bare resource-ID patterns, checked first and in this order.
+_RESOURCE_ID_PATTERNS = (
+    (r"^i-[0-9a-f]+$", "EC2"),
+    (r"^vol-[0-9a-f]+$", "EBS"),
+    (r"^snap-[0-9a-f]+$", "EBS Snapshot"),
+    (r"^eipalloc-", "Elastic IP"),
+    (r"^eni-", "Network Interface"),
+    (r"^vpn-", "VPN"),
+)
+
+# ARN service namespace (third colon-separated field) -> reporting label.
+_ARN_NAMESPACE_SERVICES = {
+    "cloudfront": "CloudFront",
+    "elasticloadbalancing": "ELB",
+    "rds": "RDS",
+    "kinesis": "Kinesis",
+    "logs": "CloudWatch Logs",
+    "lambda": "Lambda",
+    "kms": "KMS",
+    "route53": "Route53",
+    "quicksight": "QuickSight",
+    "s3": "S3",
+    "secretsmanager": "Secrets Manager",
+    "ecr": "ECR",
+    "elasticfilesystem": "EFS",
+    "scheduler": "EventBridge",
+    "sns": "SNS",
+    "sqs": "SQS",
+    "amplify": "Amplify",
+    "cloudformation": "CloudFormation",
+}
+
+# Substring heuristics used for identifiers that are not ARNs, or whose ARN
+# namespace is not listed above. Every needle of an entry must be present;
+# entries are tried in order.
+_SUBSTRING_SERVICES = (
+    (("cloudfront",), "CloudFront"),
+    (("elasticloadbalancing",), "ELB"),
+    (("rds",), "RDS"),
+    (("docdb",), "RDS"),
+    (("kinesis",), "Kinesis"),
+    (("logs", "log-group"), "CloudWatch Logs"),
+    (("lambda",), "Lambda"),
+    (("kms",), "KMS"),
+    (("elastic-ip",), "Elastic IP"),
+    (("route53",), "Route53"),
+    (("quicksight",), "QuickSight"),
+    (("arn:aws:s3:::",), "S3"),
+    (("secretsmanager",), "Secrets Manager"),
+    (("ecr",), "ECR"),
+    (("elasticfilesystem",), "EFS"),
+    (("scheduler",), "EventBridge"),
+    (("sns",), "SNS"),
+    (("sqs",), "SQS"),
+    (("amplify",), "Amplify"),
+    (("cloudformation",), "CloudFormation"),
+)
+
+
+def _infer_service(resource_id: str) -> str:
+    """Return a service label for a resource ID or ARN column name.
+
+    Resolution order:
+      1. Bare resource-ID patterns (``i-…``, ``vol-…``, ``snap-…``, …).
+      2. For ARNs, the service namespace field. Reading the namespace
+         avoids misclassifying a resource whose *name* happens to contain
+         another service's keyword (e.g. a Lambda function called
+         ``process-records`` contains ``rds``).
+      3. Ordered substring heuristics over the whole identifier.
+
+    Args:
+        resource_id: Resource identifier or ARN.
+
+    Returns:
+        The service label, or ``"Other"`` when nothing matches.
+    """
+    h = resource_id
+    for pattern, label in _RESOURCE_ID_PATTERNS:
+        if re.match(pattern, h):
+            return label
+
+    if h.startswith("arn:"):
+        fields = h.split(":", 5)
+        namespace = fields[2] if len(fields) > 2 else ""
+        label = _ARN_NAMESPACE_SERVICES.get(namespace)
+        if label is not None:
+            return label
+        # Unlisted namespace (e.g. ec2): fall through to the heuristics.
+
+    for needles, label in _SUBSTRING_SERVICES:
+        if all(needle in h for needle in needles):
+            return label
+    return "Other"
+
+
+def _extract_region_from_arn(arn: str) -> str:
+    """Return the fourth colon-separated field of *arn*.
+
+    ``"global"`` is returned when that field is missing or empty.
+    """
+    fields = arn.split(":")
+    try:
+        region = fields[3]
+    except IndexError:
+        return "global"
+    return region or "global"
+
+
+def run_billing_analysis(filepath: str) -> None:
+    """Parse a Cost Explorer resource-level billing CSV and write billing_report.json.
+
+    The first row is read as the header (a label followed by one column per
+    resource), the second as the per-resource totals, and every later row
+    whose first cell starts with a ``YYYY-MM-DD`` date as a per-date cost
+    row. Blank rows are ignored.
+
+    Side effects:
+        Writes ``billing_report.json`` to the current working directory and
+        prints a summary to stdout.
+
+    Args:
+        filepath: Path of the billing CSV.
+
+    Returns:
+        None. When the file has fewer than two rows a message is printed and
+        no report is written.
+    """
+    import csv as csv_mod
+    from collections import defaultdict
+
+    with open(filepath, newline="", encoding="utf-8-sig") as f:
+        rows = list(csv_mod.reader(f))
+
+    if len(rows) < 2:
+        print(" Empty or unreadable file.")
+        return
+
+    headers = rows[0]    # label + resource ARNs/IDs
+    total_row = rows[1]  # "Resource total" + costs
+
+    # Collect date rows (any row whose first cell looks like a date).
+    # csv.reader yields [] for a blank line, so guard before indexing r[0].
+    date_rows = [
+        r for r in rows[2:]
+        if r and re.match(r"\d{4}-\d{2}-\d{2}", r[0].strip('"'))
+    ]
+    dates = [r[0].strip('"') for r in date_rows]
+
+    # Build S3 bucket name set from ARN-prefixed columns for cross-reference
+    s3_bucket_names = {
+        headers[i].strip('"').replace("arn:aws:s3:::", "").replace("($)", "").strip()
+        for i in range(1, len(headers))
+        if "arn:aws:s3:::" in headers[i]
+    }
+
+    resources = []
+    for i in range(1, len(headers)):
+        raw_name = headers[i].strip('"').replace("($)", "").strip()
+        cost_str = total_row[i] if i < len(total_row) else "0"
+        try:
+            total_cost = float(cost_str)
+        except ValueError:
+            # Non-numeric or empty total cell counts as zero cost.
+            total_cost = 0.0
+
+        # Skip the synthetic "Total costs" column
+        if raw_name in ("Total costs", "No Resource Id"):
+            continue
+
+        # Determine service
+        # S3: bare bucket names (no arn: prefix) that appear in the s3_bucket_names set
+        if (raw_name in s3_bucket_names
+                and not raw_name.startswith("arn:")
+                and not re.match(r"^i-|^vol-|^vpn-|^snap-|^eipalloc-|^eni-", raw_name)):
+            service = "S3"
+            resource_id = raw_name
+        else:
+            service = _infer_service(raw_name)
+            resource_id = raw_name
+
+        # Skip ARN-prefixed S3 duplicates (they show $0 and are already counted above)
+        if "arn:aws:s3:::" in raw_name:
+            continue
+
+        # Per-date costs (for last-active heuristic)
+        daily_costs = []
+        for dr in date_rows:
+            try:
+                daily_costs.append((dr[0].strip('"'), float(dr[i]) if i < len(dr) else 0.0))
+            except (ValueError, IndexError):
+                daily_costs.append((dr[0].strip('"'), 0.0))
+
+        # Last date with non-zero cost
+        active_dates = [d for d, c in daily_costs if c > 0]
+        last_active = active_dates[-1] if active_dates else (dates[-1] if dates else "unknown")
+        first_active = active_dates[0] if active_dates else (dates[0] if dates else "unknown")
+
+        region = _extract_region_from_arn(raw_name) if raw_name.startswith("arn:") else "us-east-1"
+        # NOTE(review): the total-row value is treated as a daily cost and
+        # scaled by 30 — confirm this matches the export's date range.
+        monthly_cost = round(total_cost * 30, 2)
+
+        resources.append({
+            "resource_id": resource_id,
+            "service": service,
+            "region": region,
+            "daily_cost": round(total_cost, 6),
+            "monthly_cost": monthly_cost,
+            "last_active": last_active,
+            "first_active": first_active,
+            "date_range": f"{dates[0]} → {dates[-1]}" if dates else "unknown",
+        })
+
+    resources.sort(key=lambda x: -x["monthly_cost"])
+
+    # Service summary
+    by_service: dict = defaultdict(lambda: {"count": 0, "monthly_cost": 0.0, "resources": []})
+    for r in resources:
+        by_service[r["service"]]["count"] += 1
+        by_service[r["service"]]["monthly_cost"] += r["monthly_cost"]
+        by_service[r["service"]]["resources"].append(r)
+
+    # S3 deep dive
+    s3_resources = sorted(
+        [r for r in resources if r["service"] == "S3"],
+        key=lambda x: -x["monthly_cost"]
+    )
+    s3_zero = [r for r in s3_resources if r["monthly_cost"] == 0]
+    s3_active = [r for r in s3_resources if r["monthly_cost"] > 0]
+
+    total_monthly = round(sum(r["monthly_cost"] for r in resources), 2)
+    s3_monthly = round(sum(r["monthly_cost"] for r in s3_resources), 2)
+
+    report = {
+        "generated_at": datetime.today().strftime("%Y-%m-%d %H:%M"),
+        "source_file": filepath,
+        "date_range": f"{dates[0]} → {dates[-1]}" if dates else "unknown",
+        "total_resources": len(resources),
+        "total_monthly_cost": total_monthly,
+        "s3_summary": {
+            "total_buckets": len(s3_resources),
+            "buckets_with_cost": len(s3_active),
+            "zero_cost_buckets": len(s3_zero),
+            "total_monthly_cost": s3_monthly,
+            "pct_of_total": round(s3_monthly / total_monthly * 100, 1) if total_monthly else 0,
+        },
+        "service_breakdown": {
+            svc: {"count": v["count"], "monthly_cost": round(v["monthly_cost"], 2)}
+            for svc, v in sorted(by_service.items(), key=lambda x: -x[1]["monthly_cost"])
+        },
+        "top_s3_buckets": s3_active[:20],
+        "zero_cost_s3": [r["resource_id"] for r in s3_zero],
+        "top_resources": resources[:20],
+    }
+
+    with open("billing_report.json", "w") as f:
+        json.dump(report, f, indent=2)
+
+    # ── Print summary ──────────────────────────────────────────────────────────
+    print("=" * 60)
+    print(" BILLING ANALYSIS — COST EXPLORER RESOURCE EXPORT")
+    print("=" * 60)
+    print(f" Date range : {report['date_range']}")
+    print(f" Total resources : {report['total_resources']}")
+    print(f" Total monthly est : ${total_monthly:,.2f}")
+    print()
+    print(" Cost by service:")
+    for svc, info in report["service_breakdown"].items():
+        # Bar length is the service's share of the total, capped at 30 cells.
+        bar = "█" * min(int(info["monthly_cost"] / total_monthly * 30), 30) if total_monthly else ""
+        print(f" {svc:<25} ${info['monthly_cost']:>10,.2f}/mo {bar}")
+    print()
+    print(f" S3 BUCKET ANALYSIS ({len(s3_resources)} buckets · ${s3_monthly:,.2f}/mo)")
+    print(f" {'Bucket':<50} {'Monthly':>10}")
+    print(f" {'-'*50} {'-'*10}")
+    for b in s3_active[:15]:
+        print(f" {b['resource_id']:<50} ${b['monthly_cost']:>9,.2f}")
+    if len(s3_active) > 15:
+        print(f" ... and {len(s3_active)-15} more buckets")
+    if s3_zero:
+        print(f"\n {len(s3_zero)} buckets with $0 cost (potentially unused):")
+        for b in s3_zero[:10]:
+            print(f" - {b['resource_id']}")
+    print()
+    print(" billing_report.json written.")
+    print("=" * 60)
+
+# ─── S3 helpers ─────────────────────────────────────────────────────────────────
+
+def _parse_size_gb(resource_type: str) -> float:
+    """Return the number preceding ``GB`` (any case) in *resource_type*.
+
+    Example input: ``'Standard-1625GB'``. Returns ``0.0`` when no such
+    number is present.
+    """
+    found = re.search(r'([\d.]+)GB', resource_type, re.IGNORECASE)
+    if found is None:
+        return 0.0
+    return float(found.group(1))
+
+
+def _s3_access_tier(days: int) -> str:
+    """Map days since last access onto an access-tier label.
+
+    Under 30 days is ``Active``, under 60 ``Infrequent``, under 90 ``Cold``
+    and anything else ``Frozen``.
+    """
+    thresholds = ((30, "Active"), (60, "Infrequent"), (90, "Cold"))
+    for upper_bound, label in thresholds:
+        if days < upper_bound:
+            return label
+    return "Frozen"
+
+
# ─── Detection rules ──────────────────────────────────────────────────────────
def detect_idle_ec2(df):
@@ -115,20 +387,59 @@ def detect_cold_s3(df):
]
for _, r in s3.iterrows():
saving = round(r["monthly_cost_usd"] * 0.55, 2) # Glacier ~55% cheaper
+ size_gb = _parse_size_gb(str(r["resource_type"]))
+ tier = _s3_access_tier(int(r["days_since_access"]))
findings.append({
- "finding_id": f"S3-COLD-{r['resource_id'][-8:]}",
- "category": "Storage Optimisation",
- "severity": "MEDIUM",
- "service": "S3",
- "resource_id": r["resource_id"],
- "resource_name": r["resource_name"],
- "region": r["region"],
- "team": r["team"],
- "environment": r["environment"],
- "detail": f"Bucket not accessed in {r['days_since_access']} days but on S3 Standard pricing. Move to Glacier.",
+ "finding_id": f"S3-COLD-{r['resource_id'][-8:]}",
+ "category": "Storage Optimisation",
+ "severity": "MEDIUM",
+ "service": "S3",
+ "resource_id": r["resource_id"],
+ "resource_name": r["resource_name"],
+ "region": r["region"],
+ "team": r["team"],
+ "environment": r["environment"],
+ "size_gb": size_gb,
+ "access_tier": tier,
+ "days_since_access": int(r["days_since_access"]),
+ "last_accessed": str(r["last_accessed"].date()),
+ "detail": f"{size_gb:,.0f} GB bucket not accessed in {r['days_since_access']} days but on S3 Standard pricing. Tier: {tier}.",
+ "monthly_waste_usd": saving,
+ "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
+ "cli_fix": f"aws s3api put-bucket-lifecycle-configuration --bucket {r['resource_id']} --lifecycle-configuration file://glacier-lifecycle.json"
+ })
+ return findings
+
+
+def detect_s3_infrequent_access(df):
+ """Flag S3 buckets accessed 30-59 days ago — candidates for S3-IA tier."""
+ findings = []
+ # S3 rows whose days_since_access lies in [30, S3_COLD_DAYS).
+ s3 = df[
+ (df["service"] == "S3") &
+ (df["days_since_access"] >= 30) &
+ (df["days_since_access"] < S3_COLD_DAYS)
+ ]
+ for _, r in s3.iterrows():
+ saving = round(r["monthly_cost_usd"] * 0.45, 2) # S3-IA ~45% cheaper
+ size_gb = _parse_size_gb(str(r["resource_type"]))
+ # finding_id uses the last eight characters of the resource ID.
+ # NOTE(review): confirm the cli_fix arguments below against the AWS CLI
+ # reference for put-bucket-intelligent-tiering-configuration.
+ findings.append({
+ "finding_id": f"S3-IA-{r['resource_id'][-8:]}",
+ "category": "Storage Optimisation",
+ "severity": "LOW",
+ "service": "S3",
+ "resource_id": r["resource_id"],
+ "resource_name": r["resource_name"],
+ "region": r["region"],
+ "team": r["team"],
+ "environment": r["environment"],
+ "size_gb": size_gb,
+ "access_tier": "Infrequent",
+ "days_since_access": int(r["days_since_access"]),
+ "last_accessed": str(r["last_accessed"].date()),
+ "detail": f"{size_gb:,.0f} GB bucket last accessed {r['days_since_access']} days ago. Move to S3-Infrequent Access tier.",
"monthly_waste_usd": saving,
- "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
- "cli_fix": f"aws s3api put-bucket-lifecycle-configuration --bucket {r['resource_id']} --lifecycle-configuration file://glacier-lifecycle.json"
+ "recommendation": "Switch to S3 Infrequent Access or enable Intelligent-Tiering.",
+ "cli_fix": f"aws s3api put-bucket-intelligent-tiering-configuration --bucket {r['resource_id']} --id tiering-config --intelligent-tiering-configuration Id=tiering-config,Status=Enabled"
})
return findings
@@ -226,6 +537,74 @@ def build_summary(all_findings, top10):
}
+# ─── S3 deep-dive analysis ────────────────────────────────────────────────────
+
+def _build_s3_analysis(df):
+    """Summarise the S3 rows of *df* per bucket and per access tier.
+
+    Every S3 row gets a size, an access tier, an estimated saving, a
+    recommendation, a CLI command and a ``terminate_candidate`` flag (tier
+    ``Frozen`` in a ``dev`` or ``sandbox`` environment). Buckets are
+    ordered by ``days_since_access``, largest first.
+
+    Returns:
+        A dict with overall totals, a ``tier_summary`` mapping and the
+        per-bucket list under ``buckets``.
+    """
+    # tier -> (share of monthly cost counted as saving, recommendation text).
+    # Tiers not listed here carry no saving.
+    tier_plan = {
+        "Frozen": (0.55, "Delete bucket (dev/sandbox) or archive to Glacier"),
+        "Cold": (0.55, "Move to S3 Glacier via Lifecycle rule"),
+        "Infrequent": (0.45, "Switch to S3-Infrequent Access or Intelligent-Tiering"),
+    }
+
+    buckets = []
+    s3_rows = df[df["service"] == "S3"].copy()
+    for _, row in s3_rows.iterrows():
+        size_gb = _parse_size_gb(str(row["resource_type"]))
+        tier = _s3_access_tier(int(row["days_since_access"]))
+        terminate = (tier == "Frozen") and (row["environment"] in ("dev", "sandbox"))
+
+        if tier in tier_plan:
+            factor, recommendation = tier_plan[tier]
+            saving = round(row["monthly_cost_usd"] * factor, 2)
+        else:
+            saving = 0.0
+            recommendation = "No action needed — actively used"
+
+        if terminate:
+            cli_fix = f"aws s3 rb s3://{row['resource_id']} --force"
+        else:
+            cli_fix = f"aws s3api put-bucket-lifecycle-configuration --bucket {row['resource_id']} --lifecycle-configuration file://glacier-lifecycle.json"
+
+        buckets.append({
+            "resource_id": row["resource_id"],
+            "resource_name": row["resource_name"],
+            "region": row["region"],
+            "team": row["team"],
+            "environment": row["environment"],
+            "size_gb": size_gb,
+            "storage_class": str(row["resource_type"]).split("-")[0],
+            "last_accessed": str(row["last_accessed"].date()),
+            "days_since_access": int(row["days_since_access"]),
+            "access_tier": tier,
+            "monthly_cost_usd": float(row["monthly_cost_usd"]),
+            "potential_saving": saving,
+            "terminate_candidate": terminate,
+            "recommendation": recommendation,
+            "cli_fix": cli_fix,
+        })
+
+    buckets.sort(key=lambda b: b["days_since_access"], reverse=True)
+
+    # Per-tier roll-up, accumulated unrounded and rounded once at the end.
+    tier_summary = {}
+    for bucket in buckets:
+        entry = tier_summary.setdefault(
+            bucket["access_tier"], {"count": 0, "size_gb": 0.0, "cost": 0.0}
+        )
+        entry["count"] += 1
+        entry["size_gb"] += bucket["size_gb"]
+        entry["cost"] += bucket["monthly_cost_usd"]
+    for entry in tier_summary.values():
+        entry["size_gb"] = round(entry["size_gb"], 1)
+        entry["cost"] = round(entry["cost"], 2)
+
+    return {
+        "generated_at": datetime.today().strftime("%Y-%m-%d %H:%M"),
+        "total_buckets": len(buckets),
+        "total_size_gb": round(sum(b["size_gb"] for b in buckets), 1),
+        "total_monthly_cost": round(sum(b["monthly_cost_usd"] for b in buckets), 2),
+        "potential_saving": round(sum(b["potential_saving"] for b in buckets), 2),
+        "terminate_candidates": sum(1 for b in buckets if b["terminate_candidate"]),
+        "tier_summary": tier_summary,
+        "buckets": buckets,
+    }
+
+
# ─── Main ─────────────────────────────────────────────────────────────────────
def run_detection(filepath="aws_cost_data.csv"):
@@ -239,11 +618,17 @@ def run_detection(filepath="aws_cost_data.csv"):
detect_unattached_ebs(df) +
detect_unassociated_eips(df) +
detect_cold_s3(df) +
+ detect_s3_infrequent_access(df) +
detect_rightsizing(df)
)
print(f"Total findings: {len(all_findings)}")
+ # Build and persist dedicated S3 analysis dataset
+ s3_analysis = _build_s3_analysis(df)
+ with open("s3_analysis.json", "w") as f:
+ json.dump(s3_analysis, f, indent=2)
+
top10 = score_and_rank(all_findings)
summary = build_summary(all_findings, top10)
@@ -273,4 +658,15 @@ def run_detection(filepath="aws_cost_data.csv"):
return output
if __name__ == "__main__":
- run_detection("aws_cost_data.csv")
+ # Command-line entry point: an optional positional CSV path replaces the
+ # previously hard-coded file name, which remains the default.
+ import argparse
+ parser = argparse.ArgumentParser(
+ description="Cloud Cost Waste Hunter — detect AWS waste from a cost CSV file"
+ )
+ parser.add_argument(
+ "filepath",
+ nargs="?",
+ default="aws_cost_data.csv",
+ help="Path to the AWS cost CSV file (default: aws_cost_data.csv)",
+ )
+ args = parser.parse_args()
+ run_detection(args.filepath)
diff --git a/findings.json b/findings.json
index ee13b7a..e8c747c 100644
--- a/findings.json
+++ b/findings.json
@@ -1,13 +1,13 @@
{
"summary": {
- "generated_at": "2026-05-25 18:13",
- "total_findings": 39,
- "total_monthly_waste": 5647.17,
- "total_annual_waste": 67766.04,
+ "generated_at": "2026-05-26 15:20",
+ "total_findings": 41,
+ "total_monthly_waste": 5726.62,
+ "total_annual_waste": 68719.44,
"waste_by_category": {
"Idle Resource": 4990.2,
"Zombie Resource": 551.6,
- "Storage Optimisation": 105.37
+ "Storage Optimisation": 184.82
},
"top10_monthly_waste": 4608.3
},
@@ -199,7 +199,7 @@
"recommendation": "Stop or terminate i-09963334018. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-09963334018 --region us-east-1",
"waste_score": 60.0,
- "rank": 17
+ "rank": 18
},
{
"finding_id": "IDLE-EC2-989805",
@@ -216,7 +216,7 @@
"recommendation": "Stop or terminate i-01438989805. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01438989805 --region us-east-1",
"waste_score": 30.0,
- "rank": 21
+ "rank": 22
},
{
"finding_id": "IDLE-EC2-792787",
@@ -352,7 +352,7 @@
"recommendation": "Stop or terminate i-01822873088. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01822873088 --region eu-west-1",
"waste_score": 60.0,
- "rank": 18
+ "rank": 19
},
{
"finding_id": "IDLE-EC2-514789",
@@ -386,7 +386,7 @@
"recommendation": "Stop or terminate i-04875962612. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-04875962612 --region ap-south-1",
"waste_score": 30.0,
- "rank": 22
+ "rank": 23
},
{
"finding_id": "IDLE-EC2-237817",
@@ -420,7 +420,7 @@
"recommendation": "Stop or terminate i-01676168421. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01676168421 --region eu-west-1",
"waste_score": 30.0,
- "rank": 23
+ "rank": 24
},
{
"finding_id": "IDLE-RDS-LA8541",
@@ -505,7 +505,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-08067372072 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08067372072",
"waste_score": 20.0,
- "rank": 24
+ "rank": 25
},
{
"finding_id": "EBS-UNATTACHED-416213",
@@ -522,7 +522,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-01429416213 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-01429416213",
"waste_score": 20.0,
- "rank": 25
+ "rank": 26
},
{
"finding_id": "EBS-UNATTACHED-285822",
@@ -539,7 +539,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-09303285822 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-09303285822",
"waste_score": 10.0,
- "rank": 29
+ "rank": 31
},
{
"finding_id": "EBS-UNATTACHED-782991",
@@ -556,7 +556,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-03271782991 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03271782991",
"waste_score": 10.0,
- "rank": 30
+ "rank": 32
},
{
"finding_id": "EBS-UNATTACHED-872495",
@@ -607,7 +607,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-07393195616 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-07393195616",
"waste_score": 20.0,
- "rank": 26
+ "rank": 27
},
{
"finding_id": "EBS-UNATTACHED-195918",
@@ -624,7 +624,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-05310195918 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-05310195918",
"waste_score": 5.0,
- "rank": 32
+ "rank": 34
},
{
"finding_id": "EBS-UNATTACHED-785916",
@@ -641,7 +641,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-08325785916 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08325785916",
"waste_score": 20.0,
- "rank": 27
+ "rank": 28
},
{
"finding_id": "EBS-UNATTACHED-292475",
@@ -658,7 +658,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-03343292475 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03343292475",
"waste_score": 5.0,
- "rank": 33
+ "rank": 35
},
{
"finding_id": "EBS-UNATTACHED-374338",
@@ -692,7 +692,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-03929454134 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03929454134",
"waste_score": 20.0,
- "rank": 28
+ "rank": 29
},
{
"finding_id": "EIP-UNUSED-640499",
@@ -709,7 +709,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-063640499 --region ap-south-1",
"waste_score": 2.16,
- "rank": 34
+ "rank": 36
},
{
"finding_id": "EIP-UNUSED-062156",
@@ -726,7 +726,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-067062156 --region ap-south-1",
"waste_score": 2.16,
- "rank": 35
+ "rank": 37
},
{
"finding_id": "EIP-UNUSED-813739",
@@ -743,7 +743,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-084813739 --region ap-south-1",
"waste_score": 2.16,
- "rank": 36
+ "rank": 38
},
{
"finding_id": "EIP-UNUSED-600766",
@@ -760,7 +760,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-099600766 --region eu-west-1",
"waste_score": 2.16,
- "rank": 37
+ "rank": 39
},
{
"finding_id": "EIP-UNUSED-276174",
@@ -777,7 +777,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-067276174 --region eu-west-1",
"waste_score": 2.16,
- "rank": 38
+ "rank": 40
},
{
"finding_id": "EIP-UNUSED-788100",
@@ -794,7 +794,28 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-044788100 --region ap-south-1",
"waste_score": 2.16,
- "rank": 39
+ "rank": 41
+ },
+ {
+ "finding_id": "S3-COLD-ucket-03",
+ "category": "Storage Optimisation",
+ "severity": "MEDIUM",
+ "service": "S3",
+ "resource_id": "s3-frontend-dev-bucket-03",
+ "resource_name": "frontend-dev-s3-03",
+ "region": "us-east-1",
+ "team": "frontend",
+ "environment": "dev",
+ "size_gb": 4951.0,
+ "access_tier": "Cold",
+ "days_since_access": 60,
+ "last_accessed": "2026-03-27",
+ "detail": "4,951 GB bucket not accessed in 60 days but on S3 Standard pricing. Tier: Cold.",
+ "monthly_waste_usd": 62.63,
+ "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-03 --lifecycle-configuration file://glacier-lifecycle.json",
+ "waste_score": 62.63,
+ "rank": 17
},
{
"finding_id": "S3-COLD-ucket-04",
@@ -806,12 +827,16 @@
"region": "us-east-1",
"team": "payments",
"environment": "sandbox",
- "detail": "Bucket not accessed in 101 days but on S3 Standard pricing. Move to Glacier.",
+ "size_gb": 3822.0,
+ "access_tier": "Frozen",
+ "days_since_access": 102,
+ "last_accessed": "2026-02-13",
+ "detail": "3,822 GB bucket not accessed in 102 days but on S3 Standard pricing. Tier: Frozen.",
"monthly_waste_usd": 48.35,
"recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
"cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-04 --lifecycle-configuration file://glacier-lifecycle.json",
"waste_score": 48.35,
- "rank": 20
+ "rank": 21
},
{
"finding_id": "S3-COLD-ucket-07",
@@ -823,12 +848,16 @@
"region": "us-east-1",
"team": "payments",
"environment": "sandbox",
- "detail": "Bucket not accessed in 78 days but on S3 Standard pricing. Move to Glacier.",
+ "size_gb": 578.0,
+ "access_tier": "Cold",
+ "days_since_access": 79,
+ "last_accessed": "2026-03-08",
+ "detail": "578 GB bucket not accessed in 79 days but on S3 Standard pricing. Tier: Cold.",
"monthly_waste_usd": 7.31,
"recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
"cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-07 --lifecycle-configuration file://glacier-lifecycle.json",
"waste_score": 7.31,
- "rank": 31
+ "rank": 33
},
{
"finding_id": "S3-COLD-ucket-08",
@@ -840,12 +869,37 @@
"region": "us-east-1",
"team": "ml-ops",
"environment": "staging",
- "detail": "Bucket not accessed in 91 days but on S3 Standard pricing. Move to Glacier.",
+ "size_gb": 3930.0,
+ "access_tier": "Frozen",
+ "days_since_access": 92,
+ "last_accessed": "2026-02-23",
+ "detail": "3,930 GB bucket not accessed in 92 days but on S3 Standard pricing. Tier: Frozen.",
"monthly_waste_usd": 49.71,
"recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
"cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-ml-ops-staging-bucket-08 --lifecycle-configuration file://glacier-lifecycle.json",
"waste_score": 49.71,
- "rank": 19
+ "rank": 20
+ },
+ {
+ "finding_id": "S3-IA-ucket-01",
+ "category": "Storage Optimisation",
+ "severity": "LOW",
+ "service": "S3",
+ "resource_id": "s3-frontend-dev-bucket-01",
+ "resource_name": "frontend-dev-s3-01",
+ "region": "us-east-1",
+ "team": "frontend",
+ "environment": "dev",
+ "size_gb": 1625.0,
+ "access_tier": "Infrequent",
+ "days_since_access": 52,
+ "last_accessed": "2026-04-04",
+ "detail": "1,625 GB bucket last accessed 52 days ago. Move to S3-Infrequent Access tier.",
+ "monthly_waste_usd": 16.82,
+ "recommendation": "Switch to S3 Infrequent Access or enable Intelligent-Tiering.",
+ "cli_fix": "aws s3api put-bucket-intelligent-tiering-configuration --bucket s3-frontend-dev-bucket-01 --id tiering-config --intelligent-tiering-configuration Id=tiering-config,Status=Enabled",
+ "waste_score": 10.09,
+ "rank": 30
}
]
}
\ No newline at end of file
diff --git a/llm_report.json b/llm_report.json
index 1c4bdd4..82c7afa 100644
--- a/llm_report.json
+++ b/llm_report.json
@@ -1,17 +1,17 @@
{
- "executive_summary": "This audit identified $67,766 in annual waste across 39 findings, with a single idle RDS instance costing $691/month being the top offender. 88% of waste ($4,990) comes from idle resources running in non-production environments that could be safely stopped today. Immediate action on the top 10 findings alone would save $4,607 monthly with zero business impact.",
- "total_monthly_waste": 5647.17,
- "total_annual_waste": 67766.04,
+ "executive_summary": "Your AWS infrastructure is wasting $68,719 annually across 41 resources, with the majority ($59,884) coming from idle EC2 instances and RDS databases running with extremely low utilization. The worst offender is a sandbox RDS instance that's been idle for 68 days, burning $691 monthly with just 2.91% CPU usage. Immediate action on the top 10 findings alone would save $48,516 annually with zero business impact.",
+ "total_monthly_waste": 5726.62,
+ "total_annual_waste": 68719.44,
"findings": [
{
"rank": 1,
"finding_id": "IDLE-RDS-LA8541",
"resource_name": "platform-sandbox-rds-03",
"team": "platform",
- "plain_english": "This database has been sitting idle for 68 days in a sandbox environment, using only 2.91% CPU but still charging full price. RDS instances can't be stopped for more than 7 days, so it keeps auto-restarting and billing continuously.",
- "business_impact": "Wasting $8,294 annually on a database that appears to serve no active purpose in sandbox testing.",
+ "plain_english": "This database has been sitting idle for 68 days in your sandbox environment, using only 2.91% of its CPU capacity. Unlike EC2 instances, RDS can't be stopped for more than 7 days, so it's been continuously charging you $691 per month for doing essentially nothing.",
+ "business_impact": "This single unused database is costing $8,294 annually with zero business value.",
"monthly_saving": 691.2,
- "priority_action": "Create a final backup snapshot and delete the database if no active development needs it.",
+ "priority_action": "Create a final backup snapshot and delete this database immediately if no active development is using it.",
"cli_fix": "aws rds create-db-snapshot --db-instance-identifier db-PLA8541 --db-snapshot-identifier db-PLA8541-final-snap"
},
{
@@ -19,10 +19,10 @@
"finding_id": "IDLE-EC2-938612",
"resource_name": "payments-staging-ec2-16",
"team": "payments",
- "plain_english": "This server has been running for 94 days but only using 4.18% of its computing power, well below our 5% efficiency threshold. It's essentially an expensive computer doing almost nothing.",
- "business_impact": "Burning $6,635 annually on unused computing capacity that could fund actual business initiatives.",
+ "plain_english": "This server has been running for 94 days with only 4.18% CPU usage, well below productive levels. It's essentially a very expensive computer that's turned on but not doing any meaningful work.",
+ "business_impact": "Wasting $6,635 annually on unused staging capacity that could be provisioned on-demand when needed.",
"monthly_saving": 552.9,
- "priority_action": "Stop the instance immediately and implement auto-start/stop scheduling for staging workloads.",
+ "priority_action": "Stop this instance immediately and implement auto-scheduling to start it only when staging tests are needed.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-07517938612 --region ap-south-1"
},
{
@@ -30,10 +30,10 @@
"finding_id": "IDLE-EC2-514789",
"resource_name": "payments-dev-ec2-21",
"team": "payments",
- "plain_english": "This development server has been running for 62 days with only 3.92% CPU usage, indicating no active development work. Development environments should be stopped when not actively coding.",
- "business_impact": "Wasting $6,635 annually on idle development infrastructure that provides zero business value when unused.",
+ "plain_english": "This development server has been running for 62 days with only 3.92% CPU usage. Development environments typically don't need to run 24/7 since developers work normal business hours.",
+ "business_impact": "Burning $6,635 annually on always-on development infrastructure that likely sits unused nights and weekends.",
"monthly_saving": 552.9,
- "priority_action": "Stop the instance and train developers to start/stop resources as needed for active development.",
+ "priority_action": "Stop this instance and train developers to start it only when actively coding.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01420514789 --region eu-west-1"
},
{
@@ -41,10 +41,10 @@
"finding_id": "IDLE-EC2-237817",
"resource_name": "data-eng-sandbox-ec2-23",
"team": "data-eng",
- "plain_english": "This sandbox server for data engineering has been idle for 39 days with 4.04% CPU usage. Sandbox environments should only run during active experimentation or testing.",
- "business_impact": "Throwing away $6,635 annually on unused sandbox capacity that could support actual data engineering experiments.",
+ "plain_english": "This sandbox server for data engineering experiments has been idle for 39 days with 4.04% CPU usage. Sandbox environments should be ephemeral and destroyed when experiments are complete.",
+ "business_impact": "Costing $6,635 annually for experimental infrastructure that should be created and destroyed as needed.",
"monthly_saving": 552.9,
- "priority_action": "Immediately stop the instance and implement scheduled start/stop for sandbox workloads.",
+ "priority_action": "Terminate this instance and establish a policy that sandbox resources must be tagged with auto-deletion dates.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-03090237817 --region ap-south-1"
},
{
@@ -52,10 +52,10 @@
"finding_id": "IDLE-EC2-602734",
"resource_name": "data-eng-prod-ec2-12",
"team": "data-eng",
- "plain_english": "This production server is severely underutilized at 2.18% CPU over 40 days, suggesting the workload doesn't match the server size. Production resources running this idle indicate poor capacity planning.",
- "business_impact": "Wasting $5,875 annually on oversized production infrastructure that could be rightsized or consolidated.",
+ "plain_english": "This production data engineering server has been running for 40 days with only 2.18% CPU usage. Even in production, servers should be right-sized or replaced with auto-scaling solutions.",
+ "business_impact": "Wasting $5,875 annually on oversized production capacity that could be downsized or moved to serverless.",
"monthly_saving": 489.6,
- "priority_action": "Investigate if this workload can be moved to a smaller instance or consolidated with other services.",
+ "priority_action": "Investigate if this workload can be moved to AWS Lambda, Glue, or a smaller instance type.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-04272602734 --region eu-west-1"
},
{
@@ -63,10 +63,10 @@
"finding_id": "IDLE-EC2-899549",
"resource_name": "frontend-staging-ec2-13",
"team": "frontend",
- "plain_english": "This staging server has been running for 22 days with extremely low CPU usage at 1.1%. Staging environments should only run during active testing or deployment activities.",
- "business_impact": "Squandering $5,875 annually on idle staging infrastructure that provides no testing or deployment value when unused.",
+ "plain_english": "This frontend staging server has been running for 22 days with just 1.1% CPU usage. Staging environments for frontend apps can typically be replaced with serverless hosting or on-demand containers.",
+ "business_impact": "Spending $5,875 annually on staging infrastructure that could be replaced with much cheaper serverless alternatives.",
"monthly_saving": 489.6,
- "priority_action": "Stop the instance and implement CI/CD automation to start staging resources only during deployments.",
+ "priority_action": "Migrate this staging environment to AWS App Runner, Amplify, or implement start/stop automation.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-04944899549 --region eu-west-1"
},
{
@@ -74,10 +74,10 @@
"finding_id": "IDLE-EC2-792787",
"resource_name": "frontend-dev-ec2-04",
"team": "frontend",
- "plain_english": "This development server has been running for 106 days with only 2.2% CPU usage, indicating no active frontend development work. Long-running idle dev resources suggest poor resource hygiene practices.",
- "business_impact": "Losing $4,356 annually on abandoned development infrastructure that could fund actual feature development.",
+ "plain_english": "This frontend development server has been running for 106 days with only 2.2% CPU usage. Development servers should be stopped when not actively being used for coding or testing.",
+ "business_impact": "Wasting $4,356 annually on development infrastructure that likely sits unused most of the time.",
"monthly_saving": 363.0,
- "priority_action": "Stop the instance immediately and establish team policies for managing development environment lifecycles.",
+ "priority_action": "Stop this instance and create a simple start/stop script for developers to use when needed.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-02625792787 --region eu-west-1"
},
{
@@ -85,10 +85,10 @@
"finding_id": "IDLE-EC2-494220",
"resource_name": "frontend-staging-ec2-05",
"team": "frontend",
- "plain_english": "This staging server has been idle for 93 days with 2.95% CPU usage, far longer than any reasonable testing cycle. Staging resources should have automatic cleanup after testing phases.",
- "business_impact": "Hemorrhaging $4,356 annually on forgotten staging infrastructure that serves no current testing or validation purpose.",
+ "plain_english": "This frontend staging server has been idle for 93 days with 2.95% CPU usage. Staging environments should only run when testing is actively happening, not 24/7.",
+ "business_impact": "Burning $4,356 annually on staging capacity that could be provisioned on-demand for testing cycles.",
"monthly_saving": 363.0,
- "priority_action": "Stop the instance and implement automated staging environment cleanup after 7 days of inactivity.",
+ "priority_action": "Stop this instance and integrate start/stop automation with your CI/CD pipeline to provision staging on-demand.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-05231494220 --region ap-south-1"
},
{
@@ -96,10 +96,10 @@
"finding_id": "IDLE-EC2-251661",
"resource_name": "payments-sandbox-ec2-07",
"team": "payments",
- "plain_english": "This sandbox server has been running for 51 days with 3.92% CPU usage, indicating no active payment system experimentation. Sandbox resources should only run during active development or testing.",
- "business_impact": "Wasting $3,319 annually on idle sandbox capacity that could support actual payments feature development.",
+ "plain_english": "This payments sandbox server has been running for 51 days with 3.92% CPU usage. Sandbox environments should be temporary and cleaned up regularly to prevent cost accumulation.",
+ "business_impact": "Costing $3,319 annually for experimental infrastructure that provides no ongoing business value.",
"monthly_saving": 276.6,
- "priority_action": "Stop the instance and create documentation for payments team on proper sandbox resource management.",
+ "priority_action": "Terminate this sandbox instance and implement automatic cleanup policies for all sandbox resources.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01240251661 --region ap-south-1"
},
{
@@ -107,38 +107,38 @@
"finding_id": "IDLE-EC2-598054",
"resource_name": "platform-prod-ec2-19",
"team": "platform",
- "plain_english": "This production server has been running for 87 days with only 2.07% CPU usage, suggesting the workload has been moved elsewhere or dramatically reduced. Production resources this idle represent significant optimization opportunities.",
- "business_impact": "Burning $3,319 annually on unused production capacity that could be eliminated or repurposed for actual platform needs.",
+ "plain_english": "This production platform server has been running for 87 days with only 2.07% CPU usage. Production resources should be right-sized and this appears significantly oversized for its actual workload.",
+ "business_impact": "Wasting $3,319 annually on production capacity that could be downsized or consolidated with other services.",
"monthly_saving": 276.6,
- "priority_action": "Verify if this server is still needed for production workloads and terminate or rightsize accordingly.",
+ "priority_action": "Analyze this server's actual workload and either downsize to a smaller instance type or consolidate with other services.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-09822598054 --region eu-west-1"
}
],
"quick_wins": [
- "Stop all development and sandbox EC2 instances immediately - they can be restarted when needed with zero data loss",
- "Create final snapshots of the idle RDS instance and delete it if no active development depends on it",
- "Implement AWS Instance Scheduler to automatically stop non-production resources outside business hours"
+ "Stop all sandbox and development EC2 instances immediately - they can be restarted when needed with zero data loss",
+ "Create final snapshots and delete the idle RDS instance in sandbox that's been unused for 68 days",
+ "Implement AWS Instance Scheduler on all non-production environments to automatically stop instances outside business hours"
],
"team_breakdown": {
"payments": {
"monthly_waste": 1382.4,
- "top_issue": "Multiple idle staging and development instances running continuously when they should be stopped between active development cycles"
+ "top_issue": "Multiple idle staging and development instances that should be stopped when not in use"
+ },
+ "platform": {
+ "monthly_waste": 967.8,
+ "top_issue": "Idle RDS database in sandbox burning $691 monthly for 68 days with minimal usage"
},
"data-eng": {
"monthly_waste": 1042.5,
- "top_issue": "Idle production and sandbox instances indicating poor capacity planning and resource lifecycle management"
+ "top_issue": "Idle servers in both production and sandbox that need right-sizing or termination"
},
"frontend": {
- "monthly_waste": 1215.6,
- "top_issue": "Long-running idle development and staging environments suggesting lack of automated resource cleanup policies"
- },
- "platform": {
- "monthly_waste": 967.8,
- "top_issue": "Expensive idle RDS instance in sandbox environment that should be deleted or converted to serverless"
+ "monthly_waste": 852.6,
+ "top_issue": "Multiple staging and development environments running 24/7 instead of on-demand"
}
},
- "closing_recommendation": "Focus immediately on stopping all non-production idle resources, which represents 85% of the waste and carries zero business risk. Implement automated scheduling and cleanup policies to prevent this waste from recurring, as the current burn rate of $67K annually could fund significant new infrastructure investments.",
- "generated_at": "2026-05-25 18:17",
+ "closing_recommendation": "Implement a company-wide policy requiring all non-production resources to have auto-stop schedules and automatic cleanup dates. The top 10 findings alone represent $48,516 in annual waste that can be eliminated this week with zero impact on development velocity.",
+ "generated_at": "2026-05-26 16:56",
"all_findings": [
{
"finding_id": "IDLE-EC2-334018",
@@ -155,7 +155,7 @@
"recommendation": "Stop or terminate i-09963334018. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-09963334018 --region us-east-1",
"waste_score": 60.0,
- "rank": 17
+ "rank": 18
},
{
"finding_id": "IDLE-EC2-989805",
@@ -172,7 +172,7 @@
"recommendation": "Stop or terminate i-01438989805. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01438989805 --region us-east-1",
"waste_score": 30.0,
- "rank": 21
+ "rank": 22
},
{
"finding_id": "IDLE-EC2-792787",
@@ -308,7 +308,7 @@
"recommendation": "Stop or terminate i-01822873088. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01822873088 --region eu-west-1",
"waste_score": 60.0,
- "rank": 18
+ "rank": 19
},
{
"finding_id": "IDLE-EC2-514789",
@@ -342,7 +342,7 @@
"recommendation": "Stop or terminate i-04875962612. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-04875962612 --region ap-south-1",
"waste_score": 30.0,
- "rank": 22
+ "rank": 23
},
{
"finding_id": "IDLE-EC2-237817",
@@ -376,7 +376,7 @@
"recommendation": "Stop or terminate i-01676168421. If needed occasionally, convert to spot or use auto-start/stop scheduler.",
"cli_fix": "aws ec2 stop-instances --instance-ids i-01676168421 --region eu-west-1",
"waste_score": 30.0,
- "rank": 23
+ "rank": 24
},
{
"finding_id": "IDLE-RDS-LA8541",
@@ -461,7 +461,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-08067372072 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08067372072",
"waste_score": 20.0,
- "rank": 24
+ "rank": 25
},
{
"finding_id": "EBS-UNATTACHED-416213",
@@ -478,7 +478,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-01429416213 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-01429416213",
"waste_score": 20.0,
- "rank": 25
+ "rank": 26
},
{
"finding_id": "EBS-UNATTACHED-285822",
@@ -495,7 +495,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-09303285822 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-09303285822",
"waste_score": 10.0,
- "rank": 29
+ "rank": 31
},
{
"finding_id": "EBS-UNATTACHED-782991",
@@ -512,7 +512,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-03271782991 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03271782991",
"waste_score": 10.0,
- "rank": 30
+ "rank": 32
},
{
"finding_id": "EBS-UNATTACHED-872495",
@@ -563,7 +563,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-07393195616 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-07393195616",
"waste_score": 20.0,
- "rank": 26
+ "rank": 27
},
{
"finding_id": "EBS-UNATTACHED-195918",
@@ -580,7 +580,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-05310195918 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-05310195918",
"waste_score": 5.0,
- "rank": 32
+ "rank": 34
},
{
"finding_id": "EBS-UNATTACHED-785916",
@@ -597,7 +597,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-08325785916 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08325785916",
"waste_score": 20.0,
- "rank": 27
+ "rank": 28
},
{
"finding_id": "EBS-UNATTACHED-292475",
@@ -614,7 +614,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-03343292475 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03343292475",
"waste_score": 5.0,
- "rank": 33
+ "rank": 35
},
{
"finding_id": "EBS-UNATTACHED-374338",
@@ -648,7 +648,7 @@
"recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.",
"cli_fix": "aws ec2 create-snapshot --volume-id vol-03929454134 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03929454134",
"waste_score": 20.0,
- "rank": 28
+ "rank": 29
},
{
"finding_id": "EIP-UNUSED-640499",
@@ -665,7 +665,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-063640499 --region ap-south-1",
"waste_score": 2.16,
- "rank": 34
+ "rank": 36
},
{
"finding_id": "EIP-UNUSED-062156",
@@ -682,7 +682,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-067062156 --region ap-south-1",
"waste_score": 2.16,
- "rank": 35
+ "rank": 37
},
{
"finding_id": "EIP-UNUSED-813739",
@@ -699,7 +699,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-084813739 --region ap-south-1",
"waste_score": 2.16,
- "rank": 36
+ "rank": 38
},
{
"finding_id": "EIP-UNUSED-600766",
@@ -716,7 +716,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-099600766 --region eu-west-1",
"waste_score": 2.16,
- "rank": 37
+ "rank": 39
},
{
"finding_id": "EIP-UNUSED-276174",
@@ -733,7 +733,7 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-067276174 --region eu-west-1",
"waste_score": 2.16,
- "rank": 38
+ "rank": 40
},
{
"finding_id": "EIP-UNUSED-788100",
@@ -750,7 +750,28 @@
"recommendation": "Release EIP if no longer needed.",
"cli_fix": "aws ec2 release-address --allocation-id eipalloc-044788100 --region ap-south-1",
"waste_score": 2.16,
- "rank": 39
+ "rank": 41
+ },
+ {
+ "finding_id": "S3-COLD-ucket-03",
+ "category": "Storage Optimisation",
+ "severity": "MEDIUM",
+ "service": "S3",
+ "resource_id": "s3-frontend-dev-bucket-03",
+ "resource_name": "frontend-dev-s3-03",
+ "region": "us-east-1",
+ "team": "frontend",
+ "environment": "dev",
+ "size_gb": 4951.0,
+ "access_tier": "Cold",
+ "days_since_access": 60,
+ "last_accessed": "2026-03-27",
+ "detail": "4,951 GB bucket not accessed in 60 days but on S3 Standard pricing. Tier: Cold.",
+ "monthly_waste_usd": 62.63,
+ "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-03 --lifecycle-configuration file://glacier-lifecycle.json",
+ "waste_score": 62.63,
+ "rank": 17
},
{
"finding_id": "S3-COLD-ucket-04",
@@ -762,12 +783,16 @@
"region": "us-east-1",
"team": "payments",
"environment": "sandbox",
- "detail": "Bucket not accessed in 101 days but on S3 Standard pricing. Move to Glacier.",
+ "size_gb": 3822.0,
+ "access_tier": "Frozen",
+ "days_since_access": 102,
+ "last_accessed": "2026-02-13",
+ "detail": "3,822 GB bucket not accessed in 102 days but on S3 Standard pricing. Tier: Frozen.",
"monthly_waste_usd": 48.35,
"recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
"cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-04 --lifecycle-configuration file://glacier-lifecycle.json",
"waste_score": 48.35,
- "rank": 20
+ "rank": 21
},
{
"finding_id": "S3-COLD-ucket-07",
@@ -779,12 +804,16 @@
"region": "us-east-1",
"team": "payments",
"environment": "sandbox",
- "detail": "Bucket not accessed in 78 days but on S3 Standard pricing. Move to Glacier.",
+ "size_gb": 578.0,
+ "access_tier": "Cold",
+ "days_since_access": 79,
+ "last_accessed": "2026-03-08",
+ "detail": "578 GB bucket not accessed in 79 days but on S3 Standard pricing. Tier: Cold.",
"monthly_waste_usd": 7.31,
"recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
"cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-07 --lifecycle-configuration file://glacier-lifecycle.json",
"waste_score": 7.31,
- "rank": 31
+ "rank": 33
},
{
"finding_id": "S3-COLD-ucket-08",
@@ -796,12 +825,37 @@
"region": "us-east-1",
"team": "ml-ops",
"environment": "staging",
- "detail": "Bucket not accessed in 91 days but on S3 Standard pricing. Move to Glacier.",
+ "size_gb": 3930.0,
+ "access_tier": "Frozen",
+ "days_since_access": 92,
+ "last_accessed": "2026-02-23",
+ "detail": "3,930 GB bucket not accessed in 92 days but on S3 Standard pricing. Tier: Frozen.",
"monthly_waste_usd": 49.71,
"recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.",
"cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-ml-ops-staging-bucket-08 --lifecycle-configuration file://glacier-lifecycle.json",
"waste_score": 49.71,
- "rank": 19
+ "rank": 20
+ },
+ {
+ "finding_id": "S3-IA-ucket-01",
+ "category": "Storage Optimisation",
+ "severity": "LOW",
+ "service": "S3",
+ "resource_id": "s3-frontend-dev-bucket-01",
+ "resource_name": "frontend-dev-s3-01",
+ "region": "us-east-1",
+ "team": "frontend",
+ "environment": "dev",
+ "size_gb": 1625.0,
+ "access_tier": "Infrequent",
+ "days_since_access": 52,
+ "last_accessed": "2026-04-04",
+ "detail": "1,625 GB bucket last accessed 52 days ago. Move to S3-Infrequent Access tier.",
+ "monthly_waste_usd": 16.82,
+ "recommendation": "Switch to S3 Infrequent Access or enable Intelligent-Tiering.",
+ "cli_fix": "aws s3api put-bucket-intelligent-tiering-configuration --bucket s3-frontend-dev-bucket-01 --id tiering-config --intelligent-tiering-configuration Id=tiering-config,Status=Enabled",
+ "waste_score": 10.09,
+ "rank": 30
}
],
"raw_top10": [
diff --git a/s3_analysis.json b/s3_analysis.json
new file mode 100644
index 0000000..4412caa
--- /dev/null
+++ b/s3_analysis.json
@@ -0,0 +1,168 @@
+{
+ "generated_at": "2026-05-26 15:20",
+ "total_buckets": 8,
+ "total_size_gb": 18745.0,
+ "total_monthly_cost": 431.14,
+ "potential_saving": 184.82,
+ "terminate_candidates": 1,
+ "tier_summary": {
+ "Frozen": {
+ "count": 2,
+ "size_gb": 7752.0,
+ "cost": 178.3
+ },
+ "Cold": {
+ "count": 2,
+ "size_gb": 5529.0,
+ "cost": 127.16
+ },
+ "Infrequent": {
+ "count": 1,
+ "size_gb": 1625.0,
+ "cost": 37.38
+ },
+ "Active": {
+ "count": 3,
+ "size_gb": 3839.0,
+ "cost": 88.3
+ }
+ },
+ "buckets": [
+ {
+ "resource_id": "s3-payments-sandbox-bucket-04",
+ "resource_name": "payments-sandbox-s3-04",
+ "region": "us-east-1",
+ "team": "payments",
+ "environment": "sandbox",
+ "size_gb": 3822.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-02-13",
+ "days_since_access": 102,
+ "access_tier": "Frozen",
+ "monthly_cost_usd": 87.91,
+ "potential_saving": 48.35,
+ "terminate_candidate": true,
+ "recommendation": "Delete bucket (dev/sandbox) or archive to Glacier",
+ "cli_fix": "aws s3 rb s3://s3-payments-sandbox-bucket-04 --force"
+ },
+ {
+ "resource_id": "s3-ml-ops-staging-bucket-08",
+ "resource_name": "ml-ops-staging-s3-08",
+ "region": "us-east-1",
+ "team": "ml-ops",
+ "environment": "staging",
+ "size_gb": 3930.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-02-23",
+ "days_since_access": 92,
+ "access_tier": "Frozen",
+ "monthly_cost_usd": 90.39,
+ "potential_saving": 49.71,
+ "terminate_candidate": false,
+ "recommendation": "Delete bucket (dev/sandbox) or archive to Glacier",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-ml-ops-staging-bucket-08 --lifecycle-configuration file://glacier-lifecycle.json"
+ },
+ {
+ "resource_id": "s3-payments-sandbox-bucket-07",
+ "resource_name": "payments-sandbox-s3-07",
+ "region": "us-east-1",
+ "team": "payments",
+ "environment": "sandbox",
+ "size_gb": 578.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-03-08",
+ "days_since_access": 79,
+ "access_tier": "Cold",
+ "monthly_cost_usd": 13.29,
+ "potential_saving": 7.31,
+ "terminate_candidate": false,
+ "recommendation": "Move to S3 Glacier via Lifecycle rule",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-07 --lifecycle-configuration file://glacier-lifecycle.json"
+ },
+ {
+ "resource_id": "s3-frontend-dev-bucket-03",
+ "resource_name": "frontend-dev-s3-03",
+ "region": "us-east-1",
+ "team": "frontend",
+ "environment": "dev",
+ "size_gb": 4951.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-03-27",
+ "days_since_access": 60,
+ "access_tier": "Cold",
+ "monthly_cost_usd": 113.87,
+ "potential_saving": 62.63,
+ "terminate_candidate": false,
+ "recommendation": "Move to S3 Glacier via Lifecycle rule",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-03 --lifecycle-configuration file://glacier-lifecycle.json"
+ },
+ {
+ "resource_id": "s3-frontend-dev-bucket-01",
+ "resource_name": "frontend-dev-s3-01",
+ "region": "us-east-1",
+ "team": "frontend",
+ "environment": "dev",
+ "size_gb": 1625.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-04-04",
+ "days_since_access": 52,
+ "access_tier": "Infrequent",
+ "monthly_cost_usd": 37.38,
+ "potential_saving": 16.82,
+ "terminate_candidate": false,
+ "recommendation": "Switch to S3-Infrequent Access or Intelligent-Tiering",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-01 --lifecycle-configuration file://glacier-lifecycle.json"
+ },
+ {
+ "resource_id": "s3-data-eng-dev-bucket-02",
+ "resource_name": "data-eng-dev-s3-02",
+ "region": "us-east-1",
+ "team": "data-eng",
+ "environment": "dev",
+ "size_gb": 203.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-04-29",
+ "days_since_access": 27,
+ "access_tier": "Active",
+ "monthly_cost_usd": 4.67,
+ "potential_saving": 0.0,
+ "terminate_candidate": false,
+ "recommendation": "No action needed \u2014 actively used",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-data-eng-dev-bucket-02 --lifecycle-configuration file://glacier-lifecycle.json"
+ },
+ {
+ "resource_id": "s3-payments-staging-bucket-06",
+ "resource_name": "payments-staging-s3-06",
+ "region": "us-east-1",
+ "team": "payments",
+ "environment": "staging",
+ "size_gb": 2742.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-05-07",
+ "days_since_access": 19,
+ "access_tier": "Active",
+ "monthly_cost_usd": 63.07,
+ "potential_saving": 0.0,
+ "terminate_candidate": false,
+ "recommendation": "No action needed \u2014 actively used",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-staging-bucket-06 --lifecycle-configuration file://glacier-lifecycle.json"
+ },
+ {
+ "resource_id": "s3-data-eng-dev-bucket-05",
+ "resource_name": "data-eng-dev-s3-05",
+ "region": "us-east-1",
+ "team": "data-eng",
+ "environment": "dev",
+ "size_gb": 894.0,
+ "storage_class": "Standard",
+ "last_accessed": "2026-05-14",
+ "days_since_access": 12,
+ "access_tier": "Active",
+ "monthly_cost_usd": 20.56,
+ "potential_saving": 0.0,
+ "terminate_candidate": false,
+ "recommendation": "No action needed \u2014 actively used",
+ "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-data-eng-dev-bucket-05 --lifecycle-configuration file://glacier-lifecycle.json"
+ }
+ ]
+}
\ No newline at end of file