diff --git a/.gitignore b/.gitignore index c09b568..145b9e4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,30 +1,51 @@ -# API keys and secrets — NEVER commit these -SM_api_key -*.key -.env -.env.* -secrets/ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log + +# Ignore any .tfvars files that are generated automatically for each Terraform run. Most +# .tfvars files are managed as part of configuration and so should be included in +# version control. +# +# example.tfvars + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# +# !example_override.tf -# Python virtual environment -.venv/ +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* +plan.out +.terraform.lock.hcl +.DS_Store + +# Python venv/ -env/ __pycache__/ *.pyc *.pyo -*.pyd -.Python -# Generated output files (re-created by pipeline) +# Generated analysis outputs findings.json +s3_analysis.json +billing_report.json llm_report.json -# macOS -.DS_Store +# Real AWS billing exports (contain account data — do not commit) +costs.csv +costs_with_s3.csv -# IDE -.vscode/ -.idea/ - -# Zip archives -*.zip +# Sensitive credentials +SM_api_key diff --git a/dashboard.py b/dashboard.py index e42634f..af8e529 100644 --- a/dashboard.py +++ b/dashboard.py @@ -1,281 +1,686 @@ -import json +import json, os, urllib.request import streamlit as st import plotly.express as px import plotly.graph_objects as go import pandas as pd import requests -from datetime import datetime -# ─── Page config ───────────────────────────────────────────────────────────── -st.set_page_config( - page_title="Cloud Cost Waste Hunter", - page_icon="👻", - layout="wide", - initial_sidebar_state="expanded" -) +st.set_page_config(page_title="Cloud Cost Waste Hunter", page_icon="👻", + layout="wide", initial_sidebar_state="expanded") -# ─── Custom CSS ─────────────────────────────────────────────────────────────── st.markdown(""" """, unsafe_allow_html=True) -# ─── Load report ───────────────────────────────────────────────────────────── +# ── Load report ──────────────────────────────────────────────────────────────── @st.cache_data def load_report(path="llm_report.json"): with open(path) as f: return json.load(f) -report = load_report() -findings = report["findings"] -all_f = report.get("all_findings", []) -team_data = report.get("team_breakdown", {}) -quick_wins = report.get("quick_wins", []) +@st.cache_data +def load_s3_analysis(path="s3_analysis.json"): + if not os.path.exists(path): + return None + with open(path) as f: + return json.load(f) + +report = load_report() +s3_data = load_s3_analysis() +quick_wins = report.get("quick_wins", []) +raw_findings = report.get("findings", []) + +def normalise(f): + return { + "rank": f.get("rank", 0), + "name": f.get("service", f.get("resource_name", "Unknown")), + "category": f.get("category", f.get("flag", "—")), + "plain_english": f.get("plain_english", ""), + "business_impact": f.get("business_impact", ""), + "monthly_saving": f.get("monthly_opportunity", f.get("monthly_saving", 0.0)), + "priority_action": f.get("priority_action", ""), + "aws_action": f.get("aws_action", f.get("cli_fix", "")), + "severity": f.get("severity", "HIGH" if f.get("monthly_opportunity", f.get("monthly_saving", 0)) > 100 else "MEDIUM"), + } -# ─── Sidebar ───────────────────────────────────────────────────────────────── +findings = [normalise(f) for f in raw_findings] +total_monthly = report.get("total_monthly_opportunity", report.get("total_monthly_waste", 0)) +total_annual = report.get("total_annual_waste", total_monthly * 12) +total_spend = report.get("total_monthly_spend", 0) +raw_services = report.get("raw_data", {}).get("services", []) +all_f_legacy = report.get("all_findings", []) + +# ── Claude chatbot helpers ───────────────────────────────────────────────────── +def build_context(): + lines = [ + "You are a senior FinOps engineer assistant in the Ghost Busters Cloud Cost Waste Hunter dashboard.", + "Answer clearly and concisely, grounding every response in the actual account data below.", + "Keep answers to 3-5 sentences unless the user asks for detail.", + "", + f"Data source: {report.get('source', 'AWS Cost Explorer')}", + f"Monthly spend: ${total_spend:,.2f}" if total_spend else "", + f"Monthly opportunity: ${total_monthly:,.2f}", + f"Executive summary: {report.get('executive_summary', '')}", + "", + "FINDINGS:", + ] + for fi in raw_findings: + lines.append( + f"#{fi.get('rank','')} {fi.get('service', fi.get('resource_name',''))} | " + f"${fi.get('monthly_opportunity', fi.get('monthly_saving', 0)):,.2f}/mo | " + f"{fi.get('plain_english','')[:120]} | " + f"Action: {fi.get('priority_action','')[:80]}" + ) + lines += ["", "QUICK WINS:"] + [f"- {w}" for w in quick_wins] + sb = report.get("service_breakdown", {}) + if sb: + lines += [ + f"Biggest concern: {sb.get('biggest_concern','')}", + f"Watch list: {', '.join(sb.get('watch_list',[]))}", + ] + lines.append(f"Recommendation: {report.get('closing_recommendation','')}") + return "\n".join(l for l in lines if l is not None) + +def call_claude(messages): + api_key = os.environ.get("ANTHROPIC_API_KEY", "") + if not api_key: + return "⚠️ ANTHROPIC_API_KEY not set. Run `export ANTHROPIC_API_KEY='sk-ant-...'` then restart Streamlit." + try: + payload = json.dumps({ + "model": "claude-sonnet-4-20250514", + "max_tokens": 800, + "system": build_context(), + "messages": messages + }).encode() + req = urllib.request.Request( + "https://api.anthropic.com/v1/messages", + data=payload, + headers={"Content-Type":"application/json", + "x-api-key":api_key, + "anthropic-version":"2023-06-01"}, + method="POST" + ) + with urllib.request.urlopen(req, timeout=30) as resp: + data = json.loads(resp.read().decode()) + return data["content"][0]["text"] + except Exception as e: + return f"❌ Error: {e}" + +if "chat_history" not in st.session_state: + st.session_state.chat_history = [] + +# ── Sidebar ──────────────────────────────────────────────────────────────────── with st.sidebar: st.markdown("## 👻 Ghost Busters") st.markdown("*Cloud Cost Waste Hunter*") st.markdown("---") - - all_teams = sorted(set(f["team"] for f in findings)) - selected_teams = st.multiselect( - "Filter by team", all_teams, default=all_teams - ) - - all_severities = ["HIGH", "MEDIUM", "LOW"] - selected_sev = st.multiselect( - "Filter by severity", all_severities, default=all_severities - ) - + categories = sorted(set(f["category"] for f in findings)) + selected_cats = st.multiselect("Filter by category", categories, default=categories) + selected_sev = st.multiselect("Filter by severity", ["HIGH","MEDIUM","LOW"], default=["HIGH","MEDIUM","LOW"]) st.markdown("---") st.markdown("**Slack webhook alert**") slack_url = st.text_input("Webhook URL", placeholder="https://hooks.slack.com/...") - if st.button("🔔 Fire top finding alert", use_container_width=True): if slack_url and findings: top = findings[0] - payload = { - "blocks": [ - {"type": "header", "text": {"type": "plain_text", - "text": "👻 Cloud Cost Waste Hunter Alert"}}, - {"type": "section", "text": {"type": "mrkdwn", - "text": f"*#{top['rank']} — {top['resource_name']}*\n{top['plain_english']}"}}, - {"type": "section", "fields": [ - {"type": "mrkdwn", "text": f"*Monthly saving*\n${top['monthly_saving']:,.2f}"}, - {"type": "mrkdwn", "text": f"*Team*\n{top['team']}"}, - {"type": "mrkdwn", "text": f"*Action*\n{top['priority_action'][:80]}..."} - ]}, - {"type": "divider"}, - {"type": "section", "text": {"type": "mrkdwn", - "text": f"*Total waste across environment:* ${report['total_monthly_waste']:,.2f}/mo (${report['total_annual_waste']:,.2f}/yr)"}} - ] - } + payload = {"blocks":[ + {"type":"header","text":{"type":"plain_text","text":"👻 Cloud Cost Waste Hunter Alert"}}, + {"type":"section","text":{"type":"mrkdwn","text":f"*#{top['rank']} — {top['name']}*\n{top['plain_english']}"}}, + {"type":"section","fields":[ + {"type":"mrkdwn","text":f"*Opportunity*\n${top['monthly_saving']:,.2f}/mo"}, + {"type":"mrkdwn","text":f"*Action*\n{top['priority_action'][:80]}..."} + ]}, + {"type":"section","text":{"type":"mrkdwn","text":f"*Total opportunity:* ${total_monthly:,.2f}/mo"}} + ]} try: r = requests.post(slack_url, json=payload, timeout=5) - if r.status_code == 200: - st.success("✅ Alert sent!") - else: - st.error(f"Failed: {r.status_code}") + st.success("✅ Sent!") if r.status_code==200 else st.error(f"Failed: {r.status_code}") except Exception as e: - st.error(f"Error: {e}") + st.error(str(e)) else: st.warning("Enter a Slack webhook URL first") - st.markdown("---") - st.caption(f"Report generated: {report.get('generated_at','—')}") + st.caption(f"Generated: {report.get('generated_at','—')}") + if report.get("source"): st.caption(f"Source: {report['source']}") -# ─── Header ─────────────────────────────────────────────────────────────────── +# ── Page header ─────────────────────────────────────────────────────────────── st.markdown('
👻 Cloud Cost Waste Hunter
', unsafe_allow_html=True) -st.markdown('
AI-powered AWS infrastructure waste detection · Perforce Global Jam 2026
', unsafe_allow_html=True) - -# ─── Metric cards ───────────────────────────────────────────────────────────── -c1, c2, c3, c4 = st.columns(4) -with c1: - st.markdown(f"""
-
Monthly waste
-
${report['total_monthly_waste']:,.0f}
-
recoverable this month
-
""", unsafe_allow_html=True) -with c2: - st.markdown(f"""
-
Annual waste
-
${report['total_annual_waste']:,.0f}
-
if left unaddressed
-
""", unsafe_allow_html=True) -with c3: - st.markdown(f"""
-
Total findings
-
{len(all_f)}
-
across {len(team_data)} teams
-
""", unsafe_allow_html=True) -with c4: - top_team = max(team_data, key=lambda t: team_data[t]["monthly_waste"]) if team_data else "—" - top_waste = team_data[top_team]["monthly_waste"] if team_data else 0 - st.markdown(f"""
-
Top offending team
-
{top_team}
-
${top_waste:,.0f}/mo wasted
-
""", unsafe_allow_html=True) - -st.markdown("
", unsafe_allow_html=True) - -# ─── Executive summary ──────────────────────────────────────────────────────── -st.markdown(f'
🤖 AI Summary
{report["executive_summary"]}
', - unsafe_allow_html=True) - -# ─── Charts row ─────────────────────────────────────────────────────────────── -col_l, col_r = st.columns(2) - -with col_l: - st.markdown("#### Waste by team") - if team_data: - team_df = pd.DataFrame([ - {"Team": t, "Monthly Waste ($)": v["monthly_waste"], "Top Issue": v["top_issue"]} - for t, v in sorted(team_data.items(), key=lambda x: -x[1]["monthly_waste"]) - ]) - fig = px.bar(team_df, x="Monthly Waste ($)", y="Team", orientation="h", - color="Monthly Waste ($)", color_continuous_scale=["#fde8e8","#e05252"], - text="Monthly Waste ($)", hover_data=["Top Issue"]) - fig.update_traces(texttemplate="$%{text:,.0f}", textposition="outside") - fig.update_layout(showlegend=False, coloraxis_showscale=False, - plot_bgcolor="white", paper_bgcolor="white", - margin=dict(l=0, r=60, t=10, b=0), height=280, - yaxis=dict(showgrid=False), xaxis=dict(showgrid=True, gridcolor="#f0f0f0")) - st.plotly_chart(fig, use_container_width=True) - -with col_r: - st.markdown("#### Waste by category") - if all_f: - cat_totals = {} - for f in all_f: - cat_totals[f["category"]] = cat_totals.get(f["category"], 0) + f["monthly_waste_usd"] - cat_df = pd.DataFrame([ - {"Category": k, "Monthly Waste ($)": round(v, 2)} - for k, v in sorted(cat_totals.items(), key=lambda x: -x[1]) - ]) - colors = ["#e05252", "#f59e0b", "#3b82f6", "#8b5cf6", "#10b981"] - fig2 = px.pie(cat_df, values="Monthly Waste ($)", names="Category", - color_discrete_sequence=colors, hole=0.45) - fig2.update_traces(textposition="outside", textinfo="label+percent") - fig2.update_layout(showlegend=False, paper_bgcolor="white", - margin=dict(l=0, r=0, t=10, b=0), height=280) - st.plotly_chart(fig2, use_container_width=True) - -# ─── Quick wins ─────────────────────────────────────────────────────────────── -st.markdown("#### ⚡ Quick wins — do these today") -qcols = st.columns(3) -for i, (win, col) in enumerate(zip(quick_wins, qcols)): - with col: - st.markdown(f'
✅ {win}
', unsafe_allow_html=True) - -st.markdown("
", unsafe_allow_html=True) - -# ─── Top findings ───────────────────────────────────────────────────────────── -st.markdown("#### 🔍 Top findings") - -filtered = [ - f for f in findings - if f["team"] in selected_teams - and report["raw_top10"][f["rank"]-1]["severity"] in selected_sev -] - -if not filtered: - st.info("No findings match the current filters.") -else: - show_cli = st.toggle("Show CLI remediation commands", value=False) - - for f in filtered: - raw = report["raw_top10"][f["rank"]-1] - sev = raw.get("severity", "MEDIUM").lower() - card_class = f"finding-card {sev}" - - cli_html = "" - if show_cli: - cli_html = f'
$ {f["cli_fix"]}
' - - st.markdown(f""" -
-
FINDING #{f['rank']}
-
{f['resource_name']}
-
{f['plain_english']}
-
- Impact: {f['business_impact']} -
-
- {sev.upper()} - 👤 {f['team']} - ☁️ {raw.get('service','')} - 💰 ${f['monthly_saving']:,.2f}/mo saving -
-
- 🔧 {f['priority_action']} -
- {cli_html} +st.markdown('
AI-powered AWS cost analysis · Perforce Global Jam 2026
', unsafe_allow_html=True) +if report.get("source"): + st.markdown(f'📊 {report["source"]}', unsafe_allow_html=True) + +# ── MAIN LAYOUT: left 62% content | right 38% chatbot ───────────────────────── +main_col, chat_col = st.columns([0.62, 0.38]) + +with main_col: + tab_overview, tab_s3 = st.tabs(["📊 Overview", "🪣 S3 Deep Dive"]) + + # ── TAB 1: Overview ─────────────────────────────────────────────────────── + with tab_overview: + # Metric cards + c1, c2, c3, c4 = st.columns(4) + with c1: + st.markdown(f"""
+
Monthly opportunity
+
${total_monthly:,.0f}
+
recoverable now
+
""", unsafe_allow_html=True) + with c2: + st.markdown(f"""
+
Annual opportunity
+
${total_annual:,.0f}
+
if unaddressed
+
""", unsafe_allow_html=True) + with c3: + st.markdown(f"""
+
Findings
+
{len(findings)}
+
services flagged
+
""", unsafe_allow_html=True) + with c4: + if total_spend > 0: + pct = round((total_monthly / total_spend) * 100, 1) + st.markdown(f"""
+
Total spend
+
${total_spend:,.0f}
+
{pct}% recoverable
+
""", unsafe_allow_html=True) + else: + top_f = findings[0] if findings else {} + st.markdown(f"""
+
Top finding
+
{top_f.get('name','—')[:12]}
+
${top_f.get('monthly_saving',0):,.0f}/mo
+
""", unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # AI summary + st.markdown(f'
🤖 AI Summary
{report["executive_summary"]}
', + unsafe_allow_html=True) + + # Charts + chart_l, chart_r = st.columns(2) + with chart_l: + st.markdown("#### Cost by service") + src = raw_services or [] + if src: + svc_df = pd.DataFrame([ + {"Service": s["service"][:22], "April ($)": s["apr_2026"]} + for s in sorted(src, key=lambda x: -x["apr_2026"])[:8] + ]) + fig = px.bar(svc_df, x="April ($)", y="Service", orientation="h", + color="April ($)", color_continuous_scale=["#fde8e8","#e05252"], text="April ($)") + fig.update_traces(texttemplate="$%{text:,.0f}", textposition="outside") + fig.update_layout(showlegend=False, coloraxis_showscale=False, + plot_bgcolor="white", paper_bgcolor="white", + margin=dict(l=0,r=60,t=10,b=0), height=260, + yaxis=dict(showgrid=False), xaxis=dict(showgrid=True,gridcolor="#f0f0f0")) + st.plotly_chart(fig, use_container_width=True) + elif all_f_legacy: + svc_t = {} + for f in all_f_legacy: + svc_t[f.get("service","Other")] = svc_t.get(f.get("service","Other"),0)+f.get("monthly_waste_usd",0) + sdf = pd.DataFrame([{"Service":k,"Waste ($)":round(v,2)} for k,v in sorted(svc_t.items(),key=lambda x:-x[1])]) + fig = px.bar(sdf,x="Waste ($)",y="Service",orientation="h", + color="Waste ($)",color_continuous_scale=["#fde8e8","#e05252"],text="Waste ($)") + fig.update_traces(texttemplate="$%{text:,.0f}",textposition="outside") + fig.update_layout(showlegend=False,coloraxis_showscale=False, + plot_bgcolor="white",paper_bgcolor="white", + margin=dict(l=0,r=60,t=10,b=0),height=260, + yaxis=dict(showgrid=False),xaxis=dict(showgrid=True,gridcolor="#f0f0f0")) + st.plotly_chart(fig, use_container_width=True) + + with chart_r: + st.markdown("#### Opportunity by category") + cat_t = {} + for f in findings: + cat_t[f["category"]] = cat_t.get(f["category"],0) + f["monthly_saving"] + if cat_t: + cdf = pd.DataFrame([{"Category":k,"Opp ($)":round(v,2)} for k,v in sorted(cat_t.items(),key=lambda x:-x[1]) if v>0]) + fig2 = px.pie(cdf,values="Opp ($)",names="Category", + color_discrete_sequence=["#e05252","#f59e0b","#3b82f6","#8b5cf6","#10b981"],hole=0.45) + fig2.update_traces(textposition="outside",textinfo="label+percent") + fig2.update_layout(showlegend=False,paper_bgcolor="white", + margin=dict(l=0,r=0,t=10,b=0),height=260) + st.plotly_chart(fig2, use_container_width=True) + + # Quick wins + if quick_wins: + st.markdown("#### ⚡ Quick wins") + for w in quick_wins[:3]: + st.markdown(f'
✅ {w}
', unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # Findings + st.markdown("#### 🔍 Flagged services") + filtered = [f for f in findings if f["category"] in selected_cats and f["severity"] in selected_sev] + if not filtered: + st.info("No findings match filters.") + else: + show_action = st.toggle("Show AWS remediation actions", value=False) + for f in filtered: + sev = f["severity"].lower() + action_html = f'
$ {f["aws_action"]}
' if show_action and f["aws_action"] else "" + saving = f"${f['monthly_saving']:,.2f}/mo opportunity" if f["monthly_saving"] > 0 else "Investigate" + st.markdown(f""" +
+
FINDING #{f['rank']}
+
{f['name']}
+
{f['plain_english']}
+
Impact: {f['business_impact']}
+
+ {f['severity']} + 🏷 {f['category']} + 💰 {saving} +
+
🔧 {f['priority_action']}
+ {action_html} +
""", unsafe_allow_html=True) + + # Service insights + sb = report.get("service_breakdown", {}) + if sb: + st.markdown("---") + st.markdown("#### 📊 Service insights") + si1, si2 = st.columns(2) + with si1: + if sb.get("biggest_concern"): st.error(f"🚨 **Biggest concern:** {sb['biggest_concern']}") + if sb.get("most_improved"): st.success(f"✅ **Most improved:** {sb['most_improved']}") + with si2: + if sb.get("watch_list"): st.warning(f"👀 **Watch list:** {', '.join(sb['watch_list'])}") + + st.markdown("---") + st.markdown("#### 📋 Leadership recommendation") + st.info(report.get("closing_recommendation", "")) + st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter") + + # ── TAB 2: S3 Deep Dive ─────────────────────────────────────────────────── + with tab_s3: + if s3_data is None: + st.warning("Run `python3 detection_engine.py` first to generate s3_analysis.json") + else: + buckets = s3_data["buckets"] + tier_summary = s3_data.get("tier_summary", {}) + bdf = pd.DataFrame(buckets) + TIER_COLOR = {"Active": "#10b981", "Infrequent": "#f59e0b", + "Cold": "#f97316", "Frozen": "#e05252"} + annual_saving = round(s3_data["potential_saving"] * 12, 0) + saving_pct = round( + s3_data["potential_saving"] / s3_data["total_monthly_cost"] * 100, 1 + ) if s3_data["total_monthly_cost"] else 0 + + # ── S3 metric cards ──────────────────────────────────────────────── + m1, m2, m3, m4 = st.columns(4) + with m1: + st.markdown(f"""
+
Total S3 spend
+
${s3_data['total_monthly_cost']:,.0f}/mo
+
{s3_data['total_buckets']} buckets tracked
+
""", unsafe_allow_html=True) + with m2: + st.markdown(f"""
+
Potential monthly savings
+
${s3_data['potential_saving']:,.0f}/mo
+
{saving_pct}% of S3 spend recoverable
+
""", unsafe_allow_html=True) + with m3: + st.markdown(f"""
+
Annual savings opportunity
+
${annual_saving:,.0f}
+
if actioned today
+
""", unsafe_allow_html=True) + with m4: + st.markdown(f"""
+
Total stored
+
{s3_data['total_size_gb']:,.0f} GB
+
{s3_data['terminate_candidates']} bucket(s) ready for deletion
+
""", unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # ── Charts row ──────────────────────────────────────────────────── + ch1, ch2 = st.columns(2) + with ch1: + st.markdown("#### Buckets by access tier") + if tier_summary: + tier_df = pd.DataFrame([ + {"Tier": t, "Buckets": v["count"], + "Size (GB)": v["size_gb"], "Cost ($)": v["cost"]} + for t, v in tier_summary.items() + ]) + tier_order = ["Active", "Infrequent", "Cold", "Frozen"] + tier_df["Tier"] = pd.Categorical( + tier_df["Tier"], categories=[t for t in tier_order if t in tier_df["Tier"].values], ordered=True + ) + tier_df = tier_df.sort_values("Tier") + colors = [TIER_COLOR.get(t, "#888") for t in tier_df["Tier"]] + fig_tier = px.bar( + tier_df, x="Tier", y="Size (GB)", color="Tier", + color_discrete_map=TIER_COLOR, + text="Buckets", + custom_data=["Cost ($)"], + ) + fig_tier.update_traces( + texttemplate="%{text} bucket(s)", + textposition="outside", + hovertemplate="%{x}
Size: %{y:,.0f} GB
Cost: $%{customdata[0]:,.2f}/mo" + ) + fig_tier.update_layout( + showlegend=False, plot_bgcolor="white", paper_bgcolor="white", + margin=dict(l=0,r=0,t=10,b=0), height=260, + xaxis=dict(showgrid=False), yaxis=dict(showgrid=True, gridcolor="#f0f0f0", title="Storage (GB)") + ) + st.plotly_chart(fig_tier, use_container_width=True) + + with ch2: + st.markdown("#### Current cost vs potential saving — top buckets") + top_bdf = bdf.nlargest(min(10, len(bdf)), "monthly_cost_usd") + fig_grouped = go.Figure() + fig_grouped.add_trace(go.Bar( + name="Current cost", + y=top_bdf["resource_name"], + x=top_bdf["monthly_cost_usd"], + orientation="h", + marker_color="#e05252", + text=[f"${v:,.2f}" for v in top_bdf["monthly_cost_usd"]], + textposition="outside", + )) + fig_grouped.add_trace(go.Bar( + name="Potential saving", + y=top_bdf["resource_name"], + x=top_bdf["potential_saving"], + orientation="h", + marker_color="#10b981", + text=[f"${v:,.2f}" if v > 0 else "" for v in top_bdf["potential_saving"]], + textposition="outside", + )) + fig_grouped.update_layout( + barmode="group", + plot_bgcolor="white", paper_bgcolor="white", + margin=dict(l=0, r=60, t=10, b=0), height=280, + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + xaxis=dict(showgrid=True, gridcolor="#f0f0f0", tickprefix="$"), + yaxis=dict(showgrid=False, autorange="reversed"), + ) + st.plotly_chart(fig_grouped, use_container_width=True) + + # ── Last-accessed timeline with tier thresholds ─────────────────── + st.markdown("#### 📅 Days idle per bucket — access tier thresholds") + bdf_sorted = bdf.sort_values("days_since_access", ascending=False).reset_index(drop=True) + fig_timeline = px.bar( + bdf_sorted, x="resource_name", y="days_since_access", + color="access_tier", color_discrete_map=TIER_COLOR, + text="days_since_access", + labels={"resource_name": "Bucket", "days_since_access": "Days idle"}, + custom_data=["size_gb", "monthly_cost_usd", "last_accessed"], + ) + fig_timeline.update_traces( + texttemplate="%{text}d", + textposition="outside", + hovertemplate="%{x}
Last accessed: %{customdata[2]}
Days idle: %{y}
Size: %{customdata[0]:,.0f} GB
Cost: $%{customdata[1]:,.2f}/mo" + ) + fig_timeline.add_hline(y=30, line_dash="dash", line_color="#f59e0b", line_width=1.5, + annotation_text="IA (30d)", annotation_position="top right", + annotation_font=dict(size=10, color="#f59e0b")) + fig_timeline.add_hline(y=60, line_dash="dash", line_color="#f97316", line_width=1.5, + annotation_text="Cold (60d)", annotation_position="top right", + annotation_font=dict(size=10, color="#f97316")) + fig_timeline.add_hline(y=90, line_dash="dash", line_color="#e05252", line_width=1.5, + annotation_text="Frozen (90d)", annotation_position="top right", + annotation_font=dict(size=10, color="#e05252")) + fig_timeline.update_layout( + showlegend=True, plot_bgcolor="white", paper_bgcolor="white", + margin=dict(l=0, r=0, t=10, b=80), height=320, + xaxis=dict(showgrid=False, tickangle=-30), + yaxis=dict(showgrid=True, gridcolor="#f0f0f0", title="Days idle"), + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + ) + st.plotly_chart(fig_timeline, use_container_width=True) + + # ── Savings breakdown + Environment cost ─────────────────────────── + sb1, sb2 = st.columns(2) + with sb1: + st.markdown("#### 💰 Savings breakdown by action type") + action_buckets = [b for b in buckets if b["potential_saving"] > 0] + if action_buckets: + def _action_label(b): + if b["terminate_candidate"]: return "Delete (frozen dev/sandbox)" + if b["access_tier"] == "Frozen": return "Archive to Glacier" + if b["access_tier"] == "Cold": return "Move to Glacier" + if b["access_tier"] == "Infrequent": return "Switch to S3-IA" + return "Other" + action_totals: dict = {} + for b in action_buckets: + lbl = _action_label(b) + action_totals[lbl] = action_totals.get(lbl, 0) + b["potential_saving"] + adf = pd.DataFrame([ + {"Action": k, "Saving ($/mo)": round(v, 2)} + for k, v in sorted(action_totals.items(), key=lambda x: -x[1]) + ]) + fig_donut = px.pie( + adf, values="Saving ($/mo)", names="Action", + color_discrete_sequence=["#e05252","#f97316","#f59e0b","#10b981","#3b82f6"], + hole=0.52, + ) + fig_donut.update_traces( + textposition="outside", textinfo="label+percent", + hovertemplate="%{label}
Save $%{value:,.2f}/mo" + ) + fig_donut.update_layout( + showlegend=False, paper_bgcolor="white", + margin=dict(l=0, r=0, t=10, b=0), height=260, + ) + st.plotly_chart(fig_donut, use_container_width=True) + else: + st.info("No savings opportunities identified.") + with sb2: + st.markdown("#### 🏗️ Cost by environment") + env_df = bdf.groupby("environment", as_index=False).agg( + monthly_cost=("monthly_cost_usd", "sum"), + potential_saving=("potential_saving", "sum"), + buckets=("resource_id", "count"), + ).sort_values("monthly_cost", ascending=True) + fig_env = go.Figure() + fig_env.add_trace(go.Bar( + name="Current cost", + y=env_df["environment"], + x=env_df["monthly_cost"], + orientation="h", + marker_color="#e05252", + text=[f"${v:,.2f}" for v in env_df["monthly_cost"]], + textposition="outside", + )) + fig_env.add_trace(go.Bar( + name="Potential saving", + y=env_df["environment"], + x=env_df["potential_saving"], + orientation="h", + marker_color="#10b981", + text=[f"${v:,.2f}" if v > 0 else "" for v in env_df["potential_saving"]], + textposition="outside", + )) + fig_env.update_layout( + barmode="group", + plot_bgcolor="white", paper_bgcolor="white", + margin=dict(l=0, r=60, t=10, b=0), height=260, + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + xaxis=dict(showgrid=True, gridcolor="#f0f0f0", tickprefix="$"), + yaxis=dict(showgrid=False), + ) + st.plotly_chart(fig_env, use_container_width=True) + + # ── Full bucket table ────────────────────────────────────────────── + st.markdown("#### 🗂️ All S3 buckets — detailed view") + s3_filter_col1, s3_filter_col2 = st.columns(2) + with s3_filter_col1: + tier_filter = st.multiselect( + "Filter by access tier", + ["Active", "Infrequent", "Cold", "Frozen"], + default=["Active", "Infrequent", "Cold", "Frozen"], + key="s3_tier_filter" + ) + with s3_filter_col2: + env_filter = st.multiselect( + "Filter by environment", + sorted(bdf["environment"].unique()), + default=sorted(bdf["environment"].unique()), + key="s3_env_filter" + ) + show_s3_cli = st.toggle("Show termination / remediation CLI", value=False, key="s3_cli_toggle") + + filtered_buckets = [ + b for b in buckets + if b["access_tier"] in tier_filter and b["environment"] in env_filter + ] + + for b in filtered_buckets: + tier_col = TIER_COLOR.get(b["access_tier"], "#888") + terminate_badge = ( + '🔴 TERMINATE' + if b["terminate_candidate"] else "" + ) + cli_html = ( + f'
$ {b["cli_fix"]}
' + if show_s3_cli else "" + ) + # Pre-compute dollar strings — avoids Streamlit treating $X as LaTeX + size_str = f'{b["size_gb"]:,.0f} GB' + cost_str = f'USD {b["monthly_cost_usd"]:,.2f}/mo' + saving_html = ( + f'' + f'▲ Save USD {b["potential_saving"]:,.2f}/mo' + if b["potential_saving"] > 0 else "" + ) + st.markdown(f""" +
+
+
+
{b['resource_name']}
+
{b['resource_id']} · {b['region']} · {b['team']} · {b['environment']}
+
+
+ {size_str}  |  {cost_str} +
+
+
+ + {b['access_tier']} + + 📅 Last accessed: {b['last_accessed']} + ⏱ {b['days_since_access']}d idle + {terminate_badge} +
+
+ 💡 {b['recommendation']}{saving_html} +
+ {cli_html} +
""", unsafe_allow_html=True) + + # ── Terminate candidates summary ─────────────────────────────────── + terminate_list = [b for b in buckets if b["terminate_candidate"]] + if terminate_list: + st.markdown("---") + st.markdown("#### 🗑️ Termination candidates") + st.error( + f"**{len(terminate_list)} bucket(s)** in dev/sandbox environments have not been accessed " + f"in 90+ days. These are strong candidates for deletion." + ) + total_term_saving = sum(b["monthly_cost_usd"] for b in terminate_list) + total_term_gb = sum(b["size_gb"] for b in terminate_list) + st.markdown( + f"Deleting them would free **{total_term_gb:,.0f} GB** and save " + f"**${total_term_saving:,.2f}/mo** (${total_term_saving*12:,.0f}/yr)." + ) + for b in terminate_list: + st.markdown( + f'
$ {b["cli_fix"]}
', + unsafe_allow_html=True + ) + +# ── RIGHT PANEL: FinOps AI Chatbot ──────────────────────────────────────────── +with chat_col: + st.markdown(""" +
+
🤖 FinOps AI
+
+ Ask anything about your AWS costs
- """, unsafe_allow_html=True) +
+ """, unsafe_allow_html=True) + + # Suggested questions + suggestions = [ + "Which service should I fix first?", + "Why did EC2-Other spike?", + "How much can we save on Neptune?", + "What is the DevOpsAgent charge?", + "Give me a 3-step action plan", + ] + st.markdown("

💡 Suggested questions:

", + unsafe_allow_html=True) + for i, sug in enumerate(suggestions): + if st.button(sug, key=f"sug_{i}", use_container_width=True): + st.session_state.chat_history.append({"role":"user","content":sug}) + with st.spinner("Thinking..."): + ans = call_claude(st.session_state.chat_history) + st.session_state.chat_history.append({"role":"assistant","content":ans}) + + st.markdown("
", unsafe_allow_html=True) + + # Chat history + for msg in st.session_state.chat_history: + if msg["role"] == "user": + st.markdown( + f"
You: {msg['content']}
", + unsafe_allow_html=True) + else: + st.markdown( + f"
🤖 FinOps AI: {msg['content']}
", + unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # Input + if prompt_input := st.chat_input("Ask about your AWS costs..."): + st.session_state.chat_history.append({"role":"user","content":prompt_input}) + with st.spinner("Thinking..."): + ans = call_claude(st.session_state.chat_history) + st.session_state.chat_history.append({"role":"assistant","content":ans}) + st.rerun() -# ─── Recommendation ─────────────────────────────────────────────────────────── -st.markdown("---") -st.markdown("#### 📋 Leadership recommendation") -st.info(report.get("closing_recommendation", "")) -st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter") + if st.session_state.chat_history: + if st.button("🗑️ Clear chat", use_container_width=True): + st.session_state.chat_history = [] + st.rerun() \ No newline at end of file diff --git a/dashboard_AI.py b/dashboard_AI.py new file mode 100644 index 0000000..40692a4 --- /dev/null +++ b/dashboard_AI.py @@ -0,0 +1,370 @@ +import json, os, urllib.request +import streamlit as st +import plotly.express as px +import pandas as pd +import requests + +st.set_page_config(page_title="Cloud Cost Waste Hunter", page_icon="👻", + layout="wide", initial_sidebar_state="expanded") + +st.markdown(""" + +""", unsafe_allow_html=True) + +# ── Load report ──────────────────────────────────────────────────────────────── +@st.cache_data +def load_report(path="llm_report.json"): + with open(path) as f: + return json.load(f) + +report = load_report() +quick_wins = report.get("quick_wins", []) +raw_findings = report.get("findings", []) + +def normalise(f): + return { + "rank": f.get("rank", 0), + "name": f.get("service", f.get("resource_name", "Unknown")), + "category": f.get("category", f.get("flag", "—")), + "plain_english": f.get("plain_english", ""), + "business_impact": f.get("business_impact", ""), + "monthly_saving": f.get("monthly_opportunity", f.get("monthly_saving", 0.0)), + "priority_action": f.get("priority_action", ""), + "aws_action": f.get("aws_action", f.get("cli_fix", "")), + "severity": f.get("severity", "HIGH" if f.get("monthly_opportunity", f.get("monthly_saving", 0)) > 100 else "MEDIUM"), + } + +findings = [normalise(f) for f in raw_findings] +total_monthly = report.get("total_monthly_opportunity", report.get("total_monthly_waste", 0)) +total_annual = report.get("total_annual_waste", total_monthly * 12) +total_spend = report.get("total_monthly_spend", 0) +raw_services = report.get("raw_data", {}).get("services", []) +all_f_legacy = report.get("all_findings", []) + +# ── Claude chatbot helpers ───────────────────────────────────────────────────── +def build_context(): + lines = [ + "You are a senior FinOps engineer assistant in the Ghost Busters Cloud Cost Waste Hunter dashboard.", + "Answer clearly and concisely, grounding every response in the actual account data below.", + "Keep answers to 3-5 sentences unless the user asks for detail.", + "", + f"Data source: {report.get('source', 'AWS Cost Explorer')}", + f"Monthly spend: ${total_spend:,.2f}" if total_spend else "", + f"Monthly opportunity: ${total_monthly:,.2f}", + f"Executive summary: {report.get('executive_summary', '')}", + "", + "FINDINGS:", + ] + for fi in raw_findings: + lines.append( + f"#{fi.get('rank','')} {fi.get('service', fi.get('resource_name',''))} | " + f"${fi.get('monthly_opportunity', fi.get('monthly_saving', 0)):,.2f}/mo | " + f"{fi.get('plain_english','')[:120]} | " + f"Action: {fi.get('priority_action','')[:80]}" + ) + lines += ["", "QUICK WINS:"] + [f"- {w}" for w in quick_wins] + sb = report.get("service_breakdown", {}) + if sb: + lines += [ + f"Biggest concern: {sb.get('biggest_concern','')}", + f"Watch list: {', '.join(sb.get('watch_list',[]))}", + ] + lines.append(f"Recommendation: {report.get('closing_recommendation','')}") + return "\n".join(l for l in lines if l is not None) + +def call_claude(messages): + api_key = os.environ.get("ANTHROPIC_API_KEY", "") + if not api_key: + return "⚠️ ANTHROPIC_API_KEY not set. Run `export ANTHROPIC_API_KEY='sk-ant-...'` then restart Streamlit." + try: + payload = json.dumps({ + "model": "claude-sonnet-4-20250514", + "max_tokens": 800, + "system": build_context(), + "messages": messages + }).encode() + req = urllib.request.Request( + "https://api.anthropic.com/v1/messages", + data=payload, + headers={"Content-Type":"application/json", + "x-api-key":api_key, + "anthropic-version":"2023-06-01"}, + method="POST" + ) + with urllib.request.urlopen(req, timeout=30) as resp: + data = json.loads(resp.read().decode()) + return data["content"][0]["text"] + except Exception as e: + return f"❌ Error: {e}" + +if "chat_history" not in st.session_state: + st.session_state.chat_history = [] + +# ── Sidebar ──────────────────────────────────────────────────────────────────── +with st.sidebar: + st.markdown("## 👻 Ghost Busters") + st.markdown("*Cloud Cost Waste Hunter*") + st.markdown("---") + categories = sorted(set(f["category"] for f in findings)) + selected_cats = st.multiselect("Filter by category", categories, default=categories) + selected_sev = st.multiselect("Filter by severity", ["HIGH","MEDIUM","LOW"], default=["HIGH","MEDIUM","LOW"]) + st.markdown("---") + st.markdown("**Slack webhook alert**") + slack_url = st.text_input("Webhook URL", placeholder="https://hooks.slack.com/...") + if st.button("🔔 Fire top finding alert", use_container_width=True): + if slack_url and findings: + top = findings[0] + payload = {"blocks":[ + {"type":"header","text":{"type":"plain_text","text":"👻 Cloud Cost Waste Hunter Alert"}}, + {"type":"section","text":{"type":"mrkdwn","text":f"*#{top['rank']} — {top['name']}*\n{top['plain_english']}"}}, + {"type":"section","fields":[ + {"type":"mrkdwn","text":f"*Opportunity*\n${top['monthly_saving']:,.2f}/mo"}, + {"type":"mrkdwn","text":f"*Action*\n{top['priority_action'][:80]}..."} + ]}, + {"type":"section","text":{"type":"mrkdwn","text":f"*Total opportunity:* ${total_monthly:,.2f}/mo"}} + ]} + try: + r = requests.post(slack_url, json=payload, timeout=5) + st.success("✅ Sent!") if r.status_code==200 else st.error(f"Failed: {r.status_code}") + except Exception as e: + st.error(str(e)) + else: + st.warning("Enter a Slack webhook URL first") + st.markdown("---") + st.caption(f"Generated: {report.get('generated_at','—')}") + if report.get("source"): st.caption(f"Source: {report['source']}") + +# ── Page header ─────────────────────────────────────────────────────────────── +st.markdown('
👻 Cloud Cost Waste Hunter
', unsafe_allow_html=True) +st.markdown('
AI-powered AWS cost analysis · Perforce Global Jam 2026
', unsafe_allow_html=True) +if report.get("source"): + st.markdown(f'📊 {report["source"]}', unsafe_allow_html=True) + +# ── MAIN LAYOUT: left 62% content | right 38% chatbot ───────────────────────── +main_col, chat_col = st.columns([0.62, 0.38]) + +with main_col: + # Metric cards + c1, c2, c3, c4 = st.columns(4) + with c1: + st.markdown(f"""
+
Monthly opportunity
+
${total_monthly:,.0f}
+
recoverable now
+
""", unsafe_allow_html=True) + with c2: + st.markdown(f"""
+
Annual opportunity
+
${total_annual:,.0f}
+
if unaddressed
+
""", unsafe_allow_html=True) + with c3: + st.markdown(f"""
+
Findings
+
{len(findings)}
+
services flagged
+
""", unsafe_allow_html=True) + with c4: + if total_spend > 0: + pct = round((total_monthly / total_spend) * 100, 1) + st.markdown(f"""
+
Total spend
+
${total_spend:,.0f}
+
{pct}% recoverable
+
""", unsafe_allow_html=True) + else: + top_f = findings[0] if findings else {} + st.markdown(f"""
+
Top finding
+
{top_f.get('name','—')[:12]}
+
${top_f.get('monthly_saving',0):,.0f}/mo
+
""", unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # AI summary + st.markdown(f'
🤖 AI Summary
{report["executive_summary"]}
', + unsafe_allow_html=True) + + # Charts + chart_l, chart_r = st.columns(2) + with chart_l: + st.markdown("#### Cost by service") + src = raw_services or [] + if src: + svc_df = pd.DataFrame([ + {"Service": s["service"][:22], "April ($)": s["apr_2026"]} + for s in sorted(src, key=lambda x: -x["apr_2026"])[:8] + ]) + fig = px.bar(svc_df, x="April ($)", y="Service", orientation="h", + color="April ($)", color_continuous_scale=["#fde8e8","#e05252"], text="April ($)") + fig.update_traces(texttemplate="$%{text:,.0f}", textposition="outside") + fig.update_layout(showlegend=False, coloraxis_showscale=False, + plot_bgcolor="white", paper_bgcolor="white", + margin=dict(l=0,r=60,t=10,b=0), height=260, + yaxis=dict(showgrid=False), xaxis=dict(showgrid=True,gridcolor="#f0f0f0")) + st.plotly_chart(fig, use_container_width=True) + elif all_f_legacy: + svc_t = {} + for f in all_f_legacy: + svc_t[f.get("service","Other")] = svc_t.get(f.get("service","Other"),0)+f.get("monthly_waste_usd",0) + sdf = pd.DataFrame([{"Service":k,"Waste ($)":round(v,2)} for k,v in sorted(svc_t.items(),key=lambda x:-x[1])]) + fig = px.bar(sdf,x="Waste ($)",y="Service",orientation="h", + color="Waste ($)",color_continuous_scale=["#fde8e8","#e05252"],text="Waste ($)") + fig.update_traces(texttemplate="$%{text:,.0f}",textposition="outside") + fig.update_layout(showlegend=False,coloraxis_showscale=False, + plot_bgcolor="white",paper_bgcolor="white", + margin=dict(l=0,r=60,t=10,b=0),height=260, + yaxis=dict(showgrid=False),xaxis=dict(showgrid=True,gridcolor="#f0f0f0")) + st.plotly_chart(fig, use_container_width=True) + + with chart_r: + st.markdown("#### Opportunity by category") + cat_t = {} + for f in findings: + cat_t[f["category"]] = cat_t.get(f["category"],0) + f["monthly_saving"] + if cat_t: + cdf = pd.DataFrame([{"Category":k,"Opp ($)":round(v,2)} for k,v in sorted(cat_t.items(),key=lambda x:-x[1]) if v>0]) + fig2 = px.pie(cdf,values="Opp ($)",names="Category", + color_discrete_sequence=["#e05252","#f59e0b","#3b82f6","#8b5cf6","#10b981"],hole=0.45) + fig2.update_traces(textposition="outside",textinfo="label+percent") + fig2.update_layout(showlegend=False,paper_bgcolor="white", + margin=dict(l=0,r=0,t=10,b=0),height=260) + st.plotly_chart(fig2, use_container_width=True) + + # Quick wins + if quick_wins: + st.markdown("#### ⚡ Quick wins") + for w in quick_wins[:3]: + st.markdown(f'
✅ {w}
', unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # Findings + st.markdown("#### 🔍 Flagged services") + filtered = [f for f in findings if f["category"] in selected_cats and f["severity"] in selected_sev] + if not filtered: + st.info("No findings match filters.") + else: + show_action = st.toggle("Show AWS remediation actions", value=False) + for f in filtered: + sev = f["severity"].lower() + action_html = f'
$ {f["aws_action"]}
' if show_action and f["aws_action"] else "" + saving = f"${f['monthly_saving']:,.2f}/mo opportunity" if f["monthly_saving"] > 0 else "Investigate" + st.markdown(f""" +
+
FINDING #{f['rank']}
+
{f['name']}
+
{f['plain_english']}
+
Impact: {f['business_impact']}
+
+ {f['severity']} + 🏷 {f['category']} + 💰 {saving} +
+
🔧 {f['priority_action']}
+ {action_html} +
""", unsafe_allow_html=True) + + # Service insights + sb = report.get("service_breakdown", {}) + if sb: + st.markdown("---") + st.markdown("#### 📊 Service insights") + si1, si2 = st.columns(2) + with si1: + if sb.get("biggest_concern"): st.error(f"🚨 **Biggest concern:** {sb['biggest_concern']}") + if sb.get("most_improved"): st.success(f"✅ **Most improved:** {sb['most_improved']}") + with si2: + if sb.get("watch_list"): st.warning(f"👀 **Watch list:** {', '.join(sb['watch_list'])}") + + st.markdown("---") + st.markdown("#### 📋 Leadership recommendation") + st.info(report.get("closing_recommendation", "")) + st.caption("Built for Perforce Global Jam 2026 · Team Ghost Busters · Cloud Cost Waste Hunter") + +# ── RIGHT PANEL: FinOps AI Chatbot ──────────────────────────────────────────── +with chat_col: + st.markdown(""" +
+
🤖 FinOps AI
+
+ Ask anything about your AWS costs +
+
+ """, unsafe_allow_html=True) + + # Suggested questions + suggestions = [ + "Which service should I fix first?", + "Why did EC2-Other spike?", + "How much can we save on Neptune?", + "What is the DevOpsAgent charge?", + "Give me a 3-step action plan", + ] + st.markdown("

💡 Suggested questions:

", + unsafe_allow_html=True) + for i, sug in enumerate(suggestions): + if st.button(sug, key=f"sug_{i}", use_container_width=True): + st.session_state.chat_history.append({"role":"user","content":sug}) + with st.spinner("Thinking..."): + ans = call_claude(st.session_state.chat_history) + st.session_state.chat_history.append({"role":"assistant","content":ans}) + + st.markdown("
", unsafe_allow_html=True) + + # Chat history + for msg in st.session_state.chat_history: + if msg["role"] == "user": + st.markdown( + f"
You: {msg['content']}
", + unsafe_allow_html=True) + else: + st.markdown( + f"
🤖 FinOps AI: {msg['content']}
", + unsafe_allow_html=True) + + st.markdown("
", unsafe_allow_html=True) + + # Input + if prompt_input := st.chat_input("Ask about your AWS costs..."): + st.session_state.chat_history.append({"role":"user","content":prompt_input}) + with st.spinner("Thinking..."): + ans = call_claude(st.session_state.chat_history) + st.session_state.chat_history.append({"role":"assistant","content":ans}) + st.rerun() + + if st.session_state.chat_history: + if st.button("🗑️ Clear chat", use_container_width=True): + st.session_state.chat_history = [] + st.rerun() \ No newline at end of file diff --git a/detection_engine.py b/detection_engine.py index 4883b55..83d9627 100644 --- a/detection_engine.py +++ b/detection_engine.py @@ -1,3 +1,5 @@ +import re +import os import pandas as pd import json from datetime import datetime, timedelta @@ -27,11 +29,281 @@ } # ─── Load data ──────────────────────────────────────────────────────────────── + +REQUIRED_COLUMNS = { + "resource_id", "resource_name", "service", "resource_type", + "region", "team", "environment", "cpu_avg_7d", "monthly_cost_usd", + "days_running", "last_accessed", "status", +} + + +def _detect_csv_format(filepath: str) -> str: + """Return 'inventory', 'billing_resource', or 'billing_service'.""" + with open(filepath, newline="", encoding="utf-8-sig") as f: + first_line = f.readline() + # Resource-level billing export: first cell is "Resource", columns are ARNs/IDs with ($) + if first_line.lstrip("\ufeff").strip('"').startswith("Resource") and "($)" in first_line: + return "billing_resource" + # Service-level billing export: first cell is "Service", columns are service names with ($) + if "Service" in first_line and "($)" in first_line: + return "billing_service" + return "inventory" + + def load_data(filepath="aws_cost_data.csv"): + if not os.path.exists(filepath): + raise FileNotFoundError( + f"\n File not found: '{filepath}'\n" + f" Check the path and try again." + ) + + fmt = _detect_csv_format(filepath) + + if fmt == "billing_resource": + # Cost Explorer resource-level export — run billing analysis instead + print(f" Detected format: AWS Cost Explorer resource-level export") + print(f" Running billing analysis mode...\n") + run_billing_analysis(filepath) + raise SystemExit(0) + + if fmt == "billing_service": + raise ValueError( + f"\n Detected an AWS Cost Explorer service-level billing export in '{filepath}'.\n" + f" This format has service costs grouped by date and lacks per-resource\n" + f" metrics (CPU, size, team, etc.) needed for waste detection.\n\n" + f" Use 'aws_cost_data.csv' (the included sample) as a template for the\n" + f" resource inventory format this engine requires." + ) + + # Inventory format — validate required columns + preview = pd.read_csv(filepath, nrows=0) + actual_cols = set(preview.columns.str.strip()) + missing = REQUIRED_COLUMNS - actual_cols + if missing: + raise ValueError( + f"\n Incompatible CSV format in '{filepath}'.\n" + f" Missing required columns: {sorted(missing)}\n" + f" Found columns: {sorted(actual_cols)}\n\n" + f" Required columns: {sorted(REQUIRED_COLUMNS)}" + ) + df = pd.read_csv(filepath, parse_dates=["last_accessed"]) df["days_since_access"] = (datetime.today() - df["last_accessed"]).dt.days return df + +# ─── Billing CSV analysis (Cost Explorer resource-level export) ─────────────── + +def _infer_service(resource_id: str) -> str: + h = resource_id + if re.match(r"^i-[0-9a-f]+$", h): return "EC2" + if re.match(r"^vol-[0-9a-f]+$", h): return "EBS" + if re.match(r"^snap-[0-9a-f]+$", h): return "EBS Snapshot" + if re.match(r"^eipalloc-", h): return "Elastic IP" + if re.match(r"^eni-", h): return "Network Interface" + if re.match(r"^vpn-", h): return "VPN" + if "cloudfront" in h: return "CloudFront" + if "elasticloadbalancing" in h: return "ELB" + if "rds" in h or "docdb" in h: return "RDS" + if "kinesis" in h: return "Kinesis" + if "logs" in h and "log-group" in h: return "CloudWatch Logs" + if "lambda" in h: return "Lambda" + if "kms" in h: return "KMS" + if "elastic-ip" in h: return "Elastic IP" + if "route53" in h: return "Route53" + if "quicksight" in h: return "QuickSight" + if "arn:aws:s3:::" in h: return "S3" + if "secretsmanager" in h: return "Secrets Manager" + if "ecr" in h: return "ECR" + if "elasticfilesystem" in h: return "EFS" + if "scheduler" in h: return "EventBridge" + if "sns" in h: return "SNS" + if "sqs" in h: return "SQS" + if "amplify" in h: return "Amplify" + if "cloudformation" in h: return "CloudFormation" + return "Other" + + +def _extract_region_from_arn(arn: str) -> str: + parts = arn.split(":") + if len(parts) >= 4 and parts[3]: + return parts[3] + return "global" + + +def run_billing_analysis(filepath: str) -> None: + """Parse a Cost Explorer resource-level billing CSV and write billing_report.json.""" + import csv as csv_mod + from collections import defaultdict + + with open(filepath, newline="", encoding="utf-8-sig") as f: + rows = list(csv_mod.reader(f)) + + if len(rows) < 2: + print(" Empty or unreadable file.") + return + + headers = rows[0] # label + resource ARNs/IDs + total_row = rows[1] # "Resource total" + costs + + # Collect date rows (any row whose first cell looks like a date) + date_rows = [r for r in rows[2:] if re.match(r"\d{4}-\d{2}-\d{2}", r[0].strip('"'))] + dates = [r[0].strip('"') for r in date_rows] + + # Build S3 bucket name set from ARN-prefixed columns for cross-reference + s3_bucket_names = { + headers[i].strip('"').replace("arn:aws:s3:::", "").replace("($)", "").strip() + for i in range(1, len(headers)) + if "arn:aws:s3:::" in headers[i] + } + + resources = [] + for i in range(1, len(headers)): + raw_name = headers[i].strip('"').replace("($)", "").strip() + cost_str = total_row[i] if i < len(total_row) else "0" + try: + total_cost = float(cost_str) + except ValueError: + total_cost = 0.0 + + # Skip the synthetic "Total costs" column + if raw_name in ("Total costs", "No Resource Id"): + continue + + # Determine service + # S3: bare bucket names (no arn: prefix) that appear in the s3_bucket_names set + if (raw_name in s3_bucket_names + and not raw_name.startswith("arn:") + and not re.match(r"^i-|^vol-|^vpn-|^snap-|^eipalloc-|^eni-", raw_name)): + service = "S3" + resource_id = raw_name + else: + service = _infer_service(raw_name) + resource_id = raw_name + + # Skip ARN-prefixed S3 duplicates (they show $0 and are already counted above) + if "arn:aws:s3:::" in raw_name: + continue + + # Per-date costs (for last-active heuristic) + daily_costs = [] + for dr in date_rows: + try: + daily_costs.append((dr[0].strip('"'), float(dr[i]) if i < len(dr) else 0.0)) + except (ValueError, IndexError): + daily_costs.append((dr[0].strip('"'), 0.0)) + + # Last date with non-zero cost + active_dates = [d for d, c in daily_costs if c > 0] + last_active = active_dates[-1] if active_dates else (dates[-1] if dates else "unknown") + first_active = active_dates[0] if active_dates else (dates[0] if dates else "unknown") + + region = _extract_region_from_arn(raw_name) if raw_name.startswith("arn:") else "us-east-1" + monthly_cost = round(total_cost * 30, 2) + + resources.append({ + "resource_id": resource_id, + "service": service, + "region": region, + "daily_cost": round(total_cost, 6), + "monthly_cost": monthly_cost, + "last_active": last_active, + "first_active": first_active, + "date_range": f"{dates[0]} → {dates[-1]}" if dates else "unknown", + }) + + resources.sort(key=lambda x: -x["monthly_cost"]) + + # Service summary + by_service: dict = defaultdict(lambda: {"count": 0, "monthly_cost": 0.0, "resources": []}) + for r in resources: + by_service[r["service"]]["count"] += 1 + by_service[r["service"]]["monthly_cost"] += r["monthly_cost"] + by_service[r["service"]]["resources"].append(r) + + # S3 deep dive + s3_resources = sorted( + [r for r in resources if r["service"] == "S3"], + key=lambda x: -x["monthly_cost"] + ) + s3_zero = [r for r in s3_resources if r["monthly_cost"] == 0] + s3_active = [r for r in s3_resources if r["monthly_cost"] > 0] + + total_monthly = round(sum(r["monthly_cost"] for r in resources), 2) + s3_monthly = round(sum(r["monthly_cost"] for r in s3_resources), 2) + + report = { + "generated_at": datetime.today().strftime("%Y-%m-%d %H:%M"), + "source_file": filepath, + "date_range": f"{dates[0]} → {dates[-1]}" if dates else "unknown", + "total_resources": len(resources), + "total_monthly_cost": total_monthly, + "s3_summary": { + "total_buckets": len(s3_resources), + "buckets_with_cost": len(s3_active), + "zero_cost_buckets": len(s3_zero), + "total_monthly_cost": s3_monthly, + "pct_of_total": round(s3_monthly / total_monthly * 100, 1) if total_monthly else 0, + }, + "service_breakdown": { + svc: {"count": v["count"], "monthly_cost": round(v["monthly_cost"], 2)} + for svc, v in sorted(by_service.items(), key=lambda x: -x[1]["monthly_cost"]) + }, + "top_s3_buckets": s3_active[:20], + "zero_cost_s3": [r["resource_id"] for r in s3_zero], + "top_resources": resources[:20], + } + + with open("billing_report.json", "w") as f: + json.dump(report, f, indent=2) + + # ── Print summary ────────────────────────────────────────────────────────── + print("=" * 60) + print(" BILLING ANALYSIS — COST EXPLORER RESOURCE EXPORT") + print("=" * 60) + print(f" Date range : {report['date_range']}") + print(f" Total resources : {report['total_resources']}") + print(f" Total monthly est : ${total_monthly:,.2f}") + print() + print(" Cost by service:") + for svc, info in report["service_breakdown"].items(): + bar = "█" * min(int(info["monthly_cost"] / total_monthly * 30), 30) if total_monthly else "" + print(f" {svc:<25} ${info['monthly_cost']:>10,.2f}/mo {bar}") + print() + print(f" S3 BUCKET ANALYSIS ({len(s3_resources)} buckets · ${s3_monthly:,.2f}/mo)") + print(f" {'Bucket':<50} {'Monthly':>10}") + print(f" {'-'*50} {'-'*10}") + for b in s3_active[:15]: + print(f" {b['resource_id']:<50} ${b['monthly_cost']:>9,.2f}") + if len(s3_active) > 15: + print(f" ... and {len(s3_active)-15} more buckets") + if s3_zero: + print(f"\n {len(s3_zero)} buckets with $0 cost (potentially unused):") + for b in s3_zero[:10]: + print(f" - {b['resource_id']}") + print() + print(" billing_report.json written.") + print("=" * 60) + +# ─── S3 helpers ───────────────────────────────────────────────────────────────── + +def _parse_size_gb(resource_type: str) -> float: + """Extract numeric GB from strings like 'Standard-1625GB'.""" + m = re.search(r'([\d.]+)GB', resource_type, re.IGNORECASE) + return float(m.group(1)) if m else 0.0 + + +def _s3_access_tier(days: int) -> str: + """Classify S3 bucket by days since last access.""" + if days < 30: + return "Active" + elif days < 60: + return "Infrequent" + elif days < 90: + return "Cold" + return "Frozen" + + # ─── Detection rules ────────────────────────────────────────────────────────── def detect_idle_ec2(df): @@ -115,20 +387,59 @@ def detect_cold_s3(df): ] for _, r in s3.iterrows(): saving = round(r["monthly_cost_usd"] * 0.55, 2) # Glacier ~55% cheaper + size_gb = _parse_size_gb(str(r["resource_type"])) + tier = _s3_access_tier(int(r["days_since_access"])) findings.append({ - "finding_id": f"S3-COLD-{r['resource_id'][-8:]}", - "category": "Storage Optimisation", - "severity": "MEDIUM", - "service": "S3", - "resource_id": r["resource_id"], - "resource_name": r["resource_name"], - "region": r["region"], - "team": r["team"], - "environment": r["environment"], - "detail": f"Bucket not accessed in {r['days_since_access']} days but on S3 Standard pricing. Move to Glacier.", + "finding_id": f"S3-COLD-{r['resource_id'][-8:]}", + "category": "Storage Optimisation", + "severity": "MEDIUM", + "service": "S3", + "resource_id": r["resource_id"], + "resource_name": r["resource_name"], + "region": r["region"], + "team": r["team"], + "environment": r["environment"], + "size_gb": size_gb, + "access_tier": tier, + "days_since_access": int(r["days_since_access"]), + "last_accessed": str(r["last_accessed"].date()), + "detail": f"{size_gb:,.0f} GB bucket not accessed in {r['days_since_access']} days but on S3 Standard pricing. Tier: {tier}.", + "monthly_waste_usd": saving, + "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", + "cli_fix": f"aws s3api put-bucket-lifecycle-configuration --bucket {r['resource_id']} --lifecycle-configuration file://glacier-lifecycle.json" + }) + return findings + + +def detect_s3_infrequent_access(df): + """Flag S3 buckets accessed 30-59 days ago — candidates for S3-IA tier.""" + findings = [] + s3 = df[ + (df["service"] == "S3") & + (df["days_since_access"] >= 30) & + (df["days_since_access"] < S3_COLD_DAYS) + ] + for _, r in s3.iterrows(): + saving = round(r["monthly_cost_usd"] * 0.45, 2) # S3-IA ~45% cheaper + size_gb = _parse_size_gb(str(r["resource_type"])) + findings.append({ + "finding_id": f"S3-IA-{r['resource_id'][-8:]}", + "category": "Storage Optimisation", + "severity": "LOW", + "service": "S3", + "resource_id": r["resource_id"], + "resource_name": r["resource_name"], + "region": r["region"], + "team": r["team"], + "environment": r["environment"], + "size_gb": size_gb, + "access_tier": "Infrequent", + "days_since_access": int(r["days_since_access"]), + "last_accessed": str(r["last_accessed"].date()), + "detail": f"{size_gb:,.0f} GB bucket last accessed {r['days_since_access']} days ago. Move to S3-Infrequent Access tier.", "monthly_waste_usd": saving, - "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", - "cli_fix": f"aws s3api put-bucket-lifecycle-configuration --bucket {r['resource_id']} --lifecycle-configuration file://glacier-lifecycle.json" + "recommendation": "Switch to S3 Infrequent Access or enable Intelligent-Tiering.", + "cli_fix": f"aws s3api put-bucket-intelligent-tiering-configuration --bucket {r['resource_id']} --id tiering-config --intelligent-tiering-configuration Id=tiering-config,Status=Enabled" }) return findings @@ -226,6 +537,74 @@ def build_summary(all_findings, top10): } +# ─── S3 deep-dive analysis ──────────────────────────────────────────────────── + +def _build_s3_analysis(df): + """Build a rich S3 analysis dataset with size, tier, termination flags.""" + s3 = df[df["service"] == "S3"].copy() + rows = [] + for _, r in s3.iterrows(): + size_gb = _parse_size_gb(str(r["resource_type"])) + tier = _s3_access_tier(int(r["days_since_access"])) + terminate = (tier == "Frozen") and (r["environment"] in ("dev", "sandbox")) + if tier == "Frozen": + saving = round(r["monthly_cost_usd"] * 0.55, 2) + recommendation = "Delete bucket (dev/sandbox) or archive to Glacier" + elif tier == "Cold": + saving = round(r["monthly_cost_usd"] * 0.55, 2) + recommendation = "Move to S3 Glacier via Lifecycle rule" + elif tier == "Infrequent": + saving = round(r["monthly_cost_usd"] * 0.45, 2) + recommendation = "Switch to S3-Infrequent Access or Intelligent-Tiering" + else: + saving = 0.0 + recommendation = "No action needed — actively used" + rows.append({ + "resource_id": r["resource_id"], + "resource_name": r["resource_name"], + "region": r["region"], + "team": r["team"], + "environment": r["environment"], + "size_gb": size_gb, + "storage_class": str(r["resource_type"]).split("-")[0], + "last_accessed": str(r["last_accessed"].date()), + "days_since_access": int(r["days_since_access"]), + "access_tier": tier, + "monthly_cost_usd": float(r["monthly_cost_usd"]), + "potential_saving": saving, + "terminate_candidate": terminate, + "recommendation": recommendation, + "cli_fix": ( + f"aws s3 rb s3://{r['resource_id']} --force" + if terminate else + f"aws s3api put-bucket-lifecycle-configuration --bucket {r['resource_id']} --lifecycle-configuration file://glacier-lifecycle.json" + ), + }) + rows.sort(key=lambda x: x["days_since_access"], reverse=True) + total_size = round(sum(r["size_gb"] for r in rows), 1) + total_cost = round(sum(r["monthly_cost_usd"] for r in rows), 2) + total_saving = round(sum(r["potential_saving"] for r in rows), 2) + tier_summary = {} + for r in rows: + tier_summary.setdefault(r["access_tier"], {"count": 0, "size_gb": 0.0, "cost": 0.0}) + tier_summary[r["access_tier"]]["count"] += 1 + tier_summary[r["access_tier"]]["size_gb"] += r["size_gb"] + tier_summary[r["access_tier"]]["cost"] += r["monthly_cost_usd"] + for t in tier_summary.values(): + t["size_gb"] = round(t["size_gb"], 1) + t["cost"] = round(t["cost"], 2) + return { + "generated_at": datetime.today().strftime("%Y-%m-%d %H:%M"), + "total_buckets": len(rows), + "total_size_gb": total_size, + "total_monthly_cost": total_cost, + "potential_saving": total_saving, + "terminate_candidates": sum(1 for r in rows if r["terminate_candidate"]), + "tier_summary": tier_summary, + "buckets": rows, + } + + # ─── Main ───────────────────────────────────────────────────────────────────── def run_detection(filepath="aws_cost_data.csv"): @@ -239,11 +618,17 @@ def run_detection(filepath="aws_cost_data.csv"): detect_unattached_ebs(df) + detect_unassociated_eips(df) + detect_cold_s3(df) + + detect_s3_infrequent_access(df) + detect_rightsizing(df) ) print(f"Total findings: {len(all_findings)}") + # Build and persist dedicated S3 analysis dataset + s3_analysis = _build_s3_analysis(df) + with open("s3_analysis.json", "w") as f: + json.dump(s3_analysis, f, indent=2) + top10 = score_and_rank(all_findings) summary = build_summary(all_findings, top10) @@ -273,4 +658,15 @@ def run_detection(filepath="aws_cost_data.csv"): return output if __name__ == "__main__": - run_detection("aws_cost_data.csv") + import argparse + parser = argparse.ArgumentParser( + description="Cloud Cost Waste Hunter — detect AWS waste from a cost CSV file" + ) + parser.add_argument( + "filepath", + nargs="?", + default="aws_cost_data.csv", + help="Path to the AWS cost CSV file (default: aws_cost_data.csv)", + ) + args = parser.parse_args() + run_detection(args.filepath) diff --git a/findings.json b/findings.json index ee13b7a..e8c747c 100644 --- a/findings.json +++ b/findings.json @@ -1,13 +1,13 @@ { "summary": { - "generated_at": "2026-05-25 18:13", - "total_findings": 39, - "total_monthly_waste": 5647.17, - "total_annual_waste": 67766.04, + "generated_at": "2026-05-26 15:20", + "total_findings": 41, + "total_monthly_waste": 5726.62, + "total_annual_waste": 68719.44, "waste_by_category": { "Idle Resource": 4990.2, "Zombie Resource": 551.6, - "Storage Optimisation": 105.37 + "Storage Optimisation": 184.82 }, "top10_monthly_waste": 4608.3 }, @@ -199,7 +199,7 @@ "recommendation": "Stop or terminate i-09963334018. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-09963334018 --region us-east-1", "waste_score": 60.0, - "rank": 17 + "rank": 18 }, { "finding_id": "IDLE-EC2-989805", @@ -216,7 +216,7 @@ "recommendation": "Stop or terminate i-01438989805. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01438989805 --region us-east-1", "waste_score": 30.0, - "rank": 21 + "rank": 22 }, { "finding_id": "IDLE-EC2-792787", @@ -352,7 +352,7 @@ "recommendation": "Stop or terminate i-01822873088. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01822873088 --region eu-west-1", "waste_score": 60.0, - "rank": 18 + "rank": 19 }, { "finding_id": "IDLE-EC2-514789", @@ -386,7 +386,7 @@ "recommendation": "Stop or terminate i-04875962612. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-04875962612 --region ap-south-1", "waste_score": 30.0, - "rank": 22 + "rank": 23 }, { "finding_id": "IDLE-EC2-237817", @@ -420,7 +420,7 @@ "recommendation": "Stop or terminate i-01676168421. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01676168421 --region eu-west-1", "waste_score": 30.0, - "rank": 23 + "rank": 24 }, { "finding_id": "IDLE-RDS-LA8541", @@ -505,7 +505,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-08067372072 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08067372072", "waste_score": 20.0, - "rank": 24 + "rank": 25 }, { "finding_id": "EBS-UNATTACHED-416213", @@ -522,7 +522,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-01429416213 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-01429416213", "waste_score": 20.0, - "rank": 25 + "rank": 26 }, { "finding_id": "EBS-UNATTACHED-285822", @@ -539,7 +539,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-09303285822 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-09303285822", "waste_score": 10.0, - "rank": 29 + "rank": 31 }, { "finding_id": "EBS-UNATTACHED-782991", @@ -556,7 +556,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-03271782991 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03271782991", "waste_score": 10.0, - "rank": 30 + "rank": 32 }, { "finding_id": "EBS-UNATTACHED-872495", @@ -607,7 +607,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-07393195616 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-07393195616", "waste_score": 20.0, - "rank": 26 + "rank": 27 }, { "finding_id": "EBS-UNATTACHED-195918", @@ -624,7 +624,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-05310195918 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-05310195918", "waste_score": 5.0, - "rank": 32 + "rank": 34 }, { "finding_id": "EBS-UNATTACHED-785916", @@ -641,7 +641,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-08325785916 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08325785916", "waste_score": 20.0, - "rank": 27 + "rank": 28 }, { "finding_id": "EBS-UNATTACHED-292475", @@ -658,7 +658,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-03343292475 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03343292475", "waste_score": 5.0, - "rank": 33 + "rank": 35 }, { "finding_id": "EBS-UNATTACHED-374338", @@ -692,7 +692,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-03929454134 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03929454134", "waste_score": 20.0, - "rank": 28 + "rank": 29 }, { "finding_id": "EIP-UNUSED-640499", @@ -709,7 +709,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-063640499 --region ap-south-1", "waste_score": 2.16, - "rank": 34 + "rank": 36 }, { "finding_id": "EIP-UNUSED-062156", @@ -726,7 +726,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-067062156 --region ap-south-1", "waste_score": 2.16, - "rank": 35 + "rank": 37 }, { "finding_id": "EIP-UNUSED-813739", @@ -743,7 +743,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-084813739 --region ap-south-1", "waste_score": 2.16, - "rank": 36 + "rank": 38 }, { "finding_id": "EIP-UNUSED-600766", @@ -760,7 +760,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-099600766 --region eu-west-1", "waste_score": 2.16, - "rank": 37 + "rank": 39 }, { "finding_id": "EIP-UNUSED-276174", @@ -777,7 +777,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-067276174 --region eu-west-1", "waste_score": 2.16, - "rank": 38 + "rank": 40 }, { "finding_id": "EIP-UNUSED-788100", @@ -794,7 +794,28 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-044788100 --region ap-south-1", "waste_score": 2.16, - "rank": 39 + "rank": 41 + }, + { + "finding_id": "S3-COLD-ucket-03", + "category": "Storage Optimisation", + "severity": "MEDIUM", + "service": "S3", + "resource_id": "s3-frontend-dev-bucket-03", + "resource_name": "frontend-dev-s3-03", + "region": "us-east-1", + "team": "frontend", + "environment": "dev", + "size_gb": 4951.0, + "access_tier": "Cold", + "days_since_access": 60, + "last_accessed": "2026-03-27", + "detail": "4,951 GB bucket not accessed in 60 days but on S3 Standard pricing. Tier: Cold.", + "monthly_waste_usd": 62.63, + "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-03 --lifecycle-configuration file://glacier-lifecycle.json", + "waste_score": 62.63, + "rank": 17 }, { "finding_id": "S3-COLD-ucket-04", @@ -806,12 +827,16 @@ "region": "us-east-1", "team": "payments", "environment": "sandbox", - "detail": "Bucket not accessed in 101 days but on S3 Standard pricing. Move to Glacier.", + "size_gb": 3822.0, + "access_tier": "Frozen", + "days_since_access": 102, + "last_accessed": "2026-02-13", + "detail": "3,822 GB bucket not accessed in 102 days but on S3 Standard pricing. Tier: Frozen.", "monthly_waste_usd": 48.35, "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-04 --lifecycle-configuration file://glacier-lifecycle.json", "waste_score": 48.35, - "rank": 20 + "rank": 21 }, { "finding_id": "S3-COLD-ucket-07", @@ -823,12 +848,16 @@ "region": "us-east-1", "team": "payments", "environment": "sandbox", - "detail": "Bucket not accessed in 78 days but on S3 Standard pricing. Move to Glacier.", + "size_gb": 578.0, + "access_tier": "Cold", + "days_since_access": 79, + "last_accessed": "2026-03-08", + "detail": "578 GB bucket not accessed in 79 days but on S3 Standard pricing. Tier: Cold.", "monthly_waste_usd": 7.31, "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-07 --lifecycle-configuration file://glacier-lifecycle.json", "waste_score": 7.31, - "rank": 31 + "rank": 33 }, { "finding_id": "S3-COLD-ucket-08", @@ -840,12 +869,37 @@ "region": "us-east-1", "team": "ml-ops", "environment": "staging", - "detail": "Bucket not accessed in 91 days but on S3 Standard pricing. Move to Glacier.", + "size_gb": 3930.0, + "access_tier": "Frozen", + "days_since_access": 92, + "last_accessed": "2026-02-23", + "detail": "3,930 GB bucket not accessed in 92 days but on S3 Standard pricing. Tier: Frozen.", "monthly_waste_usd": 49.71, "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-ml-ops-staging-bucket-08 --lifecycle-configuration file://glacier-lifecycle.json", "waste_score": 49.71, - "rank": 19 + "rank": 20 + }, + { + "finding_id": "S3-IA-ucket-01", + "category": "Storage Optimisation", + "severity": "LOW", + "service": "S3", + "resource_id": "s3-frontend-dev-bucket-01", + "resource_name": "frontend-dev-s3-01", + "region": "us-east-1", + "team": "frontend", + "environment": "dev", + "size_gb": 1625.0, + "access_tier": "Infrequent", + "days_since_access": 52, + "last_accessed": "2026-04-04", + "detail": "1,625 GB bucket last accessed 52 days ago. Move to S3-Infrequent Access tier.", + "monthly_waste_usd": 16.82, + "recommendation": "Switch to S3 Infrequent Access or enable Intelligent-Tiering.", + "cli_fix": "aws s3api put-bucket-intelligent-tiering-configuration --bucket s3-frontend-dev-bucket-01 --id tiering-config --intelligent-tiering-configuration Id=tiering-config,Status=Enabled", + "waste_score": 10.09, + "rank": 30 } ] } \ No newline at end of file diff --git a/llm_report.json b/llm_report.json index 1c4bdd4..82c7afa 100644 --- a/llm_report.json +++ b/llm_report.json @@ -1,17 +1,17 @@ { - "executive_summary": "This audit identified $67,766 in annual waste across 39 findings, with a single idle RDS instance costing $691/month being the top offender. 88% of waste ($4,990) comes from idle resources running in non-production environments that could be safely stopped today. Immediate action on the top 10 findings alone would save $4,607 monthly with zero business impact.", - "total_monthly_waste": 5647.17, - "total_annual_waste": 67766.04, + "executive_summary": "Your AWS infrastructure is wasting $68,719 annually across 41 resources, with the majority ($59,884) coming from idle EC2 instances and RDS databases running with extremely low utilization. The worst offender is a sandbox RDS instance that's been idle for 68 days, burning $691 monthly with just 2.91% CPU usage. Immediate action on the top 10 findings alone would save $48,516 annually with zero business impact.", + "total_monthly_waste": 5726.62, + "total_annual_waste": 68719.44, "findings": [ { "rank": 1, "finding_id": "IDLE-RDS-LA8541", "resource_name": "platform-sandbox-rds-03", "team": "platform", - "plain_english": "This database has been sitting idle for 68 days in a sandbox environment, using only 2.91% CPU but still charging full price. RDS instances can't be stopped for more than 7 days, so it keeps auto-restarting and billing continuously.", - "business_impact": "Wasting $8,294 annually on a database that appears to serve no active purpose in sandbox testing.", + "plain_english": "This database has been sitting idle for 68 days in your sandbox environment, using only 2.91% of its CPU capacity. Unlike EC2 instances, RDS can't be stopped for more than 7 days, so it's been continuously charging you $691 per month for doing essentially nothing.", + "business_impact": "This single unused database is costing $8,294 annually with zero business value.", "monthly_saving": 691.2, - "priority_action": "Create a final backup snapshot and delete the database if no active development needs it.", + "priority_action": "Create a final backup snapshot and delete this database immediately if no active development is using it.", "cli_fix": "aws rds create-db-snapshot --db-instance-identifier db-PLA8541 --db-snapshot-identifier db-PLA8541-final-snap" }, { @@ -19,10 +19,10 @@ "finding_id": "IDLE-EC2-938612", "resource_name": "payments-staging-ec2-16", "team": "payments", - "plain_english": "This server has been running for 94 days but only using 4.18% of its computing power, well below our 5% efficiency threshold. It's essentially an expensive computer doing almost nothing.", - "business_impact": "Burning $6,635 annually on unused computing capacity that could fund actual business initiatives.", + "plain_english": "This server has been running for 94 days with only 4.18% CPU usage, well below productive levels. It's essentially a very expensive computer that's turned on but not doing any meaningful work.", + "business_impact": "Wasting $6,635 annually on unused staging capacity that could be provisioned on-demand when needed.", "monthly_saving": 552.9, - "priority_action": "Stop the instance immediately and implement auto-start/stop scheduling for staging workloads.", + "priority_action": "Stop this instance immediately and implement auto-scheduling to start it only when staging tests are needed.", "cli_fix": "aws ec2 stop-instances --instance-ids i-07517938612 --region ap-south-1" }, { @@ -30,10 +30,10 @@ "finding_id": "IDLE-EC2-514789", "resource_name": "payments-dev-ec2-21", "team": "payments", - "plain_english": "This development server has been running for 62 days with only 3.92% CPU usage, indicating no active development work. Development environments should be stopped when not actively coding.", - "business_impact": "Wasting $6,635 annually on idle development infrastructure that provides zero business value when unused.", + "plain_english": "This development server has been running for 62 days with only 3.92% CPU usage. Development environments typically don't need to run 24/7 since developers work normal business hours.", + "business_impact": "Burning $6,635 annually on always-on development infrastructure that likely sits unused nights and weekends.", "monthly_saving": 552.9, - "priority_action": "Stop the instance and train developers to start/stop resources as needed for active development.", + "priority_action": "Stop this instance and train developers to start it only when actively coding.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01420514789 --region eu-west-1" }, { @@ -41,10 +41,10 @@ "finding_id": "IDLE-EC2-237817", "resource_name": "data-eng-sandbox-ec2-23", "team": "data-eng", - "plain_english": "This sandbox server for data engineering has been idle for 39 days with 4.04% CPU usage. Sandbox environments should only run during active experimentation or testing.", - "business_impact": "Throwing away $6,635 annually on unused sandbox capacity that could support actual data engineering experiments.", + "plain_english": "This sandbox server for data engineering experiments has been idle for 39 days with 4.04% CPU usage. Sandbox environments should be ephemeral and destroyed when experiments are complete.", + "business_impact": "Costing $6,635 annually for experimental infrastructure that should be created and destroyed as needed.", "monthly_saving": 552.9, - "priority_action": "Immediately stop the instance and implement scheduled start/stop for sandbox workloads.", + "priority_action": "Terminate this instance and establish a policy that sandbox resources must be tagged with auto-deletion dates.", "cli_fix": "aws ec2 stop-instances --instance-ids i-03090237817 --region ap-south-1" }, { @@ -52,10 +52,10 @@ "finding_id": "IDLE-EC2-602734", "resource_name": "data-eng-prod-ec2-12", "team": "data-eng", - "plain_english": "This production server is severely underutilized at 2.18% CPU over 40 days, suggesting the workload doesn't match the server size. Production resources running this idle indicate poor capacity planning.", - "business_impact": "Wasting $5,875 annually on oversized production infrastructure that could be rightsized or consolidated.", + "plain_english": "This production data engineering server has been running for 40 days with only 2.18% CPU usage. Even in production, servers should be right-sized or replaced with auto-scaling solutions.", + "business_impact": "Wasting $5,875 annually on oversized production capacity that could be downsized or moved to serverless.", "monthly_saving": 489.6, - "priority_action": "Investigate if this workload can be moved to a smaller instance or consolidated with other services.", + "priority_action": "Investigate if this workload can be moved to AWS Lambda, Glue, or a smaller instance type.", "cli_fix": "aws ec2 stop-instances --instance-ids i-04272602734 --region eu-west-1" }, { @@ -63,10 +63,10 @@ "finding_id": "IDLE-EC2-899549", "resource_name": "frontend-staging-ec2-13", "team": "frontend", - "plain_english": "This staging server has been running for 22 days with extremely low CPU usage at 1.1%. Staging environments should only run during active testing or deployment activities.", - "business_impact": "Squandering $5,875 annually on idle staging infrastructure that provides no testing or deployment value when unused.", + "plain_english": "This frontend staging server has been running for 22 days with just 1.1% CPU usage. Staging environments for frontend apps can typically be replaced with serverless hosting or on-demand containers.", + "business_impact": "Spending $5,875 annually on staging infrastructure that could be replaced with much cheaper serverless alternatives.", "monthly_saving": 489.6, - "priority_action": "Stop the instance and implement CI/CD automation to start staging resources only during deployments.", + "priority_action": "Migrate this staging environment to AWS App Runner, Amplify, or implement start/stop automation.", "cli_fix": "aws ec2 stop-instances --instance-ids i-04944899549 --region eu-west-1" }, { @@ -74,10 +74,10 @@ "finding_id": "IDLE-EC2-792787", "resource_name": "frontend-dev-ec2-04", "team": "frontend", - "plain_english": "This development server has been running for 106 days with only 2.2% CPU usage, indicating no active frontend development work. Long-running idle dev resources suggest poor resource hygiene practices.", - "business_impact": "Losing $4,356 annually on abandoned development infrastructure that could fund actual feature development.", + "plain_english": "This frontend development server has been running for 106 days with only 2.2% CPU usage. Development servers should be stopped when not actively being used for coding or testing.", + "business_impact": "Wasting $4,356 annually on development infrastructure that likely sits unused most of the time.", "monthly_saving": 363.0, - "priority_action": "Stop the instance immediately and establish team policies for managing development environment lifecycles.", + "priority_action": "Stop this instance and create a simple start/stop script for developers to use when needed.", "cli_fix": "aws ec2 stop-instances --instance-ids i-02625792787 --region eu-west-1" }, { @@ -85,10 +85,10 @@ "finding_id": "IDLE-EC2-494220", "resource_name": "frontend-staging-ec2-05", "team": "frontend", - "plain_english": "This staging server has been idle for 93 days with 2.95% CPU usage, far longer than any reasonable testing cycle. Staging resources should have automatic cleanup after testing phases.", - "business_impact": "Hemorrhaging $4,356 annually on forgotten staging infrastructure that serves no current testing or validation purpose.", + "plain_english": "This frontend staging server has been idle for 93 days with 2.95% CPU usage. Staging environments should only run when testing is actively happening, not 24/7.", + "business_impact": "Burning $4,356 annually on staging capacity that could be provisioned on-demand for testing cycles.", "monthly_saving": 363.0, - "priority_action": "Stop the instance and implement automated staging environment cleanup after 7 days of inactivity.", + "priority_action": "Stop this instance and integrate start/stop automation with your CI/CD pipeline to provision staging on-demand.", "cli_fix": "aws ec2 stop-instances --instance-ids i-05231494220 --region ap-south-1" }, { @@ -96,10 +96,10 @@ "finding_id": "IDLE-EC2-251661", "resource_name": "payments-sandbox-ec2-07", "team": "payments", - "plain_english": "This sandbox server has been running for 51 days with 3.92% CPU usage, indicating no active payment system experimentation. Sandbox resources should only run during active development or testing.", - "business_impact": "Wasting $3,319 annually on idle sandbox capacity that could support actual payments feature development.", + "plain_english": "This payments sandbox server has been running for 51 days with 3.92% CPU usage. Sandbox environments should be temporary and cleaned up regularly to prevent cost accumulation.", + "business_impact": "Costing $3,319 annually for experimental infrastructure that provides no ongoing business value.", "monthly_saving": 276.6, - "priority_action": "Stop the instance and create documentation for payments team on proper sandbox resource management.", + "priority_action": "Terminate this sandbox instance and implement automatic cleanup policies for all sandbox resources.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01240251661 --region ap-south-1" }, { @@ -107,38 +107,38 @@ "finding_id": "IDLE-EC2-598054", "resource_name": "platform-prod-ec2-19", "team": "platform", - "plain_english": "This production server has been running for 87 days with only 2.07% CPU usage, suggesting the workload has been moved elsewhere or dramatically reduced. Production resources this idle represent significant optimization opportunities.", - "business_impact": "Burning $3,319 annually on unused production capacity that could be eliminated or repurposed for actual platform needs.", + "plain_english": "This production platform server has been running for 87 days with only 2.07% CPU usage. Production resources should be right-sized and this appears significantly oversized for its actual workload.", + "business_impact": "Wasting $3,319 annually on production capacity that could be downsized or consolidated with other services.", "monthly_saving": 276.6, - "priority_action": "Verify if this server is still needed for production workloads and terminate or rightsize accordingly.", + "priority_action": "Analyze this server's actual workload and either downsize to a smaller instance type or consolidate with other services.", "cli_fix": "aws ec2 stop-instances --instance-ids i-09822598054 --region eu-west-1" } ], "quick_wins": [ - "Stop all development and sandbox EC2 instances immediately - they can be restarted when needed with zero data loss", - "Create final snapshots of the idle RDS instance and delete it if no active development depends on it", - "Implement AWS Instance Scheduler to automatically stop non-production resources outside business hours" + "Stop all sandbox and development EC2 instances immediately - they can be restarted when needed with zero data loss", + "Create final snapshots and delete the idle RDS instance in sandbox that's been unused for 68 days", + "Implement AWS Instance Scheduler on all non-production environments to automatically stop instances outside business hours" ], "team_breakdown": { "payments": { "monthly_waste": 1382.4, - "top_issue": "Multiple idle staging and development instances running continuously when they should be stopped between active development cycles" + "top_issue": "Multiple idle staging and development instances that should be stopped when not in use" + }, + "platform": { + "monthly_waste": 967.8, + "top_issue": "Idle RDS database in sandbox burning $691 monthly for 68 days with minimal usage" }, "data-eng": { "monthly_waste": 1042.5, - "top_issue": "Idle production and sandbox instances indicating poor capacity planning and resource lifecycle management" + "top_issue": "Idle servers in both production and sandbox that need right-sizing or termination" }, "frontend": { - "monthly_waste": 1215.6, - "top_issue": "Long-running idle development and staging environments suggesting lack of automated resource cleanup policies" - }, - "platform": { - "monthly_waste": 967.8, - "top_issue": "Expensive idle RDS instance in sandbox environment that should be deleted or converted to serverless" + "monthly_waste": 852.6, + "top_issue": "Multiple staging and development environments running 24/7 instead of on-demand" } }, - "closing_recommendation": "Focus immediately on stopping all non-production idle resources, which represents 85% of the waste and carries zero business risk. Implement automated scheduling and cleanup policies to prevent this waste from recurring, as the current burn rate of $67K annually could fund significant new infrastructure investments.", - "generated_at": "2026-05-25 18:17", + "closing_recommendation": "Implement a company-wide policy requiring all non-production resources to have auto-stop schedules and automatic cleanup dates. The top 10 findings alone represent $48,516 in annual waste that can be eliminated this week with zero impact on development velocity.", + "generated_at": "2026-05-26 16:56", "all_findings": [ { "finding_id": "IDLE-EC2-334018", @@ -155,7 +155,7 @@ "recommendation": "Stop or terminate i-09963334018. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-09963334018 --region us-east-1", "waste_score": 60.0, - "rank": 17 + "rank": 18 }, { "finding_id": "IDLE-EC2-989805", @@ -172,7 +172,7 @@ "recommendation": "Stop or terminate i-01438989805. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01438989805 --region us-east-1", "waste_score": 30.0, - "rank": 21 + "rank": 22 }, { "finding_id": "IDLE-EC2-792787", @@ -308,7 +308,7 @@ "recommendation": "Stop or terminate i-01822873088. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01822873088 --region eu-west-1", "waste_score": 60.0, - "rank": 18 + "rank": 19 }, { "finding_id": "IDLE-EC2-514789", @@ -342,7 +342,7 @@ "recommendation": "Stop or terminate i-04875962612. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-04875962612 --region ap-south-1", "waste_score": 30.0, - "rank": 22 + "rank": 23 }, { "finding_id": "IDLE-EC2-237817", @@ -376,7 +376,7 @@ "recommendation": "Stop or terminate i-01676168421. If needed occasionally, convert to spot or use auto-start/stop scheduler.", "cli_fix": "aws ec2 stop-instances --instance-ids i-01676168421 --region eu-west-1", "waste_score": 30.0, - "rank": 23 + "rank": 24 }, { "finding_id": "IDLE-RDS-LA8541", @@ -461,7 +461,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-08067372072 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08067372072", "waste_score": 20.0, - "rank": 24 + "rank": 25 }, { "finding_id": "EBS-UNATTACHED-416213", @@ -478,7 +478,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-01429416213 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-01429416213", "waste_score": 20.0, - "rank": 25 + "rank": 26 }, { "finding_id": "EBS-UNATTACHED-285822", @@ -495,7 +495,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-09303285822 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-09303285822", "waste_score": 10.0, - "rank": 29 + "rank": 31 }, { "finding_id": "EBS-UNATTACHED-782991", @@ -512,7 +512,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-03271782991 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03271782991", "waste_score": 10.0, - "rank": 30 + "rank": 32 }, { "finding_id": "EBS-UNATTACHED-872495", @@ -563,7 +563,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-07393195616 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-07393195616", "waste_score": 20.0, - "rank": 26 + "rank": 27 }, { "finding_id": "EBS-UNATTACHED-195918", @@ -580,7 +580,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-05310195918 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-05310195918", "waste_score": 5.0, - "rank": 32 + "rank": 34 }, { "finding_id": "EBS-UNATTACHED-785916", @@ -597,7 +597,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-08325785916 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-08325785916", "waste_score": 20.0, - "rank": 27 + "rank": 28 }, { "finding_id": "EBS-UNATTACHED-292475", @@ -614,7 +614,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-03343292475 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03343292475", "waste_score": 5.0, - "rank": 33 + "rank": 35 }, { "finding_id": "EBS-UNATTACHED-374338", @@ -648,7 +648,7 @@ "recommendation": "Snapshot then delete if data not needed. If needed, attach to an instance.", "cli_fix": "aws ec2 create-snapshot --volume-id vol-03929454134 --description 'backup-before-delete' && aws ec2 delete-volume --volume-id vol-03929454134", "waste_score": 20.0, - "rank": 28 + "rank": 29 }, { "finding_id": "EIP-UNUSED-640499", @@ -665,7 +665,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-063640499 --region ap-south-1", "waste_score": 2.16, - "rank": 34 + "rank": 36 }, { "finding_id": "EIP-UNUSED-062156", @@ -682,7 +682,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-067062156 --region ap-south-1", "waste_score": 2.16, - "rank": 35 + "rank": 37 }, { "finding_id": "EIP-UNUSED-813739", @@ -699,7 +699,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-084813739 --region ap-south-1", "waste_score": 2.16, - "rank": 36 + "rank": 38 }, { "finding_id": "EIP-UNUSED-600766", @@ -716,7 +716,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-099600766 --region eu-west-1", "waste_score": 2.16, - "rank": 37 + "rank": 39 }, { "finding_id": "EIP-UNUSED-276174", @@ -733,7 +733,7 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-067276174 --region eu-west-1", "waste_score": 2.16, - "rank": 38 + "rank": 40 }, { "finding_id": "EIP-UNUSED-788100", @@ -750,7 +750,28 @@ "recommendation": "Release EIP if no longer needed.", "cli_fix": "aws ec2 release-address --allocation-id eipalloc-044788100 --region ap-south-1", "waste_score": 2.16, - "rank": 39 + "rank": 41 + }, + { + "finding_id": "S3-COLD-ucket-03", + "category": "Storage Optimisation", + "severity": "MEDIUM", + "service": "S3", + "resource_id": "s3-frontend-dev-bucket-03", + "resource_name": "frontend-dev-s3-03", + "region": "us-east-1", + "team": "frontend", + "environment": "dev", + "size_gb": 4951.0, + "access_tier": "Cold", + "days_since_access": 60, + "last_accessed": "2026-03-27", + "detail": "4,951 GB bucket not accessed in 60 days but on S3 Standard pricing. Tier: Cold.", + "monthly_waste_usd": 62.63, + "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-03 --lifecycle-configuration file://glacier-lifecycle.json", + "waste_score": 62.63, + "rank": 17 }, { "finding_id": "S3-COLD-ucket-04", @@ -762,12 +783,16 @@ "region": "us-east-1", "team": "payments", "environment": "sandbox", - "detail": "Bucket not accessed in 101 days but on S3 Standard pricing. Move to Glacier.", + "size_gb": 3822.0, + "access_tier": "Frozen", + "days_since_access": 102, + "last_accessed": "2026-02-13", + "detail": "3,822 GB bucket not accessed in 102 days but on S3 Standard pricing. Tier: Frozen.", "monthly_waste_usd": 48.35, "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-04 --lifecycle-configuration file://glacier-lifecycle.json", "waste_score": 48.35, - "rank": 20 + "rank": 21 }, { "finding_id": "S3-COLD-ucket-07", @@ -779,12 +804,16 @@ "region": "us-east-1", "team": "payments", "environment": "sandbox", - "detail": "Bucket not accessed in 78 days but on S3 Standard pricing. Move to Glacier.", + "size_gb": 578.0, + "access_tier": "Cold", + "days_since_access": 79, + "last_accessed": "2026-03-08", + "detail": "578 GB bucket not accessed in 79 days but on S3 Standard pricing. Tier: Cold.", "monthly_waste_usd": 7.31, "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-07 --lifecycle-configuration file://glacier-lifecycle.json", "waste_score": 7.31, - "rank": 31 + "rank": 33 }, { "finding_id": "S3-COLD-ucket-08", @@ -796,12 +825,37 @@ "region": "us-east-1", "team": "ml-ops", "environment": "staging", - "detail": "Bucket not accessed in 91 days but on S3 Standard pricing. Move to Glacier.", + "size_gb": 3930.0, + "access_tier": "Frozen", + "days_since_access": 92, + "last_accessed": "2026-02-23", + "detail": "3,930 GB bucket not accessed in 92 days but on S3 Standard pricing. Tier: Frozen.", "monthly_waste_usd": 49.71, "recommendation": "Apply S3 Intelligent-Tiering or Lifecycle rule to move to Glacier after 30 days.", "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-ml-ops-staging-bucket-08 --lifecycle-configuration file://glacier-lifecycle.json", "waste_score": 49.71, - "rank": 19 + "rank": 20 + }, + { + "finding_id": "S3-IA-ucket-01", + "category": "Storage Optimisation", + "severity": "LOW", + "service": "S3", + "resource_id": "s3-frontend-dev-bucket-01", + "resource_name": "frontend-dev-s3-01", + "region": "us-east-1", + "team": "frontend", + "environment": "dev", + "size_gb": 1625.0, + "access_tier": "Infrequent", + "days_since_access": 52, + "last_accessed": "2026-04-04", + "detail": "1,625 GB bucket last accessed 52 days ago. Move to S3-Infrequent Access tier.", + "monthly_waste_usd": 16.82, + "recommendation": "Switch to S3 Infrequent Access or enable Intelligent-Tiering.", + "cli_fix": "aws s3api put-bucket-intelligent-tiering-configuration --bucket s3-frontend-dev-bucket-01 --id tiering-config --intelligent-tiering-configuration Id=tiering-config,Status=Enabled", + "waste_score": 10.09, + "rank": 30 } ], "raw_top10": [ diff --git a/s3_analysis.json b/s3_analysis.json new file mode 100644 index 0000000..4412caa --- /dev/null +++ b/s3_analysis.json @@ -0,0 +1,168 @@ +{ + "generated_at": "2026-05-26 15:20", + "total_buckets": 8, + "total_size_gb": 18745.0, + "total_monthly_cost": 431.14, + "potential_saving": 184.82, + "terminate_candidates": 1, + "tier_summary": { + "Frozen": { + "count": 2, + "size_gb": 7752.0, + "cost": 178.3 + }, + "Cold": { + "count": 2, + "size_gb": 5529.0, + "cost": 127.16 + }, + "Infrequent": { + "count": 1, + "size_gb": 1625.0, + "cost": 37.38 + }, + "Active": { + "count": 3, + "size_gb": 3839.0, + "cost": 88.3 + } + }, + "buckets": [ + { + "resource_id": "s3-payments-sandbox-bucket-04", + "resource_name": "payments-sandbox-s3-04", + "region": "us-east-1", + "team": "payments", + "environment": "sandbox", + "size_gb": 3822.0, + "storage_class": "Standard", + "last_accessed": "2026-02-13", + "days_since_access": 102, + "access_tier": "Frozen", + "monthly_cost_usd": 87.91, + "potential_saving": 48.35, + "terminate_candidate": true, + "recommendation": "Delete bucket (dev/sandbox) or archive to Glacier", + "cli_fix": "aws s3 rb s3://s3-payments-sandbox-bucket-04 --force" + }, + { + "resource_id": "s3-ml-ops-staging-bucket-08", + "resource_name": "ml-ops-staging-s3-08", + "region": "us-east-1", + "team": "ml-ops", + "environment": "staging", + "size_gb": 3930.0, + "storage_class": "Standard", + "last_accessed": "2026-02-23", + "days_since_access": 92, + "access_tier": "Frozen", + "monthly_cost_usd": 90.39, + "potential_saving": 49.71, + "terminate_candidate": false, + "recommendation": "Delete bucket (dev/sandbox) or archive to Glacier", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-ml-ops-staging-bucket-08 --lifecycle-configuration file://glacier-lifecycle.json" + }, + { + "resource_id": "s3-payments-sandbox-bucket-07", + "resource_name": "payments-sandbox-s3-07", + "region": "us-east-1", + "team": "payments", + "environment": "sandbox", + "size_gb": 578.0, + "storage_class": "Standard", + "last_accessed": "2026-03-08", + "days_since_access": 79, + "access_tier": "Cold", + "monthly_cost_usd": 13.29, + "potential_saving": 7.31, + "terminate_candidate": false, + "recommendation": "Move to S3 Glacier via Lifecycle rule", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-sandbox-bucket-07 --lifecycle-configuration file://glacier-lifecycle.json" + }, + { + "resource_id": "s3-frontend-dev-bucket-03", + "resource_name": "frontend-dev-s3-03", + "region": "us-east-1", + "team": "frontend", + "environment": "dev", + "size_gb": 4951.0, + "storage_class": "Standard", + "last_accessed": "2026-03-27", + "days_since_access": 60, + "access_tier": "Cold", + "monthly_cost_usd": 113.87, + "potential_saving": 62.63, + "terminate_candidate": false, + "recommendation": "Move to S3 Glacier via Lifecycle rule", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-03 --lifecycle-configuration file://glacier-lifecycle.json" + }, + { + "resource_id": "s3-frontend-dev-bucket-01", + "resource_name": "frontend-dev-s3-01", + "region": "us-east-1", + "team": "frontend", + "environment": "dev", + "size_gb": 1625.0, + "storage_class": "Standard", + "last_accessed": "2026-04-04", + "days_since_access": 52, + "access_tier": "Infrequent", + "monthly_cost_usd": 37.38, + "potential_saving": 16.82, + "terminate_candidate": false, + "recommendation": "Switch to S3-Infrequent Access or Intelligent-Tiering", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-frontend-dev-bucket-01 --lifecycle-configuration file://glacier-lifecycle.json" + }, + { + "resource_id": "s3-data-eng-dev-bucket-02", + "resource_name": "data-eng-dev-s3-02", + "region": "us-east-1", + "team": "data-eng", + "environment": "dev", + "size_gb": 203.0, + "storage_class": "Standard", + "last_accessed": "2026-04-29", + "days_since_access": 27, + "access_tier": "Active", + "monthly_cost_usd": 4.67, + "potential_saving": 0.0, + "terminate_candidate": false, + "recommendation": "No action needed \u2014 actively used", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-data-eng-dev-bucket-02 --lifecycle-configuration file://glacier-lifecycle.json" + }, + { + "resource_id": "s3-payments-staging-bucket-06", + "resource_name": "payments-staging-s3-06", + "region": "us-east-1", + "team": "payments", + "environment": "staging", + "size_gb": 2742.0, + "storage_class": "Standard", + "last_accessed": "2026-05-07", + "days_since_access": 19, + "access_tier": "Active", + "monthly_cost_usd": 63.07, + "potential_saving": 0.0, + "terminate_candidate": false, + "recommendation": "No action needed \u2014 actively used", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-payments-staging-bucket-06 --lifecycle-configuration file://glacier-lifecycle.json" + }, + { + "resource_id": "s3-data-eng-dev-bucket-05", + "resource_name": "data-eng-dev-s3-05", + "region": "us-east-1", + "team": "data-eng", + "environment": "dev", + "size_gb": 894.0, + "storage_class": "Standard", + "last_accessed": "2026-05-14", + "days_since_access": 12, + "access_tier": "Active", + "monthly_cost_usd": 20.56, + "potential_saving": 0.0, + "terminate_candidate": false, + "recommendation": "No action needed \u2014 actively used", + "cli_fix": "aws s3api put-bucket-lifecycle-configuration --bucket s3-data-eng-dev-bucket-05 --lifecycle-configuration file://glacier-lifecycle.json" + } + ] +} \ No newline at end of file