diff --git a/dashboard_AI.py b/dashboard_AI.py
index 3fdf7cc..689a4d1 100644
--- a/dashboard_AI.py
+++ b/dashboard_AI.py
@@ -176,373 +176,673 @@ def call_claude(messages):
if report.get("source"):
st.markdown(f'๐ {report["source"]}', unsafe_allow_html=True)
-# โโ MAIN LAYOUT: left 62% content | right 38% chatbot โโโโโโโโโโโโโโโโโโโโโโโโโ
-main_col, chat_col = st.columns([0.62, 0.38])
-
-with main_col:
- # Metric cards
- c1, c2, c3, c4 = st.columns(4)
- with c1:
- st.markdown(f"""
-
๐ค FinOps AI
-
- Ask anything about your AWS costs
+ # Donut: tagged vs untagged by cost
+ ut_chart_l, ut_chart_r = st.columns(2)
+ with ut_chart_l:
+ st.markdown("**Tagging coverage by spend**")
+ tag_pie = pd.DataFrame([
+ {"Status": "Untagged", "Cost": round(untagged_cost, 2)},
+ {"Status": "Tagged", "Cost": round(total_cost - untagged_cost, 2)},
+ ])
+ fig_tag = px.pie(tag_pie, values="Cost", names="Status",
+ color_discrete_map={"Untagged": "#f59e0b", "Tagged": "#10b981"}, hole=0.5)
+ fig_tag.update_traces(textposition="outside", textinfo="label+percent")
+ fig_tag.update_layout(showlegend=False, paper_bgcolor="white",
+ margin=dict(l=0,r=0,t=10,b=0), height=220)
+ st.plotly_chart(fig_tag, use_container_width=True)
+
+ with ut_chart_r:
+ st.markdown("**Untagged spend by service**")
+ if "service" in untagged_df.columns:
+ svc_untagged = (
+ untagged_df.groupby("service")["monthly_cost_usd"]
+ .sum().reset_index()
+ .sort_values("monthly_cost_usd", ascending=True)
+ .tail(8)
+ )
+ svc_untagged.columns = ["Service", "Cost"]
+ fig_svc = px.bar(svc_untagged, x="Cost", y="Service", orientation="h",
+ color="Cost", color_continuous_scale=["#fef3c7", "#f59e0b"], text="Cost")
+ fig_svc.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
+ fig_svc.update_layout(showlegend=False, coloraxis_showscale=False,
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=60,t=10,b=0), height=220,
+ yaxis=dict(showgrid=False), xaxis=dict(showgrid=True, gridcolor="#f0f0f0"))
+ st.plotly_chart(fig_svc, use_container_width=True)
+
+ # Table of untagged resources
+ st.markdown("
", unsafe_allow_html=True)
+ st.markdown("**Resources missing tags** โ sorted by monthly cost")
+
+ show_cols = [c for c in ["resource_id","resource_name","service","region","team","environment","monthly_cost_usd","tags"] if c in untagged_df.columns]
+ display_df = (
+ untagged_df[show_cols]
+ .sort_values("monthly_cost_usd", ascending=False)
+ .reset_index(drop=True)
+ )
+ display_df.index += 1
+
+ # Search filter
+ tag_search = st.text_input("๐ Filter by resource ID or service", placeholder="e.g. vol- or EC2", key="tag_search")
+ if tag_search:
+ mask = display_df.apply(lambda row: tag_search.lower() in str(row).lower(), axis=1)
+ display_df = display_df[mask]
+
+ st.dataframe(
+ display_df,
+ use_container_width=True,
+ height=min(400, 40 + len(display_df) * 35),
+ column_config={
+ "monthly_cost_usd": st.column_config.NumberColumn("Monthly Cost ($)", format="$%.2f"),
+ "resource_id": st.column_config.TextColumn("Resource ID"),
+ "resource_name": st.column_config.TextColumn("Name"),
+ "service": st.column_config.TextColumn("Service"),
+ "region": st.column_config.TextColumn("Region"),
+ "team": st.column_config.TextColumn("Team"),
+ "environment": st.column_config.TextColumn("Environment"),
+ "tags": st.column_config.TextColumn("Tags"),
+ }
+ )
+
+ # Tagging CLI helper
+ st.markdown("
", unsafe_allow_html=True)
+ st.markdown("**Fix it โ bulk tag via AWS CLI:**")
+ top_untagged = untagged_df.sort_values("monthly_cost_usd", ascending=False).head(3)
+ for _, row in top_untagged.iterrows():
+ rid = row.get("resource_id", "")
+ region = row.get("region", "us-east-1")
+ svc = str(row.get("service", "")).lower()
+ if "ec2" in svc or rid.startswith(("i-", "vol-", "snap-")):
+ cli = f"aws ec2 create-tags --resources {rid} --tags Key=team,Value=your-team Key=environment,Value=prod Key=owner,Value=your-name --region {region}"
+ elif "rds" in svc:
+ cli = f"aws rds add-tags-to-resource --resource-name {rid} --tags Key=team,Value=your-team Key=environment,Value=prod --region {region}"
+ elif "s3" in svc:
+ cli = f"aws s3api put-bucket-tagging --bucket {rid} --tagging 'TagSet=[{{Key=team,Value=your-team}},{{Key=environment,Value=prod}}]'"
+ else:
+ cli = f"aws resourcegroupstaggingapi tag-resources --resource-arn-list {rid} --tags team=your-team,environment=prod,owner=your-name --region {region}"
+ st.markdown(
+ f'
$ {cli}
',
+ unsafe_allow_html=True
+ )
+
+ st.markdown("---")
+ st.caption("Built for Perforce Global Jam 2026 ยท Team Ghost Busters ยท Cloud Cost Waste Hunter")
+
+ # โโ RIGHT PANEL: FinOps AI Chatbot โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+ with chat_col:
+ st.markdown("""
+
+
๐ค FinOps AI
+
+ Ask anything about your AWS costs
+
-
- """, unsafe_allow_html=True)
-
- # Suggested questions
- suggestions = [
- "Which service should I fix first?",
- "Why did EC2-Other spike?",
- "How much can we save on Neptune?",
- "What is the DevOpsAgent charge?",
- "Give me a 3-step action plan",
- ]
- st.markdown("
๐ก Suggested questions:
",
- unsafe_allow_html=True)
- for i, sug in enumerate(suggestions):
- if st.button(sug, key=f"sug_{i}", use_container_width=True):
- st.session_state.chat_history.append({"role":"user","content":sug})
+ """, unsafe_allow_html=True)
+
+ # Suggested questions
+ suggestions = [
+ "Which service should I fix first?",
+ "Why did EC2-Other spike?",
+ "How much can we save on Neptune?",
+ "What is the DevOpsAgent charge?",
+ "Give me a 3-step action plan",
+ ]
+ st.markdown("
๐ก Suggested questions:
",
+ unsafe_allow_html=True)
+ for i, sug in enumerate(suggestions):
+ if st.button(sug, key=f"sug_{i}", use_container_width=True):
+ st.session_state.chat_history.append({"role":"user","content":sug})
+ with st.spinner("Thinking..."):
+ ans = call_claude(st.session_state.chat_history)
+ st.session_state.chat_history.append({"role":"assistant","content":ans})
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Chat history
+ for msg in st.session_state.chat_history:
+ if msg["role"] == "user":
+ st.markdown(
+ f"
You: {msg['content']}
",
+ unsafe_allow_html=True)
+ else:
+ st.markdown(
+ f"
๐ค FinOps AI: {msg['content']}
",
+ unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Input
+ if prompt_input := st.chat_input("Ask about your AWS costs..."):
+ st.session_state.chat_history.append({"role":"user","content":prompt_input})
with st.spinner("Thinking..."):
ans = call_claude(st.session_state.chat_history)
st.session_state.chat_history.append({"role":"assistant","content":ans})
+ st.rerun()
- st.markdown("
", unsafe_allow_html=True)
+ if st.session_state.chat_history:
+ if st.button("๐๏ธ Clear chat", use_container_width=True):
+ st.session_state.chat_history = []
+ st.rerun()
- # Chat history
- for msg in st.session_state.chat_history:
- if msg["role"] == "user":
- st.markdown(
- f"
You: {msg['content']}
",
- unsafe_allow_html=True)
+with gcp_tab:
+ # โโ GCP LAYOUT โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+ gcp_main, gcp_chat = st.columns([0.62, 0.38])
+
+ with gcp_main:
+ _gcp_report_path = os.environ.get("GHOSTBUSTERS_GCP_REPORT", "gcp_report.json")
+
+ @st.cache_data
+ def load_gcp_report(path=_gcp_report_path):
+     """Return the parsed GCP report JSON at ``path``, or None if no such file exists.
+
+     The function is wrapped in ``st.cache_data`` and its default ``path``
+     is bound to ``_gcp_report_path`` when the function is defined.
+     """
+     if os.path.exists(path):
+         with open(path) as report_file:
+             return json.load(report_file)
+     return None
+
+ gr = load_gcp_report()
+
+ if gr is None:
+ st.info("No GCP report found. Run the pipeline:")
+ st.code("python3 gcp_detection_engine.py\nexport ANTHROPIC_API_KEY=$(cat SM_api_key) && python3 gcp_analyzer.py", language="bash")
else:
+ g_spend = gr.get("total_monthly_spend", 0)
+ g_opp = gr.get("total_monthly_opportunity", 0)
+ g_annual = gr.get("total_annual_opportunity", g_opp * 12)
+ g_saved = gr.get("savings_already_applied", gr.get("savings_efficiency_pct", 0))
+ g_findings= gr.get("findings", [])
+ g_list = gr.get("total_list_cost", 0)
+ g_eff = gr.get("savings_efficiency_pct", 0)
+
+ # Source badge
+ if gr.get("source"):
+ st.markdown(f'''
๐ {gr["source"]}''', unsafe_allow_html=True)
+
+ # Metric cards
+ gc1, gc2, gc3, gc4 = st.columns(4)
+ with gc1:
+ st.markdown(f'''
+
GCP Monthly Spend
+
${g_spend:,.0f}
+
after SADA discounts
+
''', unsafe_allow_html=True)
+ with gc2:
+ st.markdown(f'''
+
SADA Savings Applied
+
{g_eff:.0f}%
+
${g_list - g_spend:,.0f} saved/mo
+
''', unsafe_allow_html=True)
+ with gc3:
+ st.markdown(f'''
+
Additional Opportunity
+
${g_opp:,.0f}
+
additional/mo
+
''', unsafe_allow_html=True)
+ with gc4:
+ st.markdown(f'''
+
Annual Opportunity
+
${g_annual:,.0f}
+
if unaddressed
+
''', unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # AI Summary
st.markdown(
- f"
๐ค FinOps AI: {msg['content']}
",
+ f'''
๐ค AI Summary
{gr.get("executive_summary","")}
''',
unsafe_allow_html=True)
- st.markdown("
", unsafe_allow_html=True)
+ # Charts row
+ gc_l, gc_r = st.columns(2)
+ with gc_l:
+ st.markdown("#### GCP spend by service")
+ svcs = gr.get("raw_services", [])
+ if svcs:
+ svc_df = pd.DataFrame([
+ {"Service": s["service"][:28], "Monthly ($)": round(s["subtotal"], 2)}
+ for s in sorted(svcs, key=lambda x: -x["subtotal"])[:10]
+ ])
+ fig_g = px.bar(svc_df, x="Monthly ($)", y="Service", orientation="h",
+ color="Monthly ($)", color_continuous_scale=["#e0f2fe","#0ea5e9"], text="Monthly ($)")
+ fig_g.update_traces(texttemplate="$%{text:,.0f}", textposition="outside")
+ fig_g.update_layout(showlegend=False, coloraxis_showscale=False,
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=70,t=10,b=0), height=300,
+ yaxis=dict(showgrid=False), xaxis=dict(showgrid=True, gridcolor="#f0f0f0"))
+ st.plotly_chart(fig_g, use_container_width=True)
+
+ with gc_r:
+ st.markdown("#### List cost vs actual spend")
+ svcs = gr.get("raw_services", [])
+ if svcs:
+ top5 = sorted(svcs, key=lambda x: -x["subtotal"])[:6]
+ savings_df = pd.DataFrame([
+ {"Service": s["service"][:20], "Type": "Actual (after savings)", "Cost": round(s["subtotal"], 2)}
+ for s in top5
+ ] + [
+ {"Service": s["service"][:20], "Type": "SADA Savings", "Cost": round(s.get("total_savings", 0), 2)}
+ for s in top5
+ ])
+ fig_s = px.bar(savings_df, x="Cost", y="Service", color="Type", orientation="h",
+ color_discrete_map={"Actual (after savings)": "#0ea5e9", "SADA Savings": "#10b981"},
+ barmode="stack")
+ fig_s.update_layout(showlegend=True, plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=70,t=10,b=0), height=300,
+ yaxis=dict(showgrid=False), xaxis=dict(showgrid=True, gridcolor="#f0f0f0"),
+ legend=dict(orientation="h", yanchor="bottom", y=1.02))
+ st.plotly_chart(fig_s, use_container_width=True)
+
+ # MoM trend chart
+ svcs = gr.get("raw_services", [])
+ trend_svcs = [s for s in svcs if s.get("pct_change") is not None and abs(s["pct_change"]) > 5 and s["subtotal"] > 100]
+ if trend_svcs:
+ st.markdown("#### Month-over-month spend changes")
+ trend_df = pd.DataFrame([
+ {"Service": s["service"][:28], "Change (%)": s["pct_change"]}
+ for s in sorted(trend_svcs, key=lambda x: x["pct_change"])
+ ])
+ colors = ["#ef4444" if v > 0 else "#10b981" for v in trend_df["Change (%)"]]
+ fig_t = px.bar(trend_df, x="Change (%)", y="Service", orientation="h",
+ color="Change (%)",
+ color_continuous_scale=[[0,"#10b981"],[0.5,"#f59e0b"],[1,"#ef4444"]],
+ text="Change (%)")
+ fig_t.update_traces(texttemplate="%{text:+.0f}%", textposition="outside")
+ fig_t.add_vline(x=0, line_width=1, line_color="#94a3b8")
+ fig_t.update_layout(showlegend=False, coloraxis_showscale=False,
+ plot_bgcolor="white", paper_bgcolor="white",
+ margin=dict(l=0,r=70,t=10,b=0), height=max(220, len(trend_df)*32),
+ yaxis=dict(showgrid=False), xaxis=dict(showgrid=True, gridcolor="#f0f0f0"))
+ st.plotly_chart(fig_t, use_container_width=True)
+
+ # AI Spend insight
+ ai_insight = gr.get("ai_spend_insight", "")
+ if ai_insight:
+ st.markdown("---")
+ st.markdown("#### ๐ค AI / LLM Spend")
+ st.markdown(f'''
{ai_insight}
''', unsafe_allow_html=True)
+ # AI services breakdown
+ ai_svcs = [s for s in gr.get("raw_services",[]) if any(kw in s["service"].lower() for kw in ["claude","gemini","vertex","dialogflow"])]
+ if ai_svcs:
+ ai_total = sum(s["subtotal"] for s in ai_svcs)
+ ai_cols = st.columns(len(ai_svcs[:4]))
+ for col, svc in zip(ai_cols, sorted(ai_svcs, key=lambda x: -x["subtotal"])[:4]):
+ pct = svc.get("pct_change")
+ pct_str = f"{pct:+.0f}% MoM" if pct is not None else "New"
+ with col:
+ st.markdown(f'''
+
{svc["service"][:22]}
+
${svc["subtotal"]:,.0f}
+
{pct_str}
+
''', unsafe_allow_html=True)
+
+ # Quick wins
+ qw = gr.get("quick_wins", [])
+ if qw:
+ st.markdown("---")
+ st.markdown("#### โก Quick wins")
+ for w in qw[:3]:
+ st.markdown(f'''
โ
{w}
''', unsafe_allow_html=True)
- # Input
- if prompt_input := st.chat_input("Ask about your AWS costs..."):
- st.session_state.chat_history.append({"role":"user","content":prompt_input})
- with st.spinner("Thinking..."):
- ans = call_claude(st.session_state.chat_history)
- st.session_state.chat_history.append({"role":"assistant","content":ans})
- st.rerun()
+ st.markdown("
", unsafe_allow_html=True)
- if st.session_state.chat_history:
- if st.button("๐๏ธ Clear chat", use_container_width=True):
- st.session_state.chat_history = []
- st.rerun()
+ # Findings
+ st.markdown("#### ๐ GCP findings")
+ if not g_findings:
+ st.info("No findings in report.")
+ else:
+ show_gcp_actions = st.toggle("Show GCP remediation actions", value=False, key="gcp_actions_toggle")
+ for f in g_findings:
+ sev = f.get("severity","MEDIUM").lower()
+ opp = f.get("monthly_opportunity", 0)
+ saving_str = f"${opp:,.2f}/mo opportunity" if opp > 0 else "Investigate"
+ gcp_action = f.get("gcp_action", "")
+ action_html = f'''
$ {gcp_action}
''' if show_gcp_actions and gcp_action else ""
+ st.markdown(f"""
+
+
FINDING #{f.get("rank","")}
+
{f.get("service","")}
+
{f.get("plain_english","")}
+
Impact: {f.get("business_impact","")}
+
+ {f.get("severity","MEDIUM")}
+ ๐ท {f.get("category","")}
+ ๐ฐ {saving_str}
+
+
๐ง {f.get("priority_action","")}
+ {action_html}
+
""", unsafe_allow_html=True)
+
+ # SADA savings assessment + leadership recommendation
+ sada_insight = gr.get("sada_savings_assessment","")
+ if sada_insight:
+ st.markdown("---")
+ st.markdown("#### ๐ฐ SADA Discount Assessment")
+ st.success(sada_insight)
+
+ st.markdown("---")
+ st.markdown("#### ๐ Leadership recommendation")
+ st.info(gr.get("closing_recommendation",""))
+
+ st.markdown("---")
+ st.caption("Built for Perforce Global Jam 2026 ยท Team Ghost Busters ยท Cloud Cost Waste Hunter")
+
+ with gcp_chat:
+ st.markdown("""
+
+
๐ค GCP FinOps AI
+
+ Ask anything about your GCP costs
+
+
+ """, unsafe_allow_html=True)
+
+ if gr is None:
+ st.info("Run the GCP pipeline to enable this chatbot.")
+ else:
+ def build_gcp_context():
+     """Assemble the system prompt for the GCP chatbot from the loaded report.
+
+     Reads ``gr`` and ``g_findings`` from the enclosing scope and returns one
+     newline-joined string: fixed instructions, headline figures, one line
+     per finding, the quick wins and the AI spend insight.
+     """
+     source_name = gr.get('source','SADA GCP Billing Report')
+     monthly_spend = gr.get('total_monthly_spend',0)
+     monthly_opportunity = gr.get('total_monthly_opportunity',0)
+     summary = gr.get('executive_summary','')
+
+     header = [
+         "You are a senior FinOps engineer assistant for GCP cloud costs at Perforce.",
+         "Answer clearly and concisely, grounding every answer in the actual billing data below.",
+         "Keep answers to 3-5 sentences unless the user asks for detail.",
+         "",
+         f"Data source: {source_name}",
+         f"Total monthly GCP spend: ${monthly_spend:,.2f}",
+         f"Additional monthly opportunity: ${monthly_opportunity:,.2f}",
+         f"Executive summary: {summary}",
+         "",
+         "FINDINGS:",
+     ]
+     # Each finding is condensed to one line; the explanation is cut at 120 characters.
+     finding_rows = [
+         f"#{item.get('rank','')} {item.get('service','')} | "
+         f"${item.get('monthly_opportunity',0):,.2f}/mo | "
+         f"{item.get('plain_english','')[:120]}"
+         for item in g_findings
+     ]
+     quick_win_rows = [f"- {win}" for win in gr.get("quick_wins",[])]
+     insight_row = f"AI insight: {gr.get('ai_spend_insight','')}"
+
+     sections = header + finding_rows + ["", "QUICK WINS:"] + quick_win_rows + [insight_row]
+     return "\n".join(part for part in sections if part is not None)
+
+ def call_claude_gcp(messages):
+     """Send the GCP chat history to the Anthropic Messages API and return the reply text.
+
+     Args:
+         messages: list of ``{"role": ..., "content": ...}`` dicts, passed
+             through unchanged as the request's ``messages`` field.
+
+     Returns:
+         The text of the first content block of the response, or a
+         human-readable warning/error string when the API key is missing or
+         the request fails. This function never raises; the caller appends
+         whatever it returns to the chat history.
+     """
+     api_key = os.environ.get("ANTHROPIC_API_KEY","")
+     if not api_key:
+         return "⚠️ ANTHROPIC_API_KEY not set."
+     try:
+         payload = json.dumps({
+             "model": "claude-sonnet-4-20250514",
+             "max_tokens": 800,
+             "system": build_gcp_context(),
+             "messages": messages
+         }).encode()
+         req = urllib.request.Request(
+             "https://api.anthropic.com/v1/messages",
+             data=payload,
+             headers={"Content-Type":"application/json","x-api-key":api_key,"anthropic-version":"2023-06-01"},
+             method="POST"
+         )
+         with urllib.request.urlopen(req, timeout=30) as resp:
+             data = json.loads(resp.read().decode())
+         return data["content"][0]["text"]
+     except Exception as e:
+         # Deliberately broad: any failure (network, HTTP status, unexpected
+         # response shape) is shown in the chat panel instead of crashing the page.
+         return f"❌ Error: {e}"
+
+ if "gcp_chat_history" not in st.session_state:
+ st.session_state.gcp_chat_history = []
+
+ gcp_suggestions = [
+ "What is driving our GCP spend?",
+ "Why is Claude Sonnet so expensive?",
+ "How much are we saving with SADA?",
+ "What can we do about Compute Engine costs?",
+ "Give me a GCP cost reduction plan",
+ ]
+ st.markdown("
๐ก Suggested questions:
", unsafe_allow_html=True)
+ for i, sug in enumerate(gcp_suggestions):
+ if st.button(sug, key=f"gcp_sug_{i}", use_container_width=True):
+ st.session_state.gcp_chat_history.append({"role":"user","content":sug})
+ with st.spinner("Thinking..."):
+ ans = call_claude_gcp(st.session_state.gcp_chat_history)
+ st.session_state.gcp_chat_history.append({"role":"assistant","content":ans})
+
+ st.markdown("
", unsafe_allow_html=True)
+ for msg in st.session_state.gcp_chat_history:
+ if msg["role"] == "user":
+ st.markdown(f"
You: {msg['content']}
", unsafe_allow_html=True)
+ else:
+ st.markdown(f"
๐ค GCP AI: {msg['content']}
", unsafe_allow_html=True)
+ st.markdown("
", unsafe_allow_html=True)
+
+ if prompt_gcp := st.chat_input("Ask about your GCP costs...", key="gcp_chat_input"):
+ st.session_state.gcp_chat_history.append({"role":"user","content":prompt_gcp})
+ with st.spinner("Thinking..."):
+ ans = call_claude_gcp(st.session_state.gcp_chat_history)
+ st.session_state.gcp_chat_history.append({"role":"assistant","content":ans})
+ st.rerun()
+
+ if st.session_state.gcp_chat_history:
+ if st.button("๐๏ธ Clear GCP chat", use_container_width=True, key="clear_gcp_chat"):
+ st.session_state.gcp_chat_history = []
+ st.rerun()
diff --git a/gcp_analyzer.py b/gcp_analyzer.py
new file mode 100644
index 0000000..675e1f5
--- /dev/null
+++ b/gcp_analyzer.py
@@ -0,0 +1,153 @@
+"""
+GhostBusters — GCP LLM Analyzer
+Sends gcp_findings.json to Claude and produces gcp_report.json
+"""
+
+import json
+import re
+import os
+import urllib.request
+from datetime import datetime
+
+
+def load_gcp_findings(filepath: str = "gcp_findings.json") -> dict:
+    """Load the findings JSON document at ``filepath``.
+
+    Args:
+        filepath: Path of the JSON file to read.
+
+    Returns:
+        The parsed JSON content.
+
+    Raises:
+        FileNotFoundError: if ``filepath`` does not exist.
+        json.JSONDecodeError: if the file is not valid JSON.
+    """
+    # Read as UTF-8 explicitly; the platform default encoding can mis-decode
+    # non-ASCII service names or summaries.
+    with open(filepath, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def build_gcp_prompt(data: dict) -> str:
+    """Build the analysis prompt sent to Claude for one findings document.
+
+    Args:
+        data: Parsed findings document. The keys ``total_monthly_spend``,
+            ``total_list_cost``, ``total_savings_applied``,
+            ``savings_efficiency_pct``, ``total_monthly_opportunity``,
+            ``total_findings``, ``source`` and ``findings`` are required;
+            ``waste_by_category`` is optional.
+
+    Returns:
+        The prompt text: an account summary, the first 12 findings as JSON,
+        the AI/LLM-related findings as JSON, and the JSON schema the model
+        must answer with.
+
+    Raises:
+        KeyError: if a required key is missing from ``data``.
+    """
+    total_spend = data["total_monthly_spend"]
+    total_list = data["total_list_cost"]
+    total_saved = data["total_savings_applied"]
+    efficiency = data["savings_efficiency_pct"]
+    opportunity = data["total_monthly_opportunity"]
+    n_findings = data["total_findings"]
+    source = data["source"]
+
+    top_findings = data["findings"][:12]
+    findings_txt = json.dumps(top_findings, indent=2)
+    waste_txt = json.dumps(data.get("waste_by_category", {}))
+
+    # Build AI spend summary
+    ai_findings = [
+        f for f in data["findings"]
+        if "AI" in f.get("category", "")
+        or "claude" in f.get("service", "").lower()
+        or "gemini" in f.get("service", "").lower()
+    ]
+    ai_txt = json.dumps(ai_findings, indent=2) if ai_findings else "none"
+
+    # The schema below uses <...> placeholders so that every field tells the
+    # model what kind of value is expected.
+    return f"""You are a senior FinOps engineer and GCP cloud cost analyst at Perforce.
+You are analyzing a SADA billing report for the period covered by: {source}
+
+ACCOUNT SUMMARY:
+- Cloud provider: GCP (via SADA reseller)
+- Total monthly spend (after discounts): ${total_spend:,.2f}
+- Total list cost (before any discounts): ${total_list:,.2f}
+- Total savings applied by SADA: ${total_saved:,.2f} ({efficiency}% discount rate)
+- Additional optimization opportunity identified: ${opportunity:,.2f}/mo
+- Total findings: {n_findings}
+- Waste by category: {waste_txt}
+
+TOP FINDINGS (ranked by opportunity):
+{findings_txt}
+
+AI/LLM SPEND:
+{ai_txt}
+
+Respond ONLY with a valid JSON object — no preamble, no markdown fences. Use this exact schema:
+{{
+  "executive_summary": "3-sentence summary a CTO would read. Include total GCP spend, what SADA already saves, remaining opportunity, and biggest risk.",
+  "total_monthly_spend": <number>,
+  "total_monthly_opportunity": <number>,
+  "total_annual_opportunity": <number>,
+  "savings_already_applied": <number>,
+  "findings": [
+    {{
+      "rank": <integer>,
+      "service": "<GCP service name>",
+      "category": "<finding category>",
+      "severity": "HIGH|MEDIUM|LOW",
+      "monthly_cost": <number>,
+      "monthly_opportunity": <number>,
+      "plain_english": "2-sentence explanation of the issue and why it costs money. No jargon.",
+      "business_impact": "1 sentence on the business risk if left unaddressed.",
+      "priority_action": "One concrete action the team should take this week.",
+      "gcp_action": "<gcloud command or console step that carries out the priority action>"
+    }}
+  ],
+  "quick_wins": ["3 GCP-specific actions the team can take today that require zero downtime"],
+  "ai_spend_insight": "2-sentence insight specifically about AI/LLM spend (Claude, Vertex, Gemini). Include trend and recommendation.",
+  "sada_savings_assessment": "1-sentence assessment of how well SADA negotiated discounts and whether there is room to push for more.",
+  "service_breakdown": {{
+    "biggest_concern": "<service name>",
+    "most_improved": "<service name>",
+    "watch_list": ["<service name>", "<service name>", "<service name>"]
+  }},
+  "closing_recommendation": "2-sentence closing advice for engineering leadership on GCP cost governance."
+}}"""
+
+
+def call_claude(prompt: str) -> str:
+    """POST ``prompt`` as a single user message to the Anthropic Messages API.
+
+    Returns the text of the first content block in the response.
+
+    Raises:
+        ValueError: if the ANTHROPIC_API_KEY environment variable is empty
+            or unset.
+    """
+    key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if not key:
+        raise ValueError("ANTHROPIC_API_KEY not set. Run: export ANTHROPIC_API_KEY='sk-ant-...'")
+
+    request_body = {
+        "model": "claude-sonnet-4-20250514",
+        "max_tokens": 4000,
+        "messages": [{"role": "user", "content": prompt}]
+    }
+    request_headers = {
+        "Content-Type": "application/json",
+        "x-api-key": key,
+        "anthropic-version": "2023-06-01",
+    }
+    request = urllib.request.Request(
+        "https://api.anthropic.com/v1/messages",
+        data=json.dumps(request_body).encode(),
+        headers=request_headers,
+        method="POST"
+    )
+
+    with urllib.request.urlopen(request, timeout=60) as response:
+        reply = json.loads(response.read().decode())
+    return reply["content"][0]["text"]
+
+
+def extract_json(raw: str) -> dict:
+    """Parse the JSON object contained in a model reply.
+
+    Handles a bare JSON document, a document wrapped in Markdown code
+    fences, and a document surrounded by extra prose before or after it.
+
+    Args:
+        raw: The reply text.
+
+    Returns:
+        The decoded JSON value.
+
+    Raises:
+        json.JSONDecodeError: if no decodable JSON value is found.
+    """
+    text = raw.strip()
+    # Strip markdown fences if present
+    text = re.sub(r"^```(?:json)?\s*", "", text)
+    text = re.sub(r"\s*```$", "", text)
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        # Fall back to decoding from the first "{" so that a preamble or a
+        # trailing remark around the object does not abort the whole run.
+        start = text.find("{")
+        if start == -1:
+            raise
+        parsed, _end = json.JSONDecoder().raw_decode(text[start:])
+        return parsed
+
+
+def analyze(findings_path: str = "gcp_findings.json", output_path: str = "gcp_report.json"):
+    """Run the analysis pipeline: load findings, query Claude, write the report.
+
+    The model's JSON answer is extended with fields copied from the findings
+    document (source, services, findings, list cost, savings efficiency) plus
+    a cloud tag and a generation timestamp, then written to ``output_path``.
+    Progress and headline figures are printed.
+
+    Returns:
+        The report dict that was written.
+    """
+    print(f"[GhostBusters GCP Analyzer] Loading {findings_path}")
+    findings_doc = load_gcp_findings(findings_path)
+    prompt_text = build_gcp_prompt(findings_doc)
+
+    print("[GhostBusters GCP Analyzer] Calling Claude...")
+    report = extract_json(call_claude(prompt_text))
+
+    # Enrich with raw data for dashboard charts
+    report.update({
+        "source": findings_doc.get("source", "SADA GCP Billing Report"),
+        "cloud": "GCP",
+        "generated_at": datetime.now().isoformat(),
+        "raw_services": findings_doc.get("services", []),
+        "all_findings": findings_doc.get("findings", []),
+        "total_list_cost": findings_doc.get("total_list_cost", 0),
+        "savings_efficiency_pct": findings_doc.get("savings_efficiency_pct", 0),
+    })
+
+    with open(output_path, "w") as out_file:
+        json.dump(report, out_file, indent=2)
+
+    print(f"[GhostBusters GCP Analyzer] Report written to {output_path}")
+    print(f" Total spend: ${report.get('total_monthly_spend',0):,.2f}/mo")
+    print(f" Opportunity: ${report.get('total_monthly_opportunity',0):,.2f}/mo")
+    print(f" Annual opportunity:${report.get('total_annual_opportunity',0):,.2f}")
+    return report
+
+
+if __name__ == "__main__":
+    # Script entry point. Both paths can be overridden through environment
+    # variables; otherwise the default file names are used.
+    findings_path = os.environ.get("GHOSTBUSTERS_GCP_FINDINGS", "gcp_findings.json")
+    output_path = os.environ.get("GHOSTBUSTERS_GCP_REPORT", "gcp_report.json")
+    analyze(findings_path, output_path)
diff --git a/gcp_detection_engine.py b/gcp_detection_engine.py
new file mode 100644
index 0000000..ca7aa8f
--- /dev/null
+++ b/gcp_detection_engine.py
@@ -0,0 +1,447 @@
+"""
+GhostBusters — GCP Detection Engine
+Reads a SADA billing CSV (service-level GCP report) and runs waste / risk detectors.
+Output: gcp_findings.json
+"""
+
+import pandas as pd
+import json
+import os
+import re
+from datetime import datetime
+
+# ─── Thresholds ──────────────────────────────────────────────────────────────
+SPIKE_PCT_THRESHOLD = 10.0   # % MoM increase → flag as spike
+DROP_PCT_THRESHOLD = 50.0    # % MoM decrease → investigate sharp drop
+SUPPORT_PCT_OF_SPEND = 3.0   # Support > 3% of total = excessive
+# Keywords are matched as substrings of the lower-cased service name.
+AI_SERVICES = {              # GCP AI/LLM service keywords
+    "claude", "gemini", "vertex ai", "vertex", "dialogflow",
+    "natural language", "vision api", "speech api", "translation",
+}
+# Keywords are matched as substrings of the lower-cased service name.
+CUD_ELIGIBLE = {             # Services that can use Committed Use Discounts
+    "compute engine", "cloud sql", "kubernetes engine",
+    "cloud run", "cloud spanner", "bigtable",
+}
+LOGGING_SPIKE_USD = 5000     # Cloud Logging above this monthly = review retention
+
+# ─── CSV loader ──────────────────────────────────────────────────────────────
+
+def load_sada_csv(filepath: str) -> pd.DataFrame:
+    """Parse a SADA GCP billing CSV into a clean per-service DataFrame.
+
+    Summary rows (blank service description or blank service ID) are
+    dropped, the report's column headers are renamed to short snake_case
+    names, and the cost/savings columns are coerced to numbers.
+
+    Args:
+        filepath: Path of the CSV file.
+
+    Returns:
+        A DataFrame with a fresh 0..n-1 index and these derived columns:
+        ``pct_change`` (float, or a missing value — None/NaN — when the
+        report says "New" or gives no number; test it with ``pd.isna``),
+        ``is_new`` (bool) and ``total_savings`` (sum of the absolute values
+        of the three savings columns).
+
+    Raises:
+        KeyError: if an expected column is absent from the CSV.
+    """
+    df = pd.read_csv(filepath)
+    # Drop summary rows (Service description is blank or is a subtotal marker)
+    description = df["Service description"]
+    df = df[description.notna() & (description.astype(str).str.strip() != "")]
+    # Drop any row where Service ID is blank (summary lines)
+    if "Service ID" in df.columns:
+        service_id = df["Service ID"]
+        df = df[service_id.notna() & (service_id.astype(str).str.strip() != "")]
+
+    # Rename for convenience
+    df = df.rename(columns={
+        "Service description": "service",
+        "Service ID": "service_id",
+        "List cost ($)": "list_cost",
+        "Negotiated savings ($)": "negotiated_savings",
+        "Savings programs ($)": "savings_programs",
+        "Other savings ($)": "other_savings",
+        "Unrounded subtotal ($)": "unrounded_subtotal",
+        "Subtotal ($)": "subtotal",
+        "Percent change in subtotal compared to previous period": "pct_change_raw",
+    })
+
+    # Coerce numeric. Cells such as "1,234.56" or "$12.00" are read as text;
+    # strip the formatting first so they are not silently turned into 0.
+    for col in ["list_cost", "negotiated_savings", "savings_programs", "other_savings", "subtotal"]:
+        values = df[col]
+        if not pd.api.types.is_numeric_dtype(values):
+            values = values.astype(str).str.replace(r"[$,\s]", "", regex=True)
+        df[col] = pd.to_numeric(values, errors="coerce").fillna(0)
+
+    # Parse percent change — "29%", "-2%", "New", "0%"
+    number = re.compile(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)")
+
+    def parse_pct(v):
+        text = str(v).strip().replace(",", "")
+        if text.lower() == "new":
+            return None  # new service, no prior period
+        # Requiring at least one digit means a bare "-" placeholder yields
+        # None instead of raising in float().
+        m = number.search(text)
+        return float(m.group()) if m else None
+
+    df["pct_change"] = df["pct_change_raw"].apply(parse_pct)
+    # astype(str) keeps this working when the column was read as numeric.
+    df["is_new"] = df["pct_change_raw"].astype(str).str.strip().str.lower() == "new"
+    df["total_savings"] = (
+        df["negotiated_savings"].abs() +
+        df["savings_programs"].abs() +
+        df["other_savings"].abs()
+    )
+    return df.reset_index(drop=True)
+
+
+# ─── Detectors ───────────────────────────────────────────────────────────────
+
+def detect_spend_spikes(df: pd.DataFrame) -> list[dict]:
+    """Flag services whose month-over-month increase exceeds SPIKE_PCT_THRESHOLD.
+
+    Args:
+        df: Service rows with ``service``, ``service_id``, ``subtotal`` and
+            ``pct_change`` columns.
+
+    Returns:
+        One finding dict per spiking service, largest increase first.
+        Services without a usable ``pct_change`` are skipped.
+    """
+    findings = []
+    for _, row in df.iterrows():
+        pct = row["pct_change"]
+        # A missing change can arrive as None or as NaN (pandas stores None
+        # as NaN in a float column). "is None" misses NaN, and NaN compares
+        # False against the threshold, so it would be reported as a spike.
+        if pd.isna(pct) or pct <= SPIKE_PCT_THRESHOLD:
+            continue
+        findings.append({
+            "detector": "spend_spike",
+            "category": "Cost Spike",
+            "service": row["service"],
+            "service_id": row["service_id"],
+            "monthly_cost": round(row["subtotal"], 2),
+            "pct_change": pct,
+            "severity": "HIGH" if pct > 20 else "MEDIUM",
+            "plain_english": (
+                f"{row['service']} spend grew {pct:+.0f}% month-over-month to "
+                f"${row['subtotal']:,.2f}/mo. This is above the {SPIKE_PCT_THRESHOLD}% "
+                f"acceptable variance threshold."
+            ),
+            "monthly_opportunity": 0,
+            "priority_action": (
+                f"Investigate what drove the {pct:+.0f}% increase in {row['service']}. "
+                f"Check for new workloads, quota increases, or billing anomalies in GCP Console → Billing → Cost breakdown."
+            ),
+        })
+    return sorted(findings, key=lambda x: -x["pct_change"])
+
+
+def detect_sharp_drops(df: pd.DataFrame) -> list[dict]:
+    """Flag services whose spend fell by more than DROP_PCT_THRESHOLD percent MoM.
+
+    Large drops may indicate unintentional shutdowns. Services with a
+    subtotal under $10 and services without a usable ``pct_change`` are
+    skipped.
+
+    Returns:
+        One finding dict per service, steepest drop first.
+    """
+    findings = []
+    for _, row in df.iterrows():
+        pct = row["pct_change"]
+        # pd.isna covers both None and NaN; NaN would otherwise slip past
+        # the comparison below and be reported as a drop.
+        if pd.isna(pct) or pct >= -DROP_PCT_THRESHOLD:
+            continue
+        if row["subtotal"] < 10:  # too small to care
+            continue
+        findings.append({
+            "detector": "sharp_drop",
+            "category": "Anomaly",
+            "service": row["service"],
+            "service_id": row["service_id"],
+            "monthly_cost": round(row["subtotal"], 2),
+            "pct_change": pct,
+            "severity": "MEDIUM",
+            "plain_english": (
+                f"{row['service']} dropped {abs(pct):.0f}% MoM to ${row['subtotal']:,.2f}/mo. "
+                f"Large drops can indicate accidental shutdowns, missing workloads, or billing credit anomalies."
+            ),
+            "monthly_opportunity": 0,
+            "priority_action": (
+                f"Verify that the drop in {row['service']} is intentional. "
+                f"Check GCP Console → Billing → Cost table for credits or terminated resources."
+            ),
+        })
+    return sorted(findings, key=lambda x: x["pct_change"])
+
+
+def detect_ai_spend(df: pd.DataFrame, total_spend: float) -> list[dict]:
+    """Summarise AI/LLM spend and flag individual AI services that are spiking.
+
+    A service counts as AI/LLM when its lower-cased name contains one of the
+    AI_SERVICES keywords.
+
+    Args:
+        df: Service rows with ``service``, ``service_id``, ``subtotal`` and
+            ``pct_change`` columns.
+        total_spend: Total monthly spend, used for the AI share percentage
+            (the share is reported as 0 when this is 0).
+
+    Returns:
+        An empty list when no AI service is present. Otherwise one
+        aggregated finding (with a per-service ``breakdown``) followed by one
+        finding per AI service that costs more than $100 and grew by more
+        than SPIKE_PCT_THRESHOLD percent.
+    """
+    findings = []
+    ai_rows = df[df["service"].str.lower().apply(
+        lambda s: any(kw in s for kw in AI_SERVICES)
+    )]
+    if ai_rows.empty:
+        return findings
+
+    ai_total = ai_rows["subtotal"].sum()
+    ai_pct = round(ai_total / total_spend * 100, 1) if total_spend else 0
+    # Missing changes are stored as None rather than NaN so the breakdown
+    # serialises to standard JSON.
+    ai_list = [
+        {
+            "service": rec["service"],
+            "subtotal": rec["subtotal"],
+            "pct_change": None if pd.isna(rec["pct_change"]) else rec["pct_change"],
+        }
+        for rec in ai_rows.sort_values("subtotal", ascending=False)[
+            ["service", "subtotal", "pct_change"]
+        ].to_dict("records")
+    ]
+
+    # Flag the top AI spender separately
+    top = ai_rows.loc[ai_rows["subtotal"].idxmax()]
+    top_pct = top.get("pct_change")
+    # "NaN or 0" evaluates to NaN, which would print "+nan%"; state plainly
+    # when there is no previous period to compare against.
+    if pd.isna(top_pct):
+        top_trend = "no prior-month comparison"
+    else:
+        top_trend = f"{top_pct:+.0f}% vs last month"
+
+    findings.append({
+        "detector": "ai_spend",
+        "category": "AI / LLM Cost",
+        "service": "AI/LLM Services (aggregated)",
+        "service_id": "AGGREGATED",
+        "monthly_cost": round(ai_total, 2),
+        "pct_change": None,
+        "severity": "HIGH" if ai_pct > 15 else "MEDIUM",
+        "plain_english": (
+            f"AI and LLM services account for ${ai_total:,.2f}/mo ({ai_pct}% of total GCP spend). "
+            f"Top spender: {top['service']} at ${top['subtotal']:,.2f}/mo "
+            f"({top_trend})."
+        ),
+        "monthly_opportunity": 0,
+        "priority_action": (
+            "Review AI API usage logs for unused or test calls. "
+            "Implement request caching, prompt compression, and consider smaller models "
+            "for non-critical workloads. Set budget alerts at 80% of expected AI spend."
+        ),
+        "breakdown": ai_list,
+    })
+
+    # Flag each AI service that's spiking
+    for _, row in ai_rows.iterrows():
+        pct = row["pct_change"]
+        if pd.isna(pct) or not pct:
+            continue
+        if pct > SPIKE_PCT_THRESHOLD and row["subtotal"] > 100:
+            findings.append({
+                "detector": "ai_spike",
+                "category": "AI / LLM Cost",
+                "service": row["service"],
+                "service_id": row["service_id"],
+                "monthly_cost": round(row["subtotal"], 2),
+                "pct_change": pct,
+                "severity": "HIGH" if pct > 25 else "MEDIUM",
+                "plain_english": (
+                    f"{row['service']} grew {pct:+.0f}% MoM to ${row['subtotal']:,.2f}/mo. "
+                    f"Unchecked AI API growth can quickly become the largest cost driver."
+                ),
+                # Opportunity is estimated as 25% of the service's monthly cost.
+                "monthly_opportunity": round(row["subtotal"] * 0.25, 2),
+                "priority_action": (
+                    f"Audit {row['service']} call volume. Add rate limits, caching, "
+                    f"and model-tier routing (use smaller/cheaper models for drafts). "
+                    f"Set a GCP budget alert on this service."
+                ),
+            })
+    return findings
+
+
+def detect_cud_opportunity(df: pd.DataFrame) -> list[dict]:
+    """Flag CUD-eligible services with low or no Committed Use Discount coverage.
+
+    A service is considered when its lower-cased name contains a
+    CUD_ELIGIBLE keyword and its list cost is at least $500. Coverage is
+    the absolute ``savings_programs`` amount divided by list cost; services
+    above 40% coverage are skipped.
+
+    Returns:
+        One finding dict per service, largest estimated saving first. The
+        saving is estimated as 30% of list cost.
+    """
+    findings = []
+    for _, row in df.iterrows():
+        svc_lower = row["service"].lower()
+        if not any(kw in svc_lower for kw in CUD_ELIGIBLE):
+            continue
+        savings_programs_abs = abs(row["savings_programs"])
+        list_cost = row["list_cost"]
+        if list_cost < 500:
+            continue
+        cud_coverage = savings_programs_abs / list_cost if list_cost else 0
+        if cud_coverage > 0.40:  # already has good CUD coverage
+            continue
+        potential = round(list_cost * 0.30, 2)  # conservative 30% CUD saving estimate
+        pct = row["pct_change"]
+        findings.append({
+            "detector": "cud_opportunity",
+            "category": "Reserved / Committed Use",
+            "service": row["service"],
+            "service_id": row["service_id"],
+            "monthly_cost": round(row["subtotal"], 2),
+            # A missing change is emitted as None so the finding serialises
+            # to standard JSON (NaN is not valid JSON).
+            "pct_change": None if pd.isna(pct) else pct,
+            "severity": "HIGH" if list_cost > 5000 else "MEDIUM",
+            "plain_english": (
+                f"{row['service']} has a list cost of ${list_cost:,.2f}/mo with only "
+                f"{cud_coverage*100:.0f}% covered by Committed Use Discounts. "
+                f"Purchasing 1-year CUDs could save ~30%."
+            ),
+            "monthly_opportunity": potential,
+            "priority_action": (
+                f"Purchase 1-year Committed Use Discounts for {row['service']} in "
+                f"GCP Console → Billing → Committed use discounts. "
+                f"Estimated saving: ${potential:,.2f}/mo."
+            ),
+        })
+    return sorted(findings, key=lambda x: -x["monthly_opportunity"])
+
+
def detect_excessive_support(df: pd.DataFrame, total_spend: float) -> list[dict]:
    """Flag Support charges that reach the threshold share of total spend.

    Rows whose ``service`` name contains "support" (case-insensitive) are
    summed. Their share of ``total_spend``, rounded to one decimal place, is
    compared with ``SUPPORT_PCT_OF_SPEND`` (a module-level percentage defined
    outside this function). A share below the threshold yields no finding.

    Args:
        df: Per-service billing rows. Reads the ``service``, ``service_id``,
            ``subtotal`` and ``pct_change`` columns.
        total_spend: Total spend for the period; a falsy value makes the
            share 0 rather than dividing by zero.

    Returns:
        A list holding at most one finding dict. ``service_id`` and
        ``pct_change`` are taken from the first matching row only.
    """
    # na=False: a missing service name counts as "no match". Without it the
    # mask contains NaN and boolean indexing raises ValueError.
    is_support = df["service"].str.lower().str.contains(
        "support", regex=False, na=False
    )
    support_rows = df[is_support]
    if support_rows.empty:
        return []

    support_cost = support_rows["subtotal"].sum()
    support_pct = round(support_cost / total_spend * 100, 1) if total_spend else 0
    if support_pct < SUPPORT_PCT_OF_SPEND:
        return []

    first_row = support_rows.iloc[0]
    return [{
        "detector": "excessive_support",
        "category": "Support Overhead",
        "service": "Support",
        "service_id": first_row["service_id"],
        "monthly_cost": round(support_cost, 2),
        "pct_change": first_row["pct_change"],
        "severity": "MEDIUM",
        "plain_english": (
            f"Support charges are ${support_cost:,.2f}/mo ({support_pct}% of total GCP spend). "
            f"This exceeds the {SUPPORT_PCT_OF_SPEND}% benchmark for a well-optimised account."
        ),
        # Opportunity is estimated as 20% of the current support cost.
        "monthly_opportunity": round(support_cost * 0.20, 2),
        "priority_action": (
            "Review SADA support tier vs. actual tickets opened. "
            "Consider downgrading support tier if ticket volume is low, "
            "or consolidating support contracts across GCP projects."
        ),
    }]
+
+
def detect_logging_costs(df: pd.DataFrame) -> list[dict]:
    """Flag high Cloud Logging costs, which are often driven by verbose log sinks.

    Rows whose ``service`` name contains "logging" (case-insensitive) are
    summed. A finding is produced only when that sum reaches
    ``LOGGING_SPIKE_USD`` (a module-level dollar threshold defined outside
    this function).

    Args:
        df: Per-service billing rows. Reads the ``service``, ``service_id``,
            ``subtotal`` and ``pct_change`` columns.

    Returns:
        A list holding at most one finding dict. ``service_id`` and
        ``pct_change`` are taken from the first matching row only.
    """
    # na=False: a missing service name counts as "no match". Without it the
    # mask contains NaN and boolean indexing raises ValueError.
    is_logging = df["service"].str.lower().str.contains(
        "logging", regex=False, na=False
    )
    log_rows = df[is_logging]
    if log_rows.empty:
        return []

    log_cost = log_rows["subtotal"].sum()
    if log_cost < LOGGING_SPIKE_USD:
        return []

    first_row = log_rows.iloc[0]
    return [{
        "detector": "logging_costs",
        "category": "Log Retention",
        "service": "Cloud Logging",
        "service_id": first_row["service_id"],
        "monthly_cost": round(log_cost, 2),
        "pct_change": first_row["pct_change"],
        "severity": "MEDIUM",
        # \u2014 (em dash) and \u2192 (right arrow) are written as escapes so
        # the text survives re-encoding of this file.
        "plain_english": (
            f"Cloud Logging costs ${log_cost:,.2f}/mo. "
            f"Verbose application logs, audit logs, and VPC flow logs often account for "
            f"60-70% of this spend \u2014 much of it stored indefinitely."
        ),
        # Opportunity is estimated as 60% of the current logging cost.
        "monthly_opportunity": round(log_cost * 0.60, 2),
        "priority_action": (
            "In GCP Console \u2192 Logging \u2192 Log Router, exclude high-volume low-value logs "
            "(DEBUG, INFO for non-critical services). Apply 30-day retention to all non-audit "
            "log buckets. Archive to Cloud Storage for compliance if needed."
        ),
    }]
+
+
def detect_new_services(df: pd.DataFrame) -> list[dict]:
    """Report every service whose ``is_new`` flag equals ``True``.

    Each match becomes a LOW-severity, zero-opportunity finding asking for the
    service to be confirmed, owned and budgeted.

    Args:
        df: Per-service billing rows. Reads the ``is_new``, ``service``,
            ``service_id`` and ``subtotal`` columns.

    Returns:
        One finding dict per flagged row, in the frame's row order.
    """

    def _as_finding(svc_row) -> dict:
        # Build the finding payload for a single newly-seen service row.
        name = svc_row["service"]
        cost = svc_row["subtotal"]
        return {
            "detector": "new_service",
            "category": "New Service",
            "service": name,
            "service_id": svc_row["service_id"],
            "monthly_cost": round(cost, 2),
            "pct_change": None,
            "severity": "LOW",
            "plain_english": (
                f"{name} appeared for the first time this billing period "
                f"at ${cost:,.2f}. "
                f"New services should be reviewed to ensure they are intentional and owned."
            ),
            "monthly_opportunity": 0,
            "priority_action": (
                f"Confirm {name} was intentionally enabled. "
                f"Assign an owner, add budget alerts, and tag the associated GCP project."
            ),
        }

    # Explicit equality (not truthiness) so non-boolean cells never match.
    first_seen = df.loc[df["is_new"].eq(True)]
    return [_as_finding(svc_row) for _, svc_row in first_seen.iterrows()]
+
+
def detect_unused_savings(df: pd.DataFrame) -> list[dict]:
    """Flag large CUD-eligible services that have no savings programs applied.

    A row qualifies when its ``list_cost`` is at least $1000, its
    ``savings_programs`` value is zero, and its lower-cased ``service`` name
    contains one of the ``CUD_ELIGIBLE`` keywords (a module-level collection
    defined outside this function). The estimated saving is 30% of list cost.

    Args:
        df: Per-service billing rows. Reads the ``service``, ``service_id``,
            ``list_cost``, ``subtotal``, ``savings_programs`` and
            ``pct_change`` columns.

    Returns:
        Finding dicts ordered by ``monthly_opportunity``, largest first.
    """
    results = []
    for _, svc in df.iterrows():
        cost_at_list = svc["list_cost"]
        # Too small to matter, or some savings programme is already applied.
        if cost_at_list < 1000 or abs(svc["savings_programs"]) > 0:
            continue

        name = svc["service"]
        lowered = name.lower()
        eligible = any(kw in lowered for kw in CUD_ELIGIBLE)
        if not eligible:
            continue

        potential = round(cost_at_list * 0.30, 2)
        severity = "HIGH" if cost_at_list > 5000 else "MEDIUM"
        results.append({
            "detector": "zero_savings_programs",
            "category": "Reserved / Committed Use",
            "service": name,
            "service_id": svc["service_id"],
            "monthly_cost": round(svc["subtotal"], 2),
            "pct_change": svc["pct_change"],
            "severity": severity,
            "plain_english": (
                f"{name} has a list cost of ${cost_at_list:,.2f}/mo "
                f"with $0 in Savings Programs applied. "
                f"Purchasing CUDs could save ~30% (~${potential:,.2f}/mo)."
            ),
            "monthly_opportunity": potential,
            "priority_action": (
                f"Evaluate 1-year or 3-year Committed Use Discounts for "
                f"{name} to reduce on-demand pricing."
            ),
        })

    # Largest opportunity first; the sort is stable, so ties keep row order.
    results.sort(key=lambda item: -item["monthly_opportunity"])
    return results
+
+
# ─── Main pipeline ───────────────────────────────────────────────────────────
+
def run_gcp_detection(csv_path: str, output_path: str = "gcp_findings.json"):
    """Run every GCP cost detector over a SADA billing CSV and write a JSON summary.

    The CSV is loaded with ``load_sada_csv``, each ``detect_*`` function is
    run over the resulting frame, and the findings are de-duplicated, ranked
    and written to ``output_path`` together with spend totals.

    Args:
        csv_path: Path of the SADA report CSV. Its basename is recorded as
            the summary's ``source``.
        output_path: Where the JSON summary is written.

    Returns:
        The summary dict that was serialised to ``output_path``.
    """
    print(f"[GhostBusters GCP] Loading {csv_path}")
    df = load_sada_csv(csv_path)

    total_spend = round(df["subtotal"].sum(), 2)
    total_list_cost = round(df["list_cost"].sum(), 2)
    total_savings = round(df["total_savings"].sum(), 2)
    savings_pct = round(total_savings / total_list_cost * 100, 1) if total_list_cost else 0

    print(f"[GhostBusters GCP] {len(df)} services, total spend: ${total_spend:,.2f}")

    # Run all detectors
    all_findings: list[dict] = []
    all_findings += detect_spend_spikes(df)
    all_findings += detect_sharp_drops(df)
    all_findings += detect_ai_spend(df, total_spend)
    all_findings += detect_cud_opportunity(df)
    all_findings += detect_excessive_support(df, total_spend)
    all_findings += detect_logging_costs(df)
    all_findings += detect_new_services(df)
    all_findings += detect_unused_savings(df)

    sev_order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}

    def _sev_rank(finding: dict) -> int:
        # Lower is more severe; unknown severities sort last.
        return sev_order.get(finding["severity"], 3)

    # De-duplicate: keep the highest-severity finding per (detector, service).
    # Ties keep the first one seen. Overwriting a dict value keeps the key's
    # original insertion position, so first-seen order is preserved.
    best_by_key: dict[tuple, dict] = {}
    for finding in all_findings:
        key = (finding["detector"], finding["service"])
        current = best_by_key.get(key)
        if current is None or _sev_rank(finding) < _sev_rank(current):
            best_by_key[key] = finding
    deduped = list(best_by_key.values())

    # Every service reported by "zero_savings_programs" (0% coverage, list
    # cost >= $1000) also satisfies "cud_opportunity" (coverage <= 40%, list
    # cost >= $500) with the same 30% estimate. Keeping both would count that
    # saving twice in the totals, so the more specific finding wins.
    zero_savings_services = {
        finding["service"]
        for finding in deduped
        if finding["detector"] == "zero_savings_programs"
    }
    deduped = [
        finding
        for finding in deduped
        if not (
            finding["detector"] == "cud_opportunity"
            and finding["service"] in zero_savings_services
        )
    ]

    # Rank by monthly_opportunity desc, then by severity
    deduped.sort(key=lambda x: (-x["monthly_opportunity"], _sev_rank(x)))
    for i, finding in enumerate(deduped, 1):
        finding["rank"] = i

    # Services list for dashboard charts
    services_list = df.sort_values("subtotal", ascending=False)[
        ["service", "service_id", "list_cost", "subtotal", "total_savings",
         "pct_change", "pct_change_raw", "is_new"]
    ].to_dict("records")

    total_opportunity = round(
        sum(finding.get("monthly_opportunity", 0) for finding in deduped), 2
    )

    # Accumulate per-category totals in one pass, in ranked order, so the key
    # order of the JSON output is deterministic across runs.
    category_totals: dict = {}
    for finding in deduped:
        cat = finding["category"]
        category_totals[cat] = category_totals.get(cat, 0) + finding["monthly_opportunity"]
    waste_by_category = {cat: round(total, 2) for cat, total in category_totals.items()}

    summary = {
        "cloud": "GCP",
        "source": os.path.basename(csv_path),
        "generated_at": datetime.now().isoformat(),
        "total_monthly_spend": total_spend,
        "total_list_cost": total_list_cost,
        "total_savings_applied": total_savings,
        "savings_efficiency_pct": savings_pct,
        "total_monthly_opportunity": total_opportunity,
        "total_annual_opportunity": round(total_opportunity * 12, 2),
        "total_findings": len(deduped),
        "services": services_list,
        "findings": deduped,
        "waste_by_category": waste_by_category,
    }

    with open(output_path, "w", encoding="utf-8") as fh:
        json.dump(summary, fh, indent=2)

    # ASCII arrow on purpose: stdout may use a legacy code page that cannot
    # encode a Unicode arrow, and a log line must not crash the pipeline.
    print(
        f"[GhostBusters GCP] {len(deduped)} findings, "
        f"${total_opportunity:,.2f}/mo opportunity -> {output_path}"
    )
    return summary
+
+
if __name__ == "__main__":
    # Script entry point: both paths can be overridden through the environment.
    default_csv = "Perforce Software, Inc. - SADA_Reports, 2026-05-01 \u2014 2026-05-26.csv"
    default_findings = "gcp_findings.json"
    run_gcp_detection(
        os.environ.get("GHOSTBUSTERS_GCP_CSV", default_csv),
        os.environ.get("GHOSTBUSTERS_GCP_FINDINGS", default_findings),
    )