-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
2441 lines (2077 loc) Β· 106 KB
/
script.py
File metadata and controls
2441 lines (2077 loc) Β· 106 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
from pathlib import Path
from typing import List, Optional, Tuple
import numpy as np
import pandas as pd
# Enhanced UI imports for colourful dashboard
try:
from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.colored_header import colored_header
from streamlit_extras.badges import badge
from streamlit_option_menu import option_menu
from streamlit_card import card
from streamlit_lottie import st_lottie
except Exception: # pragma: no cover
style_metric_cards = None
colored_header = None
badge = None
option_menu = None
card = None
st_lottie = None
# Guard optional plotting libs to avoid hard failures
try:
import plotly.express as px
import plotly.graph_objects as go
except Exception: # pragma: no cover
px = None
go = None
try:
import streamlit as st
except Exception as e: # pragma: no cover
raise SystemExit("Streamlit must be installed to run this app: pip install streamlit plotly pandas numpy") from e
# Optional analytics dependencies
try:
from statsmodels.tsa.seasonal import seasonal_decompose
except Exception: # pragma: no cover
seasonal_decompose = None
try:
from prophet import Prophet # type: ignore
except Exception: # pragma: no cover
Prophet = None
try:
from sklearn.linear_model import Ridge # type: ignore
from sklearn.model_selection import train_test_split # type: ignore
except Exception: # pragma: no cover
Ridge = None
train_test_split = None
# -------------------------------
# Data loading and utilities
# -------------------------------
BASE_DIR = Path(__file__).resolve().parent
CSV_FILES = {
"marketing": BASE_DIR / "doordash powers marketing.csv",
"operations": BASE_DIR / "doordash powers operations.csv",
"payouts": BASE_DIR / "doordash powers payouts.csv",
"sales": BASE_DIR / "doordash powers sales.csv",
}
def _safe_lower(text: str) -> str:
return text.lower().strip()
def find_column_by_keywords(df: pd.DataFrame, keywords: List[str]) -> Optional[str]:
"""Return the first dataframe column whose lowercase name contains any of the keywords (lowercased).
Useful when exact column names may vary slightly across exports.
"""
lowered = {col: _safe_lower(col) for col in df.columns}
for col, low in lowered.items():
for kw in keywords:
if _safe_lower(kw) in low:
return col
return None
def safe_divide(numerator, denominator):
"""Robust element-wise divide that tolerates zeros/NaNs and scalar denominators.
Rules:
- If denominator is 0 or NaN β return numerator
- Otherwise β return numerator / denominator
Works for scalars, Series, and DataFrames without shape-mismatch errors.
"""
# Normalize inputs to pandas objects when possible for consistent behavior
num = numerator
den = denominator
if not isinstance(num, (pd.Series, pd.DataFrame)):
num = pd.to_numeric(num, errors="coerce")
if not isinstance(den, (pd.Series, pd.DataFrame)):
den = pd.to_numeric(den, errors="coerce")
# Fast-path: scalar denominator (common in min-max normalization)
if np.isscalar(den) or (isinstance(den, (pd.Series, pd.DataFrame)) and den.size == 1):
den_scalar = den if np.isscalar(den) else (den.values.item() if hasattr(den, "values") else float(den))
if pd.isna(den_scalar) or den_scalar == 0:
return num
with np.errstate(divide="ignore", invalid="ignore"):
return num / den_scalar
# General case: align shapes where possible
aligned_den = den
if isinstance(num, pd.Series) and isinstance(den, pd.Series):
aligned_den = den.reindex_like(num)
elif isinstance(num, pd.DataFrame) and isinstance(den, pd.DataFrame):
aligned_den = den.reindex_like(num)
mask = (aligned_den == 0) | pd.isna(aligned_den)
with np.errstate(divide="ignore", invalid="ignore"):
result = num / aligned_den
# Where denominator invalid, fall back to numerator
if isinstance(result, (pd.Series, pd.DataFrame)):
result = result.mask(mask, num)
result = result.replace([np.inf, -np.inf], np.nan).mask(mask, num)
else:
if bool(np.any(mask)):
result = num
if np.isinf(result):
result = num
return result
@st.cache_data(show_spinner=False)
def load_csv(path: Path) -> pd.DataFrame:
if not path.exists():
return pd.DataFrame()
try:
return pd.read_csv(path)
except Exception:
# Fallback for potential encoding issues
return pd.read_csv(path, encoding="latin-1")
def parse_datetime_column(df: pd.DataFrame, col_name: str) -> pd.DataFrame:
if col_name in df.columns:
df = df.copy()
df[col_name] = pd.to_datetime(df[col_name], errors="coerce")
return df
def add_week_start(df: pd.DataFrame, date_col: str, new_col: str = "Week") -> pd.DataFrame:
if date_col not in df.columns:
return df
df = df.copy()
series = pd.to_datetime(df[date_col], errors="coerce")
df[new_col] = series.dt.to_period("W-MON").apply(lambda r: r.start_time)
return df
def filter_df_by_date(df: pd.DataFrame, date_col: str, start: Optional[pd.Timestamp], end: Optional[pd.Timestamp]) -> pd.DataFrame:
if df.empty or date_col not in df.columns:
return df
mask = pd.Series(True, index=df.index)
dates = pd.to_datetime(df[date_col], errors="coerce")
if start is not None:
mask &= dates >= start
if end is not None:
mask &= dates <= end
return df.loc[mask]
def make_metric_card(label: str, value, delta: Optional[str] = None, help_text: Optional[str] = None):
col = st.container()
with col:
st.metric(label=label, value=value, delta=delta, help=help_text)
# -------------------------------
# Section: Marketing
# -------------------------------
def section_marketing(marketing_df: pd.DataFrame):
# Colourful section header
if colored_header:
colored_header(
label="π’ Marketing Analytics",
description="Campaign performance, customer acquisition, and ROI analysis",
color_name="green-70"
)
else:
st.markdown("## π’ Marketing Analytics")
st.markdown("*Campaign performance, customer acquisition, and ROI analysis*")
if marketing_df.empty:
st.info("π Marketing CSV not found or empty.")
return
# Parse and prepare
marketing_df = parse_datetime_column(marketing_df, "Date")
# Marketing period comparison (fixed periods)
st.sidebar.markdown("**Marketing period comparison**")
default_pre_start = pd.Timestamp("2025-06-01")
default_pre_end = pd.Timestamp("2025-07-02")
default_post_start = pd.Timestamp("2025-07-03")
default_post_end = pd.Timestamp("2025-08-03")
st.sidebar.info(f"π
**Pre Period:** {default_pre_start.strftime('%Y-%m-%d')} to {default_pre_end.strftime('%Y-%m-%d')}")
st.sidebar.info(f"π
**Post Period:** {default_post_start.strftime('%Y-%m-%d')} to {default_post_end.strftime('%Y-%m-%d')}")
pre_range = (default_pre_start, default_pre_end)
post_range = (default_post_start, default_post_end)
# Metrics of interest
metrics = [
"Orders",
"Sales",
"Customer Discounts from Marketing | (Funded by you)",
"Marketing Fees | (Including any applicable taxes)",
"Average Order Value",
"ROAS",
"New Customers Acquired",
"Total Customers Acquired",
]
# Build daily view (many notebook charts use `daily`)
daily = marketing_df.copy()
if "Date" in daily.columns:
daily = daily.sort_values("Date")
# If multiple rows per date, aggregate by sum/mean mix where appropriate
agg_map = {}
for m in metrics:
if m in daily.columns:
# numeric metrics -> sum; averages -> mean
if "Average" in m:
agg_map[m] = "mean"
else:
agg_map[m] = "sum"
if agg_map:
daily = (
daily.groupby("Date", as_index=False)
[list(agg_map.keys())]
.agg(agg_map)
)
else:
st.warning("Marketing data missing 'Date' column; some charts may be unavailable.")
# KPI cards
kpi_cols = st.columns(4)
with kpi_cols[0]:
total_orders = daily.get("Orders", pd.Series(dtype=float)).sum()
st.metric("Total Orders", f"{int(total_orders):,}")
with kpi_cols[1]:
total_sales = daily.get("Sales", pd.Series(dtype=float)).sum()
st.metric("Total Sales", f"${total_sales:,.0f}")
with kpi_cols[2]:
aov = daily.get("Average Order Value", pd.Series(dtype=float)).mean()
if pd.notnull(aov):
st.metric("Average Order Value", f"${aov:,.2f}")
else:
st.metric("Average Order Value", "β")
with kpi_cols[3]:
new_customers = daily.get("New Customers Acquired", pd.Series(dtype=float)).sum()
st.metric("New Customers", f"{int(new_customers):,}")
# Pre/Post comparison table
if "Date" in marketing_df.columns:
pre_start, pre_end = pre_range if isinstance(pre_range, tuple) else (None, None)
post_start, post_end = post_range if isinstance(post_range, tuple) else (None, None)
pre_df = filter_df_by_date(marketing_df, "Date", pd.to_datetime(pre_start), pd.to_datetime(pre_end))
post_df = filter_df_by_date(marketing_df, "Date", pd.to_datetime(post_start), pd.to_datetime(post_end))
def agg_series(df: pd.DataFrame) -> pd.Series:
vals = {}
for m in metrics:
if m not in df.columns:
continue
if "Average" in m:
vals[m] = df[m].mean()
else:
vals[m] = df[m].sum()
return pd.Series(vals)
pre_summary = agg_series(pre_df).rename("Pre")
post_summary = agg_series(post_df).rename("Post")
comparison = pd.concat([pre_summary, post_summary], axis=1)
comparison["Ξ Absolute"] = comparison["Post"] - comparison["Pre"]
comparison["Ξ % Change"] = safe_divide(comparison["Ξ Absolute"], comparison["Pre"]) * 100
# Ensure numeric dtypes to avoid formatting bugs and then build a display copy
numeric_cols = ["Pre", "Post", "Ξ Absolute", "Ξ % Change"]
comparison_numeric = comparison.copy()
for c in numeric_cols:
comparison_numeric[c] = pd.to_numeric(comparison_numeric[c], errors="coerce")
def fmt_int(x):
return "β" if pd.isna(x) else f"{x:,.0f}"
def fmt_pct(x):
return "β" if pd.isna(x) else f"{x:.2f}%"
display_df = comparison_numeric.copy()
display_df["Pre"] = comparison_numeric["Pre"].map(fmt_int)
display_df["Post"] = comparison_numeric["Post"].map(fmt_int)
display_df["Ξ Absolute"] = comparison_numeric["Ξ Absolute"].map(fmt_int)
display_df["Ξ % Change"] = comparison_numeric["Ξ % Change"].map(fmt_pct)
st.markdown("**Pre vs Post comparison**")
st.dataframe(display_df, use_container_width=True)
# Time-series charts
if px is not None and "Date" in daily.columns:
ts_cols = [c for c in ["Orders", "Sales", "Average Order Value", "New Customers Acquired"] if c in daily.columns]
if ts_cols:
st.markdown("**Daily trends**")
for col in ts_cols:
fig = px.line(daily, x="Date", y=col, title=col)
st.plotly_chart(fig, use_container_width=True)
# Normalized multi-series
norm_metrics = [c for c in ["Orders", "Sales", "Average Order Value", "ROAS", "New Customers Acquired"] if c in daily.columns]
if len(norm_metrics) >= 2:
base = daily[["Date"] + norm_metrics].set_index("Date").astype(float)
normalized = pd.DataFrame(index=base.index)
for c in norm_metrics:
col_min = base[c].min()
col_range = base[c].max() - col_min
normalized[c] = safe_divide(base[c] - col_min, col_range)
norm_long = normalized.reset_index().melt(id_vars="Date", var_name="Metric", value_name="Normalized")
fig = px.line(norm_long, x="Date", y="Normalized", color="Metric", title="Daily Metrics (MinβMax Normalized)")
st.plotly_chart(fig, use_container_width=True)
# Correlation heatmap
corr_metrics = [c for c in ["Orders", "Sales", "Average Order Value", "ROAS", "New Customers Acquired"] if c in daily.columns]
if len(corr_metrics) >= 2 and go is not None:
corr = daily[corr_metrics].corr()
heat = go.Figure(data=go.Heatmap(z=corr.values, x=corr.columns, y=corr.index, colorscale="RdBu", zmin=-1, zmax=1))
heat.update_layout(title="Metric Correlation")
st.plotly_chart(heat, use_container_width=True)
# Seasonal decomposition (Orders)
if seasonal_decompose is not None and "Orders" in daily.columns and "Date" in daily.columns:
st.markdown("**Orders: weekly seasonal decomposition**")
ts = daily.set_index("Date")["Orders"].asfreq("D")
try:
decomp = seasonal_decompose(ts, model="additive", period=7)
st.line_chart(pd.DataFrame({
"Observed": decomp.observed,
"Trend": decomp.trend,
"Seasonal": decomp.seasonal,
"Resid": decomp.resid,
}))
except Exception:
st.warning("Unable to compute seasonal decomposition.")
else:
if seasonal_decompose is None:
st.info("Install statsmodels to see seasonal decomposition: pip install statsmodels")
# Prophet forecast (Orders)
if Prophet is not None and "Orders" in daily.columns and "Date" in daily.columns:
st.markdown("**Orders forecast (Prophet)**")
try:
df_prophet = daily.rename(columns={"Date": "ds", "Orders": "y"})[["ds", "y"]]
m = Prophet(daily_seasonality=True)
m.fit(df_prophet)
future = m.make_future_dataframe(periods=30)
fc = m.predict(future)
# Lightweight display using Plotly if available
if px is not None:
fig = px.line(fc, x="ds", y="yhat", title="30-day Forecast of Orders")
fig.add_scatter(x=df_prophet["ds"], y=df_prophet["y"], name="Actual", mode="lines")
st.plotly_chart(fig, use_container_width=True)
else:
st.line_chart(fc.set_index("ds")["yhat"]) # fallback
except Exception:
st.warning("Prophet forecast failed to compute.")
else:
if Prophet is None:
st.info("Install prophet to enable forecasting: pip install prophet")
# Store-Level Analysis Section
st.markdown("---")
st.markdown("## πͺ Store-Level Marketing Analysis")
# Get unique stores from marketing data
if "Store Name" in marketing_df.columns:
stores = ["All Stores"] + sorted(marketing_df["Store Name"].dropna().unique().tolist())
# Store selection interface
st.markdown("**Select Store for Detailed Analysis:**")
# Create store selection buttons in a grid
store_cols = st.columns(4)
# Initialize session state for store selection
if "marketing_selected_store" not in st.session_state:
st.session_state.marketing_selected_store = "All Stores"
for i, store in enumerate(stores):
col_idx = i % 4
with store_cols[col_idx]:
if st.button(store, key=f"marketing_store_{i}", use_container_width=True):
st.session_state.marketing_selected_store = store
# Back to all stores button
if st.session_state.marketing_selected_store != "All Stores":
if st.button("β Back to All Stores", key="marketing_back_to_all", use_container_width=True):
st.session_state.marketing_selected_store = "All Stores"
# Store-specific analysis
if st.session_state.marketing_selected_store != "All Stores":
st.markdown(f"## π {st.session_state.marketing_selected_store} - Marketing Performance Analysis")
# Filter data for selected store
store_data = marketing_df[marketing_df["Store Name"] == st.session_state.marketing_selected_store].copy()
if not store_data.empty:
# Store-specific KPIs
st.markdown("### π― Store Performance Metrics")
store_kpi_cols = st.columns(4)
with store_kpi_cols[0]:
store_orders = store_data.get("Orders", pd.Series(dtype=float)).sum()
st.metric("Total Orders", f"{int(store_orders):,}")
with store_kpi_cols[1]:
store_sales = store_data.get("Sales", pd.Series(dtype=float)).sum()
st.metric("Total Sales", f"${store_sales:,.0f}")
with store_kpi_cols[2]:
store_aov = store_data.get("Average Order Value", pd.Series(dtype=float)).mean()
if pd.notnull(store_aov):
st.metric("Average Order Value", f"${store_aov:,.2f}")
else:
st.metric("Average Order Value", "β")
with store_kpi_cols[3]:
store_roas = store_data.get("ROAS", pd.Series(dtype=float)).mean()
if pd.notnull(store_roas):
st.metric("Average ROAS", f"{store_roas:.2f}x")
else:
st.metric("Average ROAS", "β")
# Customer acquisition metrics
st.markdown("### π₯ Customer Acquisition Analysis")
customer_cols = st.columns(3)
with customer_cols[0]:
new_customers = store_data.get("New Customers Acquired", pd.Series(dtype=float)).sum()
st.metric("New Customers", f"{int(new_customers):,}")
with customer_cols[1]:
total_customers = store_data.get("Total Customers Acquired", pd.Series(dtype=float)).sum()
st.metric("Total Customers", f"{int(total_customers):,}")
with customer_cols[2]:
if total_customers > 0:
repeat_customers = total_customers - new_customers
st.metric("Repeat Customers", f"{int(repeat_customers):,}")
else:
st.metric("Repeat Customers", "β")
# Campaign-level analysis for the store
st.markdown("### π’ Campaign Performance by Store")
# Get campaign columns
campaign_cols = [col for col in store_data.columns if 'campaign' in col.lower() or 'promotion' in col.lower()]
if campaign_cols:
# Campaign summary table
campaign_summary = []
for campaign_col in campaign_cols:
campaign_data = store_data.groupby(campaign_col).agg({
"Orders": "sum",
"Sales": "sum",
"New Customers Acquired": "sum",
"Total Customers Acquired": "sum"
}).reset_index()
for _, row in campaign_data.iterrows():
campaign_summary.append({
"Campaign": row[campaign_col],
"Orders": row["Orders"],
"Sales": row["Sales"],
"New Customers": row["New Customers Acquired"],
"Total Customers": row["Total Customers Acquired"]
})
if campaign_summary:
campaign_df = pd.DataFrame(campaign_summary)
st.dataframe(campaign_df, use_container_width=True)
# Campaign performance charts
if px is not None:
# Orders by campaign
fig_orders = px.bar(campaign_df, x="Campaign", y="Orders",
title=f"{st.session_state.marketing_selected_store} - Orders by Campaign",
color="Orders", color_continuous_scale="viridis")
st.plotly_chart(fig_orders, use_container_width=True)
# Sales by campaign
fig_sales = px.bar(campaign_df, x="Campaign", y="Sales",
title=f"{st.session_state.marketing_selected_store} - Sales by Campaign",
color="Sales", color_continuous_scale="plasma")
st.plotly_chart(fig_sales, use_container_width=True)
else:
st.info("No campaign-specific columns found in the data.")
# Time series analysis for the store
st.markdown("### π Store Performance Over Time")
if "Date" in store_data.columns:
store_data = parse_datetime_column(store_data, "Date")
store_data = store_data.sort_values("Date")
# Daily trends for the store
time_series_cols = [c for c in ["Orders", "Sales", "Average Order Value", "ROAS", "New Customers Acquired"]
if c in store_data.columns]
if time_series_cols:
for col in time_series_cols:
fig = px.line(store_data, x="Date", y=col,
title=f"{st.session_state.marketing_selected_store} - {col} Over Time",
markers=True)
fig.update_layout(xaxis_title="Date", yaxis_title=col)
st.plotly_chart(fig, use_container_width=True)
# Store comparison with other stores
st.markdown("### π Store Performance Comparison")
# Calculate store rankings
all_stores_summary = marketing_df.groupby("Store Name").agg({
"Orders": "sum",
"Sales": "sum",
"ROAS": "mean",
"New Customers Acquired": "sum"
}).reset_index()
# Only show ranking if a specific store is selected (not "All Stores")
if st.session_state.marketing_selected_store != "All Stores":
# Find current store's rank
store_rank = all_stores_summary[all_stores_summary["Store Name"] == st.session_state.marketing_selected_store].index[0] + 1
total_stores = len(all_stores_summary)
# Display ranking metrics
rank_cols = st.columns(4)
with rank_cols[0]:
st.metric("Store Rank", f"#{store_rank} of {total_stores}")
with rank_cols[1]:
store_percentile = (store_rank / total_stores) * 100
st.metric("Percentile", f"{store_percentile:.1f}%")
with rank_cols[2]:
if store_rank > 1:
next_store = all_stores_summary.iloc[store_rank - 2]
gap_orders = store_orders - next_store["Orders"]
st.metric("Orders Gap to Next", f"{int(gap_orders):,}")
else:
st.metric("Orders Gap to Next", "π Top Store")
with rank_cols[3]:
if store_rank > 1:
gap_sales = store_sales - next_store["Sales"]
st.metric("Sales Gap to Next", f"${gap_sales:,.0f}")
else:
st.metric("Sales Gap to Next", "π Top Store")
# Top 5 stores comparison
st.markdown("**π Top 5 Stores by Orders**")
top_stores = all_stores_summary.nlargest(5, "Orders")
fig_top = px.bar(top_stores, x="Store Name", y="Orders",
title="Top 5 Stores - Orders Comparison",
color="Orders", color_continuous_scale="viridis")
st.plotly_chart(fig_top, use_container_width=True)
# Top 5 stores by sales
st.markdown("**π° Top 5 Stores by Sales**")
top_sales = all_stores_summary.nlargest(5, "Sales")
fig_sales_top = px.bar(top_sales, x="Store Name", y="Sales",
title="Top 5 Stores - Sales Comparison",
color="Sales", color_continuous_scale="plasma")
st.plotly_chart(fig_sales_top, use_container_width=True)
else:
# All stores overview
st.markdown("### π All Stores Overview")
if "Store Name" in marketing_df.columns:
# Store summary table
store_summary = marketing_df.groupby("Store Name").agg({
"Orders": "sum",
"Sales": "sum",
"Average Order Value": "mean",
"ROAS": "mean",
"New Customers Acquired": "sum",
"Total Customers Acquired": "sum"
}).reset_index()
st.dataframe(store_summary, use_container_width=True)
# Store performance heatmap
if px is not None:
store_metrics = store_summary.set_index("Store Name")[["Orders", "Sales", "Average Order Value", "ROAS"]]
fig_heatmap = px.imshow(store_metrics.T,
title="Store Performance Heatmap",
color_continuous_scale="viridis",
aspect="auto")
st.plotly_chart(fig_heatmap, use_container_width=True)
else:
st.info("Store Name column not found in marketing data. Store-level analysis unavailable.")
# -------------------------------
# Section: Operations
# -------------------------------
def section_operations(ops_df: pd.DataFrame):
# Colourful section header
if colored_header:
colored_header(
label="βοΈ Operations Analytics",
description="Store performance, ratings, and operational efficiency metrics",
color_name="blue-70"
)
else:
st.markdown("## βοΈ Operations Analytics")
st.markdown("*Store performance, ratings, and operational efficiency metrics*")
if ops_df.empty:
st.info("π Operations CSV not found or empty.")
return
ops_df = parse_datetime_column(ops_df, "Start Date")
ops_df = parse_datetime_column(ops_df, "End Date")
ops_df = add_week_start(ops_df, "Start Date", new_col="Week")
# KPIs and derived metrics
kpis = [
"Total Orders Including Cancelled Orders",
"Total Delivered or Picked Up Orders",
"Total Missing or Incorrect Orders",
"Total Error Charges",
"Total Cancelled Orders",
"Total Downtime in Minutes",
"Average Rating",
]
# Derived
ops_df = ops_df.copy()
if (
"Total Cancelled Orders" in ops_df.columns and
"Total Orders Including Cancelled Orders" in ops_df.columns
):
ops_df["Cancellation Rate"] = safe_divide(
ops_df["Total Cancelled Orders"], ops_df["Total Orders Including Cancelled Orders"]
)
if (
"Total Downtime in Minutes" in ops_df.columns and
"Total Orders Including Cancelled Orders" in ops_df.columns
):
ops_df["Downtime per Order (min)"] = safe_divide(
ops_df["Total Downtime in Minutes"], ops_df["Total Orders Including Cancelled Orders"]
)
# KPI cards
kpi_cols = st.columns(4)
with kpi_cols[0]:
delivered = ops_df.get("Total Delivered or Picked Up Orders", pd.Series(dtype=float)).sum()
st.metric("Delivered Orders", f"{int(delivered):,}")
with kpi_cols[1]:
cancel_rate = ops_df.get("Cancellation Rate", pd.Series(dtype=float)).mean()
if pd.notnull(cancel_rate):
st.metric("Avg Cancellation Rate", f"{cancel_rate*100:.2f}%")
else:
st.metric("Avg Cancellation Rate", "β")
with kpi_cols[2]:
downtime = ops_df.get("Downtime per Order (min)", pd.Series(dtype=float)).mean()
if pd.notnull(downtime):
st.metric("Avg Downtime / Order", f"{downtime:.2f} min")
else:
st.metric("Avg Downtime / Order", "β")
with kpi_cols[3]:
avg_rating = ops_df.get("Average Rating", pd.Series(dtype=float)).mean()
if pd.notnull(avg_rating):
st.metric("Average Rating", f"{avg_rating:.2f}")
else:
st.metric("Average Rating", "β")
# Store summary (sum & mean)
if {"Store ID", "Store Name"}.issubset(ops_df.columns):
numeric_cols = [c for c in kpis if c in ops_df.columns]
if numeric_cols:
store_summary = (
ops_df.groupby(["Store ID", "Store Name"])[numeric_cols]
.agg(["sum", "mean"])
)
# Flatten columns
store_summary.columns = ["_".join(col).strip() for col in store_summary.columns]
st.markdown("**Store summary (sum/mean)**")
st.dataframe(store_summary.reset_index())
# Store-Level Analysis Section
st.markdown("---")
st.markdown("## πͺ Store-Level Operations Analysis")
# Get unique stores from operations data
if "Store Name" in ops_df.columns:
stores = ["All Stores"] + sorted(ops_df["Store Name"].dropna().unique().tolist())
# Store selection interface
st.markdown("**Select Store for Detailed Analysis:**")
# Create store selection buttons in a grid
store_cols = st.columns(4)
# Initialize session state for store selection
if "ops_selected_store" not in st.session_state:
st.session_state.ops_selected_store = "All Stores"
for i, store in enumerate(stores):
col_idx = i % 4
with store_cols[col_idx]:
if st.button(store, key=f"ops_store_{i}", use_container_width=True):
st.session_state.ops_selected_store = store
# Back to all stores button
if st.session_state.ops_selected_store != "All Stores":
if st.button("β Back to All Stores", key="ops_back_to_all", use_container_width=True):
st.session_state.ops_selected_store = "All Stores"
# Store-specific analysis
if st.session_state.ops_selected_store != "All Stores":
st.markdown(f"## π {st.session_state.ops_selected_store} - Operations Performance Analysis")
# Filter data for selected store
store_data = ops_df[ops_df["Store Name"] == st.session_state.ops_selected_store].copy()
if not store_data.empty:
# Store-specific KPIs
st.markdown("### π― Store Operations Metrics")
store_kpi_cols = st.columns(4)
with store_kpi_cols[0]:
store_delivered = store_data.get("Total Delivered or Picked Up Orders", pd.Series(dtype=float)).sum()
st.metric("Total Delivered Orders", f"{int(store_delivered):,}")
with store_kpi_cols[1]:
store_cancelled = store_data.get("Total Cancelled Orders", pd.Series(dtype=float)).sum()
st.metric("Total Cancelled Orders", f"{int(store_cancelled):,}")
with store_kpi_cols[2]:
store_rating = store_data.get("Average Rating", pd.Series(dtype=float)).mean()
if pd.notnull(store_rating):
st.metric("Average Rating", f"{store_rating:.2f}")
else:
st.metric("Average Rating", "β")
with store_kpi_cols[3]:
store_downtime = store_data.get("Total Downtime in Minutes", pd.Series(dtype=float)).sum()
st.metric("Total Downtime", f"{int(store_downtime)} min")
# Operational efficiency metrics
st.markdown("### β‘ Operational Efficiency")
efficiency_cols = st.columns(3)
with efficiency_cols[0]:
if "Total Orders Including Cancelled Orders" in store_data.columns:
total_orders = store_data["Total Orders Including Cancelled Orders"].sum()
fulfillment_rate = (store_delivered / total_orders) * 100 if total_orders > 0 else 0
st.metric("Fulfillment Rate", f"{fulfillment_rate:.1f}%")
else:
st.metric("Fulfillment Rate", "β")
with efficiency_cols[1]:
if "Total Orders Including Cancelled Orders" in store_data.columns:
cancellation_rate = (store_cancelled / total_orders) * 100 if total_orders > 0 else 0
st.metric("Cancellation Rate", f"{cancellation_rate:.1f}%")
else:
st.metric("Cancellation Rate", "β")
with efficiency_cols[2]:
if total_orders > 0:
downtime_per_order = store_downtime / total_orders
st.metric("Downtime per Order", f"{downtime_per_order:.1f} min")
else:
st.metric("Downtime per Order", "β")
# Time series analysis for the store
st.markdown("### π Store Performance Over Time")
if "Week" in store_data.columns:
# Weekly trends for the store
time_series_cols = [c for c in ["Total Delivered or Picked Up Orders", "Total Cancelled Orders",
"Average Rating", "Total Downtime in Minutes"]
if c in store_data.columns]
if time_series_cols:
for col in time_series_cols:
weekly_data = store_data.groupby("Week")[col].mean().reset_index()
fig = px.line(weekly_data, x="Week", y=col,
title=f"{st.session_state.ops_selected_store} - {col} Over Time",
markers=True)
fig.update_layout(xaxis_title="Week", yaxis_title=col)
st.plotly_chart(fig, use_container_width=True)
# Store comparison with other stores
st.markdown("### π Store Performance Comparison")
# Calculate store rankings
all_stores_summary = ops_df.groupby("Store Name").agg({
"Total Delivered or Picked Up Orders": "sum",
"Total Cancelled Orders": "sum",
"Average Rating": "mean",
"Total Downtime in Minutes": "sum"
}).reset_index()
# Only show ranking if a specific store is selected (not "All Stores")
if st.session_state.ops_selected_store != "All Stores":
# Find current store's rank
store_rank = all_stores_summary[all_stores_summary["Store Name"] == st.session_state.ops_selected_store].index[0] + 1
total_stores = len(all_stores_summary)
# Display ranking metrics
rank_cols = st.columns(4)
with rank_cols[0]:
st.metric("Store Rank", f"#{store_rank} of {total_stores}")
with rank_cols[1]:
store_percentile = (store_rank / total_stores) * 100
st.metric("Percentile", f"{store_percentile:.1f}%")
with rank_cols[2]:
if store_rank > 1:
next_store = all_stores_summary.iloc[store_rank - 2]
gap_orders = store_delivered - next_store["Total Delivered or Picked Up Orders"]
st.metric("Orders Gap to Next", f"{int(gap_orders):,}")
else:
st.metric("Orders Gap to Next", "π Top Store")
with rank_cols[3]:
if store_rank > 1:
gap_rating = store_rating - next_store["Average Rating"]
st.metric("Rating Gap to Next", f"{gap_rating:+.2f}")
else:
st.metric("Rating Gap to Next", "π Top Store")
# Top 5 stores comparison
st.markdown("**π Top 5 Stores by Delivered Orders**")
top_stores = all_stores_summary.nlargest(5, "Total Delivered or Picked Up Orders")
fig_top = px.bar(top_stores, x="Store Name", y="Total Delivered or Picked Up Orders",
title="Top 5 Stores - Delivered Orders Comparison",
color="Total Delivered or Picked Up Orders", color_continuous_scale="viridis")
st.plotly_chart(fig_top, use_container_width=True)
# Top 5 stores by rating
st.markdown("**β Top 5 Stores by Average Rating**")
top_rating = all_stores_summary.nlargest(5, "Average Rating")
fig_rating = px.bar(top_rating, x="Store Name", y="Average Rating",
title="Top 5 Stores - Average Rating Comparison",
color="Average Rating", color_continuous_scale="plasma")
st.plotly_chart(fig_rating, use_container_width=True)
else:
st.warning(f"No data found for store: {st.session_state.ops_selected_store}")
else:
# All stores overview
st.markdown("### π All Stores Overview")
if "Store Name" in ops_df.columns:
# Store summary table
store_summary = ops_df.groupby("Store Name").agg({
"Total Delivered or Picked Up Orders": "sum",
"Total Cancelled Orders": "sum",
"Average Rating": "mean",
"Total Downtime in Minutes": "sum"
}).reset_index()
st.dataframe(store_summary, use_container_width=True)
# Store performance heatmap
if px is not None:
store_metrics = store_summary.set_index("Store Name")[["Total Delivered or Picked Up Orders",
"Total Cancelled Orders", "Average Rating",
"Total Downtime in Minutes"]]
fig_heatmap = px.imshow(store_metrics.T,
title="Store Operations Performance Heatmap",
color_continuous_scale="viridis",
aspect="auto")
st.plotly_chart(fig_heatmap, use_container_width=True)
else:
st.info("Store Name column not found in operations data. Store-level analysis unavailable.")
# -------------------------------
# Section: Sales
# -------------------------------
def section_sales(sales_df: pd.DataFrame):
# Colourful section header
if colored_header:
colored_header(
label="π° Sales Analytics",
description="Revenue analysis, order trends, and financial performance",
color_name="orange-70"
)
else:
st.markdown("## π° Sales Analytics")
st.markdown("*Revenue analysis, order trends, and financial performance*")
if sales_df.empty:
st.info("π Sales CSV not found or empty.")
return
sales_df = parse_datetime_column(sales_df, "Start Date")
sales_df = parse_datetime_column(sales_df, "End Date")
sales_df = add_week_start(sales_df, "Start Date", new_col="Week")
# Derived metrics mirroring the notebook
def safe_ratio(numer: pd.Series, denom: pd.Series) -> pd.Series:
return safe_divide(numer, denom)
if {
"Total Orders Including Cancelled Orders",
"Total Delivered or Picked Up Orders",
"Gross Sales",
}.issubset(sales_df.columns):
sales_df = sales_df.assign(
Cancellation_Rate=safe_ratio(
sales_df["Total Orders Including Cancelled Orders"] - sales_df["Total Delivered or Picked Up Orders"],
sales_df["Total Orders Including Cancelled Orders"],
),
Fulfillment_Rate=safe_ratio(
sales_df["Total Delivered or Picked Up Orders"],
sales_df["Total Orders Including Cancelled Orders"],
),
Commission_Rate=safe_ratio(sales_df.get("Total Commission"), sales_df.get("Gross Sales")),
Promo_ROI=safe_ratio(
sales_df.get("Total Promotion Sales | (for historical reference only)"),
sales_df.get("Total Promotion Fees | (for historical reference only)"),
),
Ad_ROI=safe_ratio(
sales_df.get("Total Ad Sales | (for historical reference only)"),
sales_df.get("Total Ad Fees | (for historical reference only)"),
),
Revenue_per_Delivered=safe_ratio(
sales_df.get("Gross Sales"), sales_df.get("Total Delivered or Picked Up Orders")
),
)
# KPIs
kpi_cols = st.columns(4)
with kpi_cols[0]:
gross = sales_df.get("Gross Sales", pd.Series(dtype=float)).sum()
st.metric("Gross Sales", f"${gross:,.0f}")
with kpi_cols[1]:
delivered = sales_df.get("Total Delivered or Picked Up Orders", pd.Series(dtype=float)).sum()
st.metric("Delivered Orders", f"{int(delivered):,}")
with kpi_cols[2]:
aov = sales_df.get("AOV", pd.Series(dtype=float)).mean()
st.metric("AOV", f"${aov:,.2f}" if pd.notnull(aov) else "β")
with kpi_cols[3]:
fulfill_rate = sales_df.get("Fulfillment_Rate", pd.Series(dtype=float)).mean()
st.metric("Avg Fulfillment Rate", f"{fulfill_rate*100:.2f}%" if pd.notnull(fulfill_rate) else "β")
# Weekly aggregation
if {"Store Name", "Week"}.issubset(sales_df.columns):
weekly = sales_df.groupby(["Store Name", "Week"]).agg(
{
k: "sum"
for k in [
"Gross Sales",
"Total Orders Including Cancelled Orders",
"Total Delivered or Picked Up Orders",
]
if k in sales_df.columns
}
)
if "AOV" in sales_df.columns:
weekly["AOV"] = (
sales_df.groupby(["Store Name", "Week"])
["AOV"].mean()
)
for k in [
"Cancellation_Rate",
"Fulfillment_Rate",
"Commission_Rate",
"Promo_ROI",
"Ad_ROI",
"Revenue_per_Delivered",
]:
if k in sales_df.columns:
weekly[k] = sales_df.groupby(["Store Name", "Week"])[k].mean()
weekly = weekly.reset_index()
if px is not None and "Gross Sales" in weekly.columns:
st.markdown("**Weekly Gross Sales per Store**")
fig = px.line(weekly, x="Week", y="Gross Sales", color="Store Name")
st.plotly_chart(fig, use_container_width=True)
# Top 5 stores by total Gross Sales
if px is not None and "Gross Sales" in weekly.columns: