-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_profiles.py
More file actions
97 lines (75 loc) · 2.96 KB
/
plot_profiles.py
File metadata and controls
97 lines (75 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# %%
from datetime import date
from pathlib import Path

import pandas as pd
import plotly.express as px
from plotly.graph_objs import Figure
# CSV file that the loading cell reads into the profiles DataFrame.
input_path = "data/profiles.csv"
# Directory prefix for every HTML/PNG plot this script writes.
output_dir = "plots"
# Only rows whose last_activity is strictly later than this date are kept.
cutoff_date = date(2024, 3, 1)
def plot_cdf(profiles: pd.DataFrame) -> Figure:
    """Plot the empirical CDF of the ``last_activity`` column as a line chart.

    Args:
        profiles: Frame containing a ``last_activity`` column. Missing values
            in that column are ignored.

    Returns:
        A Plotly line figure with one point per distinct ``last_activity``
        value, whose y value is the share of non-missing rows at or before
        that value. The title and y-axis label are left empty so the caller
        can set them.
    """
    counts = profiles["last_activity"].value_counts().sort_index()
    # Normalise by the number of values actually counted, not len(profiles):
    # value_counts() drops missing entries, so dividing by the row count would
    # leave the curve topping out below 1.0 whenever last_activity has gaps.
    # This also matches how plot_frequency normalises.
    cumulative_share = counts.cumsum() / counts.sum()
    cdf = cumulative_share.rename_axis("last_activity").reset_index(
        name="cumulative_count"
    )
    return px.line(
        cdf,
        x="last_activity",
        y="cumulative_count",
        title="",
        labels={"last_activity": "Date", "cumulative_count": ""},
    )
def plot_frequency(profiles: pd.DataFrame) -> Figure:
    """Plot the normalized frequency of each ``last_activity`` value as bars.

    Args:
        profiles: Frame containing a ``last_activity`` column.

    Returns:
        A Plotly bar figure with one bar per distinct ``last_activity`` value,
        ordered by that value; bar heights are counts divided by the total
        count, so they sum to 1.
    """
    counts = profiles["last_activity"].value_counts().sort_index()
    shares = counts / counts.sum()
    table = shares.rename_axis("last_activity").reset_index(name="frequency")
    axis_labels = {"last_activity": "Date", "frequency": "Normalized Frequency"}
    return px.bar(
        table,
        x="last_activity",
        y="frequency",
        title="Normalized Frequency of Followers' Last Activity",
        labels=axis_labels,
    )
# %%
# Load the profiles, reduce both timestamp columns to plain dates (values that
# fail to parse are coerced to missing), then keep only rows whose last
# activity is strictly after the cutoff.
df = (
    pd.read_csv(input_path)
    .assign(
        last_activity=lambda frame: pd.to_datetime(
            frame["last_activity"], errors="coerce"
        ).dt.date,
        created_at=lambda frame: pd.to_datetime(
            frame["created_at"], errors="coerce"
        ).dt.date,
    )
    .loc[lambda frame: frame["last_activity"] > cutoff_date]
)
# %% Export helper
def _save_activity_plots(profiles: pd.DataFrame, group: str) -> None:
    """Write the CDF and frequency plots for one subset of profiles.

    Each plot is saved as both HTML and PNG under ``output_dir`` using the
    lower-cased ``group`` as the file-name suffix (e.g. ``cdf_followers.html``).

    Args:
        profiles: Rows to plot; must contain a ``last_activity`` column.
        group: Capitalised label used in the plot titles, e.g. ``"Followers"``.
    """
    slug = group.lower()

    if profiles.empty:
        # min()/max() on an empty column are not dates, so the axis-margin
        # arithmetic below would raise; skip with a message instead.
        print(f"No {slug} profiles to plot; skipping")
        return

    # write_html/write_image do not create missing directories.
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    margin = pd.Timedelta(days=5)
    fig = plot_cdf(profiles)
    fig.update_layout(
        title=f"CDF of {group}' Last Activity",
        xaxis_range=[
            profiles["last_activity"].min() - margin,  # Add margin to the left
            profiles["last_activity"].max() + margin,  # Add margin to the right
        ],
    )
    fig.write_html(f"{output_dir}/cdf_{slug}.html")
    fig.write_image(f"{output_dir}/cdf_{slug}.png")

    fig = plot_frequency(profiles)
    fig.update_layout(
        title=f"Normalized Frequency of {group}' Last Activity",
    )
    fig.write_html(f"{output_dir}/frequency_{slug}.html")
    fig.write_image(f"{output_dir}/frequency_{slug}.png")

    print(
        f"Saved {slug} plots to {output_dir}/cdf_{slug}.[png|html] "
        f"and {output_dir}/frequency_{slug}.[png|html]"
    )


# %% Followers
profiles = df[df["is_follower"]]
_save_activity_plots(profiles, "Followers")

# %% Follows
profiles = df[df["is_follow"]]
_save_activity_plots(profiles, "Follows")