-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_profiles.py
More file actions
147 lines (113 loc) · 4.49 KB
/
get_profiles.py
File metadata and controls
147 lines (113 loc) · 4.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import argparse
from dataclasses import dataclass
from datetime import date, datetime
from atproto_client.models.app.bsky.actor.defs import ProfileView
from atproto import Client
from utils import APICall, dumb_retry, parse_utc_date, load_credentials
@dataclass
class Profile:
    """Flat record describing one account, ready to be written as a CSV line."""

    did: str
    handle: str
    created_at: date
    last_activity: date
    # Relationship flags; both may be true for the same account.
    is_follow: bool
    is_follower: bool

    @staticmethod
    def csv_header() -> str:
        """Return the newline-terminated header line naming every CSV column."""
        header_line = "did,handle,created_at,last_activity,is_follow,is_follower\n"
        return header_line

    def csv_row(self) -> str:
        """Return this record as one newline-terminated CSV line.

        Values appear in the same order as the columns of ``csv_header`` and
        are formatted with their default string form; nothing is quoted or
        escaped.
        """
        values = (
            self.did,
            self.handle,
            self.created_at,
            self.last_activity,
            self.is_follow,
            self.is_follower,
        )
        return ",".join(f"{value}" for value in values) + "\n"
class Getter:
    """Builds ``Profile`` records for the followers and follows of the client's own account."""

    def __init__(self, client: Client, verbose: bool = False):
        """Remember the API client and the verbosity setting.

        Args:
            client: Client used for every request; ``client.me.handle``
                identifies the account whose followers/follows are listed.
            verbose: When true, print fetch progress and completion messages.
        """
        self.client = client
        self.verbose = verbose

    def to_profile(self, pv: ProfileView, is_follow: bool = False, is_follower: bool = False) -> Profile | None:
        """Convert a profile view into a ``Profile``.

        Args:
            pv: Profile view to convert.
            is_follow: Value stored in ``Profile.is_follow``.
            is_follower: Value stored in ``Profile.is_follower``.

        Returns:
            The new ``Profile``, or ``None`` when either the creation date or
            the last-activity date could not be determined.
        """
        created_at = parse_utc_date(pv.created_at)
        if created_at is None:
            # The profile is discarded without a creation date no matter what
            # the feed says, so skip the author-feed request entirely.
            return None

        last_activity = self.last_activity(pv, created_at)
        if last_activity is None:
            return None

        return Profile(
            did=pv.did,
            handle=pv.handle,
            created_at=created_at,
            last_activity=last_activity,
            is_follow=is_follow,
            is_follower=is_follower,
        )

    def get_profiles(self) -> list[Profile]:
        """Return one ``Profile`` per distinct account among followers and follows.

        Followers are processed first. An account that also appears among the
        follows keeps its existing record and only has ``is_follow`` set.
        Accounts whose timestamps cannot be determined are reported on stdout
        and left out.
        """
        profiles: dict[str, Profile] = {}
        self._collect(profiles, self.get_followers(), "followers", is_follower=True)
        self._collect(profiles, self.get_follows(), "follows", is_follow=True)
        return list(profiles.values())

    def _collect(
        self,
        profiles: dict[str, Profile],
        views: list[ProfileView],
        label: str,
        *,
        is_follow: bool = False,
        is_follower: bool = False,
    ) -> None:
        """Merge ``views`` into ``profiles`` (keyed by DID), printing progress under ``label``."""
        total = len(views)
        # Count from 1 so the last progress line reads "total/total".
        for position, pv in enumerate(views, start=1):
            print(f"Building {label} profile {position}/{total}", end="\r", flush=True)

            known = profiles.get(pv.did)
            if known is not None:
                # Already built from an earlier entry: just add the new
                # relationship flag instead of querying the feed again.
                known.is_follow = known.is_follow or is_follow
                known.is_follower = known.is_follower or is_follower
                continue

            profile = self.to_profile(pv, is_follow=is_follow, is_follower=is_follower)
            if profile is None:
                print(f"Skipping {pv.handle} ({pv.did}) due to missing timestamps")
                continue
            profiles[pv.did] = profile

        if self.verbose:
            print(f"\nFinished building {label} profiles")

    def get_follows(self) -> list[ProfileView]:
        """Return every item of the ``follows`` listing for the client's own handle."""
        return self.get_all(self.client.get_follows, "follows")

    def get_followers(self) -> list[ProfileView]:
        """Return every item of the ``followers`` listing for the client's own handle."""
        return self.get_all(self.client.get_followers, "followers")

    def get_all(self, call: APICall, attribute_name: str) -> list[ProfileView]:
        """Page through ``call`` and return all collected items.

        Args:
            call: Client method invoked (through ``dumb_retry``) as
                ``call(handle, cursor, 100)``.
            attribute_name: Name of the response attribute holding one page of
                items; also used in the progress messages.

        Returns:
            The items of all pages, in the order they were received.
        """
        cursor, items = None, []
        while True:
            res = dumb_retry(call)(self.client.me.handle, cursor, 100)
            batch = getattr(res, attribute_name, [])
            items.extend(batch)
            if self.verbose:
                print(f"Got {len(items)} {attribute_name}", end="\r", flush=True)
            cursor = res.cursor
            # Stop when there is no next cursor, or when a page comes back
            # empty (guards against looping on a cursor that yields nothing).
            if not cursor or not batch:
                break
        if self.verbose:
            print(f"\nFinished getting {attribute_name}")
        return items

    def last_activity(self, pv: ProfileView, created_at: date | None) -> date | None:
        """Return the date of the single post fetched from the account's author feed.

        Only one feed item is requested (``limit=1``). When the feed is empty,
        ``created_at`` is returned unchanged as the fallback.
        """
        # NOTE(review): the feed is looked up by handle; pv.did may be the more
        # stable identifier if the API accepts it here — confirm before changing.
        res = dumb_retry(self.client.get_author_feed)(pv.handle, limit=1)
        if not res.feed:
            return created_at
        return parse_utc_date(res.feed[-1].post.record.created_at)
if __name__ == "__main__":
    # Imported here because only the script entry point needs it.
    from pathlib import Path

    parser = argparse.ArgumentParser(
        description="Export followers and follows of the logged-in account to a CSV file.",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Whether to print verbose output",
    )
    parser.add_argument(
        "--dotenv-path",
        type=str,
        default=".env",
        help="Path of the dotenv file passed to load_credentials (default: %(default)s)",
    )
    parser.add_argument(
        "--out-path",
        type=str,
        default="./data/profiles.csv",
        help="Path of the CSV file to write (default: %(default)s)",
    )
    args = parser.parse_args()

    start_time = datetime.now()

    # Make sure the output directory exists *before* the slow API crawl, so a
    # missing or uncreatable folder is discovered now rather than after all
    # profiles have been fetched.
    out_path = Path(args.out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    client = Client()
    client.login(*load_credentials(dotenv_path=args.dotenv_path))
    if args.verbose:
        print("Logged in successfully")

    profiles = Getter(client, args.verbose).get_profiles()

    # Explicit encoding keeps the output independent of the platform default.
    with out_path.open("w", encoding="utf-8") as f:
        f.write(Profile.csv_header())
        f.writelines(p.csv_row() for p in profiles)

    if args.verbose:
        print(f"Done! Wrote {len(profiles)} profiles to {args.out_path}, took {datetime.now() - start_time}")