-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathseed_sample_data.py
More file actions
157 lines (137 loc) · 6.85 KB
/
seed_sample_data.py
File metadata and controls
157 lines (137 loc) · 6.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
Seed the database with sample data: ~100 users, plans, locations, devices, payments, sessions.
Run from project root: python seed_sample_data.py
Re-runnable: clears existing data then inserts.
"""
import random
from datetime import datetime, timedelta
from db.connection import get_connection
# Fixed seed for reproducible data
random.seed(42)
PLANS = [
("Basic", 9.99, 1),
("Standard", 14.99, 2),
("Premium", 19.99, 4),
("Family", 24.99, 6),
]
LOCATION_DESCRIPTIONS = [
"Seattle Home", "NYC Cafe", "Austin Office", "LA Apartment", "Chicago Home",
"Denver Office", "Portland Home", "SF Coffee Shop", "Boston Library",
"Miami Beach", "Phoenix Home", "Atlanta Office", "Dallas Home", "Minneapolis Cafe",
"Detroit Home", "Philadelphia Office", "Houston Home", "San Diego Cafe",
"Las Vegas Hotel", "DC Office", "Seattle Office", "NYC Home", "Austin Home",
"LA Home", "Chicago Cafe", "Denver Home", "Portland Cafe", "SF Home",
"Boston Home", "Miami Home", "Phoenix Office", "Atlanta Home", "Dallas Office",
"Minneapolis Home", "Detroit Office", "Philadelphia Home", "Houston Office",
"San Diego Home", "DC Home", "Seattle Cafe", "NYC Office", "Austin Cafe",
"LA Office", "Chicago Office", "Denver Cafe", "Portland Office", "SF Office",
"Boston Office", "Miami Office",
]
DEVICE_NAMES = ["iPhone", "Android", "Chrome", "Safari", "TV", "iPad", "Fire TV", "Roku"]
ACCOUNT_STATUSES = ["active", "inactive", "suspended"]
def main():
with get_connection() as conn:
cur = conn.cursor()
# --- Clear existing data (FK-safe order) ---
cur.execute('DELETE FROM session;')
cur.execute('DELETE FROM device;')
cur.execute('DELETE FROM "user";')
cur.execute('DELETE FROM location;')
cur.execute('DELETE FROM subscription_plan;')
cur.execute('DELETE FROM account_status;')
# Reset sequences so IDs start at 1
cur.execute("ALTER SEQUENCE subscription_plan_plan_id_seq RESTART WITH 1;")
cur.execute("ALTER SEQUENCE account_status_status_id_seq RESTART WITH 1;")
cur.execute("ALTER SEQUENCE location_location_id_seq RESTART WITH 1;")
cur.execute('ALTER SEQUENCE "user_user_id_seq" RESTART WITH 1;')
cur.execute("ALTER SEQUENCE device_device_id_seq RESTART WITH 1;")
cur.execute("ALTER SEQUENCE session_session_id_seq RESTART WITH 1;")
# --- Insert account_status ---
for status_name in ACCOUNT_STATUSES:
cur.execute(
"INSERT INTO account_status (status_name) VALUES (%s);",
(status_name,),
)
num_statuses = len(ACCOUNT_STATUSES)
# --- Insert subscription_plan ---
for name, price, max_streams in PLANS:
cur.execute(
"INSERT INTO subscription_plan (name, price, max_streams) VALUES (%s, %s, %s);",
(name, price, max_streams),
)
num_plans = len(PLANS)
# --- Insert location ---
num_locations = len(LOCATION_DESCRIPTIONS)
for desc in LOCATION_DESCRIPTIONS:
lat = round(random.uniform(25.0, 48.0), 5) if random.random() > 0.3 else None
lon = round(random.uniform(-125.0, -70.0), 5) if lat is not None else None
cur.execute(
"INSERT INTO location (latitude, longitude, description) VALUES (%s, %s, %s);",
(lat, lon, desc),
)
# --- Insert users (100) ---
num_users = 100
for i in range(1, num_users + 1):
name = "Sample User %d" % i
email = "sampleuser%d@example.com" % i
plan_id = random.randint(1, num_plans)
status_id = random.randint(1, num_statuses)
home_location_id = random.randint(1, num_locations) if random.random() > 0.1 else None
cur.execute(
'INSERT INTO "user" (name, email, plan_id, status_id, home_location_id) VALUES (%s, %s, %s, %s, %s);',
(name, email, plan_id, status_id, home_location_id),
)
# --- Insert devices (~2-3 per user) ---
device_rows = [] # (user_id, name, is_trusted, last_seen_at_home)
for user_id in range(1, num_users + 1):
n_devices = random.randint(2, 3)
names_used = random.sample(DEVICE_NAMES, min(n_devices, len(DEVICE_NAMES)))
if n_devices > len(names_used):
names_used += random.choices(DEVICE_NAMES, k=n_devices - len(names_used))
for j, dname in enumerate(names_used[:n_devices]):
is_trusted = j == 0
last_seen = (datetime.utcnow() - timedelta(days=random.randint(0, 60))) if random.random() > 0.4 else None
device_rows.append((user_id, dname, is_trusted, last_seen))
for idx, (user_id, dname, is_trusted, last_seen) in enumerate(device_rows):
fingerprint = "seed-device-%d" % (idx + 1)
cur.execute(
"INSERT INTO device (user_id, name, device_fingerprint, is_trusted, last_seen_at_home) VALUES (%s, %s, %s, %s, %s);",
(user_id, dname, fingerprint, is_trusted, last_seen),
)
num_devices = len(device_rows)
# Build user_id -> list of device_ids (1-based device_id after insert order)
cur.execute("SELECT device_id, user_id FROM device ORDER BY device_id;")
device_user = cur.fetchall()
user_device_ids = {}
for dev_id, uid in device_user:
user_device_ids.setdefault(uid, []).append(dev_id)
# --- Insert sessions (~5-20 per user, some active) ---
num_sessions = 0
session_base = datetime.utcnow() - timedelta(days=180)
for user_id in range(1, num_users + 1):
dev_ids = user_device_ids.get(user_id, [])
if not dev_ids:
continue
loc_ids = list(range(1, num_locations + 1))
n_sessions = random.randint(5, 20)
for _ in range(n_sessions):
start_time = session_base + timedelta(days=random.randint(0, 180), hours=random.randint(0, 23))
# ~10% of sessions active (end_time NULL)
if random.random() < 0.1:
end_time = None
else:
end_time = start_time + timedelta(minutes=random.randint(15, 180))
device_id = random.choice(dev_ids)
location_id = random.choice(loc_ids)
cur.execute(
"INSERT INTO session (user_id, device_id, location_id, start_time, end_time) VALUES (%s, %s, %s, %s, %s);",
(user_id, device_id, location_id, start_time, end_time),
)
num_sessions += 1
print(
"Inserted %d plans, %d locations, %d users, %d devices, %d sessions."
% (num_plans, num_locations, num_users, num_devices, num_sessions)
)
print("Done.")
if __name__ == "__main__":
main()