-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathgithub_api.py
More file actions
135 lines (118 loc) · 5.06 KB
/
github_api.py
File metadata and controls
135 lines (118 loc) · 5.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import requests, time
from datetime import datetime
from config import GITHUB_TOKEN
# Authorization header passed to every requests.get call in this file;
# GITHUB_TOKEN is imported from the local config module.
HEADERS = {'Authorization': f'token {GITHUB_TOKEN}'}
def search_users_by_topic(topic, keywords=None, limit=20):
    """Search GitHub users whose bio matches the topic and keywords.

    Args:
        topic: Search term(s) placed first in the query.
        keywords: Optional iterable of extra terms appended to the query.
        limit: Sent as ``per_page``; only this single page is fetched.

    Returns:
        The ``items`` list of the search response, or an empty list when
        the request fails, the API answers with a non-200 status, or the
        payload has no ``items`` key. Errors are printed, not raised.
    """
    kw = " ".join(keywords) if keywords else ""
    query = f'{topic} {kw} in:bio'.strip()
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        r = requests.get("https://api.github.com/search/users",
                         headers=HEADERS,
                         params={"q": query, "per_page": limit},
                         timeout=10)
    except requests.RequestException as exc:
        # Same best-effort contract as the HTTP-error path below.
        print("User search error:", exc)
        return []
    if r.status_code != 200:
        # Error bodies are not guaranteed to be JSON (e.g. gateway error
        # pages), so fall back to the raw text instead of raising.
        try:
            message = r.json().get("message")
        except ValueError:
            message = r.text
        print("User search error:", r.status_code, message)
        return []
    return r.json().get("items", [])
def keyword_in_readmes(username, keywords):
    """Return True if GitHub code search finds a README.md matching all keywords.

    Args:
        username: Login whose code is searched (``user:`` qualifier).
        keywords: Iterable of search terms; an empty or None value means
            there is nothing to check and the function returns True
            without making a request.

    Returns:
        True when the search reports at least one match, False when it
        reports none or the API answers with a non-200 status (the status
        is printed in that case).
    """
    if not keywords:
        return True  # nothing to check
    # Terms and qualifiers are separated by spaces. requests URL-encodes
    # ``params`` itself, so a literal '+' would reach GitHub as %2B and the
    # qualifiers (in:file, filename:, user:) would not be parsed.
    kw_query = " ".join(keywords)
    code_q = f"{kw_query} in:file filename:README.md user:{username}"
    r = requests.get("https://api.github.com/search/code",
                     headers=HEADERS,
                     params={"q": code_q, "per_page": 1})
    if r.status_code != 200:
        print("Code search error:", username, r.status_code)
        return False
    return r.json()["total_count"] > 0
def find_relevant_users(topic, keywords=None, limit=10):
    """Return up to ``limit`` logins that pass the bio search and the README check.

    Candidates come from search_users_by_topic(); each one is kept only if
    keyword_in_readmes() accepts it.
    """
    required = keywords or []
    # Ask for twice as many candidates, since the README filter drops some.
    candidates = search_users_by_topic(topic, keywords, limit=limit * 2)
    matches = []
    for candidate in candidates:
        login = candidate["login"]
        if keyword_in_readmes(login, required):
            matches.append(login)
        if len(matches) >= limit:
            break
        time.sleep(0.2)  # short pause between consecutive API calls
    return matches
def get_user_data(username):
    """Fetch a user's profile and return a fixed subset of its fields.

    Returns a dict with the keys login, name, email, blog, bio, html_url
    and created_at (missing fields map to None). On a non-200 response the
    decoded error payload is printed and None is returned.
    """
    resp = requests.get(f'https://api.github.com/users/{username}', headers=HEADERS)
    if resp.status_code == 200:
        profile = resp.json()
        wanted = ("login", "name", "email", "blog", "bio", "html_url", "created_at")
        return {field: profile.get(field) for field in wanted}
    print(f"Error fetching user data: {resp.json()}")
    return None
def get_user_repos(username):
    """Return all repositories owned by ``username``.

    The endpoint is paginated (100 repos per page here). Every page is
    fetched by following the ``Link: rel="next"`` header, so users with
    more than 100 repositories are no longer truncated to the first page.

    Returns:
        A list of repository dicts in the order the API returns them. If
        the first request fails the list is empty; if a later page fails,
        the repositories collected so far are returned. Failures print the
        decoded error payload.
    """
    url = f'https://api.github.com/users/{username}/repos?per_page=100&type=owner&sort=created'
    repos = []
    while url:
        response = requests.get(url, headers=HEADERS)
        if response.status_code != 200:
            print(f"Error fetching repos: {response.json()}")
            return repos
        repos.extend(response.json())
        # requests parses the Link header into ``response.links``; there is
        # no "next" entry on the last page, which ends the loop.
        url = response.links.get("next", {}).get("url")
    return repos
def get_earliest_commit_date(repo_owner, repo_name):
    """Return the author date of the oldest commit listed for a repository.

    The list-commits endpoint returns commits newest first and has no
    documented ``order`` parameter, so asking for the first item yields
    the latest commit. With ``per_page=1`` the page marked ``rel="last"``
    in the Link header holds exactly the oldest listed commit, so that
    page is fetched instead.

    Returns:
        The ``commit.author.date`` string of the oldest commit, or None
        when a request returns a non-200 status or no commits are listed.
    """
    url = f'https://api.github.com/repos/{repo_owner}/{repo_name}/commits'
    response = requests.get(url, headers=HEADERS, params={"per_page": 1})
    if response.status_code != 200:
        return None
    # No "last" link means there is only one page, i.e. the commit already
    # fetched is also the oldest one.
    last_url = response.links.get("last", {}).get("url")
    if last_url:
        response = requests.get(last_url, headers=HEADERS)
        if response.status_code != 200:
            return None
    data = response.json()
    if isinstance(data, list) and data:
        return data[0]['commit']['author']['date']
    return None
def estimate_experience(username):
    """Estimate a user's years of experience from account and repo creation years.

    The estimate is the current year minus the earlier of the account
    creation year and the earliest repository creation year (account year
    alone when no repository has a creation date).

    Returns:
        None when get_user_data() yields nothing; otherwise a dict with
        username, bio, created_at, earliest_repo_year,
        estimated_experience_years, is_veteran (10+ years), html_url and
        email.
    """
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"

    profile = get_user_data(username)
    if not profile:
        return None

    signup_year = datetime.strptime(profile.get('created_at'), stamp_format).year
    this_year = datetime.now().year

    # Creation years of every repo that reports a creation timestamp.
    repo_stamps = (repo.get('created_at') for repo in get_user_repos(username))
    repo_years = [
        datetime.strptime(stamp, stamp_format).year
        for stamp in repo_stamps
        if stamp
    ]
    first_repo_year = min(repo_years, default=None)

    # With no repo years this reduces to plain account age.
    years = this_year - min([signup_year, *repo_years])

    return {
        "username": username,
        "bio": profile.get("bio", ""),
        "created_at": profile.get("created_at"),
        "earliest_repo_year": first_repo_year,
        "estimated_experience_years": years,
        "is_veteran": years >= 10,
        "html_url": profile.get('html_url'),
        "email": profile.get('email')
    }
if __name__ == "__main__":
    # Print a report for every matching user classified as a veteran.
    users = find_relevant_users("machine learning", ["ai", "data"])
    for username in users:
        # estimate_experience() already fetches the profile and returns its
        # bio, so a separate get_user_data() request per user is not needed.
        result = estimate_experience(username)
        if not (result and result['is_veteran']):
            continue
        print("----- GitHub Experience Report -----")
        print(f"Username : {result['username']}")
        print(f"Account Created: {result['created_at']}")
        print(f"Earliest Repo : {result['earliest_repo_year']}")
        print(f"Experience : {result['estimated_experience_years']} years")
        print(f"Contact Email : {result['email']}")
        print(f"Bio : {result['bio']}")
        print(f"Profile Link : {result['html_url']}")
        print("-------------------------------------")