Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions sre2020/Poedjadevie_AuthorFileTouches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import json
from pip._vendor import requests
import csv

# @dictFiles empty dictionary of files
# @lstTokens GitHub authentication tokens
def countfiles(authorlist, lsttokens, repo):
ipage = 1 # url page counter
ct = 0 # token counter
# loop though all the commit pages until the last returned empty page
try:
# loop though all the commit pages until the last returned empty page
while True:
if ct == len(lstTokens):
ct = 0
spage = str(ipage)
commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \
'&per_page=100&access_token=' + lsttokens[ct]

ct += 1
content = requests.get(commitsUrl)
jsonCommits = json.loads(content.content)
# break out of the while loop if there are no more commits in the pages
if len(jsonCommits) == 0:
break

# iterate through the list of commits in a page
for shaObject in jsonCommits:
sha = shaObject['sha']
if ct == len(lstTokens):
ct = 0
# For each commit, use the GitHub commit API to extract the files touched by the commit
shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \
+ '?access_token=' + lstTokens[ct]
ct += 1
content = requests.get(shaUrl)
shaDetails = json.loads(content.content)
filesjson = shaDetails['files']

for filenameObj in filesjson:
filename = filenameObj['filename']
if filename.endswith(('.java','.h','.kt','.js','.cpp')):
author = list()
author.append(filename)
author.append(shaDetails['commit']['author']['name'])
author.append(shaDetails['commit']['author']['date'])
authorlist.append(author)

ipage += 1
except Exception as e:
print(e)
exit(0)

repo = 'scottyab/rootbeer'
# repo = 'Skyscanner/backpack'
# repo = 'mendhak/gpslogger'
# repo = 'k9mail/k-9'

# put your tokens here
lstTokens = ['']

authorlist = list()
countfiles(authorlist, lstTokens, repo)

file = repo.split('/')[1]
#change this to the path of your file
fileOutput = file+'Author.csv'
rows = ["Filename", "Author", "Date"]
fileCSV = open(fileOutput, 'w')
writer = csv.writer(fileCSV)
writer.writerow(rows)

bigcount = None
bigfilename = None
for x in authorlist:
print(x)
rows = [x[0],x[1],x[2]]
writer.writerow(rows)
fileCSV.close()
73 changes: 73 additions & 0 deletions sre2020/Poedjadevie_CollectFiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
from pip._vendor import requests
import csv

# @dictFiles empty dictionary of files
# @lstTokens GitHub authentication tokens
def countfiles(dictfiles, lsttokens, repo):
ipage = 1 # url page counter
ct = 0 # token counter
# loop though all the commit pages until the last returned empty page
try:
# loop though all the commit pages until the last returned empty page
while True:
if ct == len(lstTokens):
ct = 0
spage = str(ipage)
commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \
'&per_page=100&access_token=' + lsttokens[ct]

ct += 1
content = requests.get(commitsUrl)
jsonCommits = json.loads(content.content)
# break out of the while loop if there are no more commits in the pages

if len(jsonCommits) == 0:
break
# iterate through the list of commits in a page
for shaObject in jsonCommits:
sha = shaObject['sha']
if ct == len(lstTokens):
ct = 0
# For each commit, use the GitHub commit API to extract the files touched by the commit
shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \
+ '?access_token=' + lstTokens[ct]
ct += 1

content = requests.get(shaUrl)
shaDetails = json.loads(content.content)
filesjson = shaDetails['files']
for filenameObj in filesjson:
filename = filenameObj['filename']
if filename.endswith(('.java','.h','.kt','.js','.cpp')):
dictfiles[filename] = dictfiles.get(filename, 0) + 1
ipage += 1
except Exception as e:
print("Error receiving data")
print(e)
exit(0)

repo = 'scottyab/rootbeer'
# repo = 'Skyscanner/backpack'
# repo = 'mendhak/gpslogger'
# repo = 'k9mail/k-9'

# put your tokens here
lstTokens = ['']

dictfiles = dict()
countfiles(dictfiles, lstTokens, repo)

file = repo.split('/')[1]

#change this to the path of your file
fileOutput = file+'.csv'
rows = ["Filename", "Touches"]
fileCSV = open(fileOutput, 'w')
writer = csv.writer(fileCSV)
writer.writerow(rows)

for filename, count in dictfiles.items():
rows = [filename, count]
writer.writerow(rows)
fileCSV.close()
146 changes: 146 additions & 0 deletions sre2020/Poedjadevie_Scatterplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import json
from pip._vendor import requests
import csv

# @dictFiles empty dictionary of files
# @lstTokens GitHub authentication tokens
def countfiles(authorlist, dictFiles, lsttokens, repo):
ipage = 1 # url page counter
ct = 0 # token counter
# loop though all the commit pages until the last returned empty page
try:
# loop though all the commit pages until the last returned empty page
while True:
if ct == len(lstTokens):
ct = 0
spage = str(ipage)
commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \
'&per_page=100&access_token=' + lsttokens[ct]

ct += 1
content = requests.get(commitsUrl)
jsonCommits = json.loads(content.content)
# break out of the while loop if there are no more commits in the pages
if len(jsonCommits) == 0:
break

# iterate through the list of commits in a page
for shaObject in jsonCommits:
sha = shaObject['sha']
if ct == len(lstTokens):
ct = 0
# For each commit, use the GitHub commit API to extract the files touched by the commit
shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \
+ '?access_token=' + lstTokens[ct]
ct += 1
content = requests.get(shaUrl)
shaDetails = json.loads(content.content)
filesjson = shaDetails['files']

for filenameObj in filesjson:
filename = filenameObj['filename']
#Only include files that are written in specific back end language
if filename.endswith(('.java','.h','.kt','.js','.cpp')):
#Save the file, author and date in the authorlist
author = list()
author.append(filename)
author.append(shaDetails['commit']['author']['name'])
author.append(shaDetails['commit']['author']['date'])
authorlist.append(author)
#Use the dictfiles to store the ccount of files
dictfiles[filename] = dictfiles.get(filename, 0) + 1
ipage += 1
except Exception as e:
print(e)
exit(0)

repo = 'scottyab/rootbeer'
# repo = 'Skyscanner/backpack'
# repo = 'mendhak/gpslogger'
# repo = 'k9mail/k-9'

# put your tokens here
lstTokens = ['']

dictfiles = dict()
authorlist = list()
countfiles(authorlist, dictfiles, lstTokens, repo)
#Sort the dictfiles in ascending order for the count value
dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1]))

import matplotlib.pyplot as plt
from datetime import datetime
import math

listfiles = (sorted(dictfiles.items(), key=lambda x:x[1]))
top50files = dict()

count=1
#Simple function to rename the files to f01 to f50
if len(listfiles) <=50:
for file in listfiles:
if count<10:
top50files[file[0]]= 'f0' + str(count)
else:
top50files[file[0]]='f' + str(count)
count+=1
else:
for file in listfiles[len(listfiles)-50]:
if count<10:
top50files[file[0]]='f0' + str(count)
else:
top50files[file[0]]='f' + str(count)
count+=1

x = list()
y = list()
c = list()
a = dict()

c1=0

#Function to get the data ready for plotting
for author in authorlist:
if author[0] in top50files:
x.append(top50files[author[0]])
y.append(author[2].split('T')[0])
if author[1] not in a:
a[author[1]] = c1
c1+=25
c.append(a[author[1]])

#Use this to first sort the data based on the dates (y-axis)
lists = sorted(zip(y,x,c))
new_y, new_x, new_c= list(zip(*lists))

weeks= list()
week = 0
date_prev = 0

#Function to get the corresponding weeks for the y-axis
for day in new_y:
date = datetime.strptime(day, '%Y-%m-%d')
if date_prev == 0:
weeks.append(week)
date_prev = date
else:
diff = (date - date_prev).days
if diff < 7:
weeks.append(week)
else:
if diff%7 == 0:
week = week + diff/7
weeks.append(week)
else:
week += math.floor(diff/7)
weeks.append(week)
date_prev = date

plt.scatter(new_x,weeks,c=new_c,s=20)
plt.xlabel("File")
plt.ylabel("Weeks")
plt.grid(True)
plt.show()

file = repo.split('/')[1]
plt.savefig(file+'Plot.png')