diff --git a/sre2020/Poedjadevie_AuthorFileTouches.py b/sre2020/Poedjadevie_AuthorFileTouches.py
new file mode 100644
index 0000000..38a033f
--- /dev/null
+++ b/sre2020/Poedjadevie_AuthorFileTouches.py
@@ -0,0 +1,85 @@
+import csv
+import itertools
+import json
+
+from pip._vendor import requests
+
+# File extensions that count as source files for this analysis
+SOURCE_EXTENSIONS = ('.java', '.h', '.kt', '.js', '.cpp')
+
+
+# Fetch one GitHub API url and return the decoded JSON body.
+# @url address to request
+# @token GitHub authentication token; a blank token sends an anonymous request
+# @params optional dictionary of query-string parameters
+def githubGet(url, token, params=None):
+    # GitHub rejects tokens passed in the query string, so the token is sent
+    # in the Authorization header instead
+    headers = {'Authorization': 'token ' + token} if token else {}
+    content = requests.get(url, params=params, headers=headers)
+    # raise on an HTTP error status instead of parsing an error body as data
+    content.raise_for_status()
+    return json.loads(content.content)
+
+
+# Record every source file touched by every commit of a GitHub repository.
+# @authorlist list that receives one [filename, author name, author date]
+#             entry per touched file (modified in place)
+# @lsttokens GitHub authentication tokens, used in round-robin order
+# @repo repository in 'owner/name' form
+def countfiles(authorlist, lsttokens, repo):
+    ipage = 1  # url page counter
+    commitsUrl = 'https://api.github.com/repos/' + repo + '/commits'
+    try:
+        tokens = itertools.cycle(lsttokens)  # round-robin token supply
+        # loop through all the commit pages until the last returned empty page
+        while True:
+            jsonCommits = githubGet(commitsUrl, next(tokens),
+                                    {'page': ipage, 'per_page': 100})
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                # For each commit, use the GitHub commit API to extract the
+                # files touched by the commit
+                shaDetails = githubGet(commitsUrl + '/' + shaObject['sha'],
+                                       next(tokens))
+                commitAuthor = shaDetails['commit']['author']
+                for filenameObj in shaDetails['files']:
+                    filename = filenameObj['filename']
+                    if filename.endswith(SOURCE_EXTENSIONS):
+                        authorlist.append([filename, commitAuthor['name'],
+                                           commitAuthor['date']])
+            ipage += 1
+    except Exception as e:
+        print(e)
+        # exit with a failure status so a broken download is not mistaken
+        # for a successful run
+        raise SystemExit(1)
+
+
+repo = 'scottyab/rootbeer'
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['']
+
+authorlist = list()
+countfiles(authorlist, lstTokens, repo)
+
+file = repo.split('/')[1]
+# change this to the path of your file
+fileOutput = file + 'Author.csv'
+rows = ["Filename", "Author", "Date"]
+# newline='' leaves line endings to the csv module; utf-8 keeps author names
+# with non-ASCII characters writable on every platform
+with open(fileOutput, 'w', newline='', encoding='utf-8') as fileCSV:
+    writer = csv.writer(fileCSV)
+    writer.writerow(rows)
+    for x in authorlist:
+        print(x)
+        writer.writerow([x[0], x[1], x[2]])
diff --git a/sre2020/Poedjadevie_CollectFiles.py b/sre2020/Poedjadevie_CollectFiles.py
new file mode 100644
index 0000000..4f3679a
--- /dev/null
+++ b/sre2020/Poedjadevie_CollectFiles.py
@@ -0,0 +1,82 @@
+import csv
+import itertools
+import json
+
+from pip._vendor import requests
+
+# File extensions that count as source files for this analysis
+SOURCE_EXTENSIONS = ('.java', '.h', '.kt', '.js', '.cpp')
+
+
+# Fetch one GitHub API url and return the decoded JSON body.
+# @url address to request
+# @token GitHub authentication token; a blank token sends an anonymous request
+# @params optional dictionary of query-string parameters
+def githubGet(url, token, params=None):
+    # GitHub rejects tokens passed in the query string, so the token is sent
+    # in the Authorization header instead
+    headers = {'Authorization': 'token ' + token} if token else {}
+    content = requests.get(url, params=params, headers=headers)
+    # raise on an HTTP error status instead of parsing an error body as data
+    content.raise_for_status()
+    return json.loads(content.content)
+
+
+# Count how many commits touched each source file of a GitHub repository.
+# @dictfiles dictionary mapping filename to touch count (modified in place)
+# @lsttokens GitHub authentication tokens, used in round-robin order
+# @repo repository in 'owner/name' form
+def countfiles(dictfiles, lsttokens, repo):
+    ipage = 1  # url page counter
+    commitsUrl = 'https://api.github.com/repos/' + repo + '/commits'
+    try:
+        tokens = itertools.cycle(lsttokens)  # round-robin token supply
+        # loop through all the commit pages until the last returned empty page
+        while True:
+            jsonCommits = githubGet(commitsUrl, next(tokens),
+                                    {'page': ipage, 'per_page': 100})
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                # For each commit, use the GitHub commit API to extract the
+                # files touched by the commit
+                shaDetails = githubGet(commitsUrl + '/' + shaObject['sha'],
+                                       next(tokens))
+                for filenameObj in shaDetails['files']:
+                    filename = filenameObj['filename']
+                    if filename.endswith(SOURCE_EXTENSIONS):
+                        dictfiles[filename] = dictfiles.get(filename, 0) + 1
+            ipage += 1
+    except Exception as e:
+        print("Error receiving data")
+        print(e)
+        # exit with a failure status so a broken download is not mistaken
+        # for a successful run
+        raise SystemExit(1)
+
+
+repo = 'scottyab/rootbeer'
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['']
+
+dictfiles = dict()
+countfiles(dictfiles, lstTokens, repo)
+
+file = repo.split('/')[1]
+
+# change this to the path of your file
+fileOutput = file + '.csv'
+rows = ["Filename", "Touches"]
+# newline='' leaves line endings to the csv module; utf-8 keeps any filename writable
+with open(fileOutput, 'w', newline='', encoding='utf-8') as fileCSV:
+    writer = csv.writer(fileCSV)
+    writer.writerow(rows)
+    for filename, count in dictfiles.items():
+        writer.writerow([filename, count])
diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py
new file mode 100644
index 0000000..7c5b552
--- /dev/null
+++ b/sre2020/Poedjadevie_Scatterplot.py
@@ -0,0 +1,135 @@
+import csv
+import itertools
+import json
+import math
+from datetime import datetime
+
+import matplotlib.pyplot as plt
+from pip._vendor import requests
+
+# File extensions that count as source files for this analysis
+SOURCE_EXTENSIONS = ('.java', '.h', '.kt', '.js', '.cpp')
+
+
+# Fetch one GitHub API url and return the decoded JSON body.
+# @url address to request
+# @token GitHub authentication token; a blank token sends an anonymous request
+# @params optional dictionary of query-string parameters
+def githubGet(url, token, params=None):
+    # GitHub rejects tokens passed in the query string, so the token is sent
+    # in the Authorization header instead
+    headers = {'Authorization': 'token ' + token} if token else {}
+    content = requests.get(url, params=params, headers=headers)
+    # raise on an HTTP error status instead of parsing an error body as data
+    content.raise_for_status()
+    return json.loads(content.content)
+
+
+# Record, for every commit of a GitHub repository, which source files it
+# touched and who authored it.
+# @authorlist list that receives one [filename, author name, author date]
+#             entry per touched file (modified in place)
+# @dictFiles dictionary mapping filename to touch count (modified in place)
+# @lsttokens GitHub authentication tokens, used in round-robin order
+# @repo repository in 'owner/name' form
+def countfiles(authorlist, dictFiles, lsttokens, repo):
+    ipage = 1  # url page counter
+    commitsUrl = 'https://api.github.com/repos/' + repo + '/commits'
+    try:
+        tokens = itertools.cycle(lsttokens)  # round-robin token supply
+        # loop through all the commit pages until the last returned empty page
+        while True:
+            jsonCommits = githubGet(commitsUrl, next(tokens),
+                                    {'page': ipage, 'per_page': 100})
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                # For each commit, use the GitHub commit API to extract the
+                # files touched by the commit
+                shaDetails = githubGet(commitsUrl + '/' + shaObject['sha'],
+                                       next(tokens))
+                commitAuthor = shaDetails['commit']['author']
+                for filenameObj in shaDetails['files']:
+                    filename = filenameObj['filename']
+                    # Only include files that are written in specific back end language
+                    if filename.endswith(SOURCE_EXTENSIONS):
+                        # Save the file, author and date in the authorlist
+                        authorlist.append([filename, commitAuthor['name'],
+                                           commitAuthor['date']])
+                        # Use dictFiles to store the count of touches per file
+                        dictFiles[filename] = dictFiles.get(filename, 0) + 1
+            ipage += 1
+    except Exception as e:
+        print(e)
+        # exit with a failure status so a broken download is not mistaken
+        # for a successful run
+        raise SystemExit(1)
+
+
+repo = 'scottyab/rootbeer'
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['']
+
+dictfiles = dict()
+authorlist = list()
+countfiles(authorlist, dictfiles, lstTokens, repo)
+# Sort the dictfiles in ascending order for the count value
+listfiles = sorted(dictfiles.items(), key=lambda x: x[1])
+dictfiles = dict(listfiles)
+
+# Rename the (at most) 50 most touched files to f01 .. f50
+top50files = dict()
+for count, (filename, touches) in enumerate(listfiles[-50:], start=1):
+    top50files[filename] = 'f{:02d}'.format(count)
+
+x = list()
+y = list()
+c = list()
+a = dict()
+
+c1 = 0
+
+# Get the data ready for plotting: file label, commit day and author colour
+for author in authorlist:
+    if author[0] in top50files:
+        x.append(top50files[author[0]])
+        y.append(author[2].split('T')[0])
+        # every new author gets the next colour value
+        if author[1] not in a:
+            a[author[1]] = c1
+            c1 += 25
+        c.append(a[author[1]])
+
+# Nothing matched the extension filter, so there is nothing to plot
+if not x:
+    raise SystemExit('No matching files found for ' + repo)
+
+# Use this to first sort the data based on the dates (y-axis)
+lists = sorted(zip(y, x, c))
+new_y, new_x, new_c = list(zip(*lists))
+
+# Convert each commit day into whole weeks elapsed since the first commit;
+# measuring from one fixed start keeps the week numbers integral and free
+# of drift
+first_date = datetime.strptime(new_y[0], '%Y-%m-%d')
+weeks = list()
+for day in new_y:
+    date = datetime.strptime(day, '%Y-%m-%d')
+    weeks.append(math.floor((date - first_date).days / 7))
+
+plt.scatter(new_x, weeks, c=new_c, s=20)
+plt.xlabel("File")
+plt.ylabel("Weeks")
+plt.grid(True)
+
+file = repo.split('/')[1]
+# save before show(): show() may discard the figure once its window is
+# closed, which would leave the saved image blank
+plt.savefig(file + 'Plot.png')
+plt.show()