From 18e5a9d5e5da239545da5fc19f5da5cf1615268e Mon Sep 17 00:00:00 2001 From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com> Date: Sat, 6 Feb 2021 13:19:17 +0100 Subject: [PATCH 1/4] Add files via upload --- sre2020/Poedjadevie_AuthorFileTouches.py | 79 ++++++++++++ sre2020/Poedjadevie_CollectFiles.py | 73 ++++++++++++ sre2020/Poedjadevie_Scatterplot.py | 146 +++++++++++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 sre2020/Poedjadevie_AuthorFileTouches.py create mode 100644 sre2020/Poedjadevie_CollectFiles.py create mode 100644 sre2020/Poedjadevie_Scatterplot.py diff --git a/sre2020/Poedjadevie_AuthorFileTouches.py b/sre2020/Poedjadevie_AuthorFileTouches.py new file mode 100644 index 0000000..38a033f --- /dev/null +++ b/sre2020/Poedjadevie_AuthorFileTouches.py @@ -0,0 +1,79 @@ +import json +from pip._vendor import requests +import csv + +# @dictFiles empty dictionary of files +# @lstTokens GitHub authentication tokens +def countfiles(authorlist, lsttokens, repo): + ipage = 1 # url page counter + ct = 0 # token counter +# loop though all the commit pages until the last returned empty page + try: + # loop though all the commit pages until the last returned empty page + while True: + if ct == len(lstTokens): + ct = 0 + spage = str(ipage) + commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \ + '&per_page=100&access_token=' + lsttokens[ct] + + ct += 1 + content = requests.get(commitsUrl) + jsonCommits = json.loads(content.content) + # break out of the while loop if there are no more commits in the pages + if len(jsonCommits) == 0: + break + + # iterate through the list of commits in a page + for shaObject in jsonCommits: + sha = shaObject['sha'] + if ct == len(lstTokens): + ct = 0 + # For each commit, use the GitHub commit API to extract the files touched by the commit + shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \ + + '?access_token=' + lstTokens[ct] + ct += 1 + content = requests.get(shaUrl) + shaDetails = json.loads(content.content) + filesjson = shaDetails['files'] + + for filenameObj in filesjson: + filename = filenameObj['filename'] + if filename.endswith(('.java','.h','.kt','.js','.cpp')): + author = list() + author.append(filename) + author.append(shaDetails['commit']['author']['name']) + author.append(shaDetails['commit']['author']['date']) + authorlist.append(author) + + ipage += 1 + except Exception as e: + print(e) + exit(0) + +repo = 'scottyab/rootbeer' +# repo = 'Skyscanner/backpack' +# repo = 'mendhak/gpslogger' +# repo = 'k9mail/k-9' + +# put your tokens here +lstTokens = [''] + +authorlist = list() +countfiles(authorlist, lstTokens, repo) + +file = repo.split('/')[1] +#change this to the path of your file +fileOutput = file+'Author.csv' +rows = ["Filename", "Author", "Date"] +fileCSV = open(fileOutput, 'w') +writer = csv.writer(fileCSV) +writer.writerow(rows) + +bigcount = None +bigfilename = None +for x in authorlist: + print(x) + rows = [x[0],x[1],x[2]] + writer.writerow(rows) +fileCSV.close() diff --git a/sre2020/Poedjadevie_CollectFiles.py b/sre2020/Poedjadevie_CollectFiles.py new file mode 100644 index 0000000..4f3679a --- /dev/null +++ b/sre2020/Poedjadevie_CollectFiles.py @@ -0,0 +1,73 @@ +import json +from pip._vendor import requests +import csv + +# @dictFiles empty dictionary of files +# @lstTokens GitHub authentication tokens +def countfiles(dictfiles, lsttokens, repo): + ipage = 1 # url page counter + ct = 0 # token counter +# loop though all the commit pages until the last returned empty page + try: + # loop though all the commit pages until the last returned empty page + while True: + if ct == len(lstTokens): + ct = 0 + spage = str(ipage) + commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \ + '&per_page=100&access_token=' + lsttokens[ct] + + ct += 1 + content = requests.get(commitsUrl) + jsonCommits = json.loads(content.content) + # break out of the while loop if there are no more commits in the pages + + if len(jsonCommits) == 0: + break + # iterate through the list of commits in a page + for shaObject in jsonCommits: + sha = shaObject['sha'] + if ct == len(lstTokens): + ct = 0 + # For each commit, use the GitHub commit API to extract the files touched by the commit + shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \ + + '?access_token=' + lstTokens[ct] + ct += 1 + + content = requests.get(shaUrl) + shaDetails = json.loads(content.content) + filesjson = shaDetails['files'] + for filenameObj in filesjson: + filename = filenameObj['filename'] + if filename.endswith(('.java','.h','.kt','.js','.cpp')): + dictfiles[filename] = dictfiles.get(filename, 0) + 1 + ipage += 1 + except Exception as e: + print("Error receiving data") + print(e) + exit(0) + +repo = 'scottyab/rootbeer' +# repo = 'Skyscanner/backpack' +# repo = 'mendhak/gpslogger' +# repo = 'k9mail/k-9' + +# put your tokens here +lstTokens = [''] + +dictfiles = dict() +countfiles(dictfiles, lstTokens, repo) + +file = repo.split('/')[1] + +#change this to the path of your file +fileOutput = file+'.csv' +rows = ["Filename", "Touches"] +fileCSV = open(fileOutput, 'w') +writer = csv.writer(fileCSV) +writer.writerow(rows) + +for filename, count in dictfiles.items(): + rows = [filename, count] + writer.writerow(rows) +fileCSV.close() diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py new file mode 100644 index 0000000..71cf762 --- /dev/null +++ b/sre2020/Poedjadevie_Scatterplot.py @@ -0,0 +1,146 @@ +import json +from pip._vendor import requests +import csv + +# @dictFiles empty dictionary of files +# @lstTokens GitHub authentication tokens +def countfiles(authorlist, dictFiles, lsttokens, repo): + ipage = 1 # url page counter + ct = 0 # token counter +# loop though all the commit pages until the last returned empty page + try: + # loop though all the commit pages until the last returned empty page + while True: + if ct == len(lstTokens): + ct = 0 + spage = str(ipage) + commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \ + '&per_page=100&access_token=' + lsttokens[ct] + + ct += 1 + content = requests.get(commitsUrl) + jsonCommits = json.loads(content.content) + # break out of the while loop if there are no more commits in the pages + if len(jsonCommits) == 0: + break + + # iterate through the list of commits in a page + for shaObject in jsonCommits: + sha = shaObject['sha'] + if ct == len(lstTokens): + ct = 0 + # For each commit, use the GitHub commit API to extract the files touched by the commit + shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \ + + '?access_token=' + lstTokens[ct] + ct += 1 + content = requests.get(shaUrl) + shaDetails = json.loads(content.content) + filesjson = shaDetails['files'] + + for filenameObj in filesjson: + filename = filenameObj['filename'] + #Only include files that are written in specific back end language + if filename.endswith(('.java','.h','.kt','.js','.cpp')): + #Save the file, author and date in the authorlist + author = list() + author.append(filename) + author.append(shaDetails['commit']['author']['name']) + author.append(shaDetails['commit']['author']['date']) + authorlist.append(author) + #Use the dictfiles to store the ccount of files + dictfiles[filename] = dictfiles.get(filename, 0) + 1 + ipage += 1 + except Exception as e: + print(e) + exit(0) + +repo = 'scottyab/rootbeer' +# repo = 'Skyscanner/backpack' +# repo = 'mendhak/gpslogger' +# repo = 'k9mail/k-9' + +# put your tokens here +lstTokens = ['0b0923f7c7f008611cfe72e093bef4c0d8a8c1fe'] + +dictfiles = dict() +authorlist = list() +countfiles(authorlist, dictfiles, lstTokens, repo) +#Sort the dictfiles in ascending order for the count value +dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1])) + +import matplotlib.pyplot as plt +from datetime import datetime +import math + +listfiles = (sorted(dictfiles.items(), key=lambda x:x[1])) +top50files = dict() + +count=1 +#Simple function to rename the files to f01 to f50 +if len(listfiles) <=50: + for file in listfiles: + if count<10: + top50files[file[0]]= 'f0' + str(count) + else: + top50files[file[0]]='f' + str(count) + count+=1 +else: + for file in listfiles[len(listfiles)-50]: + if count<10: + top50files[file[0]]='f0' + str(count) + else: + top50files[file[0]]='f' + str(count) + count+=1 + +x = list() +y = list() +c = list() +a = dict() + +c1=0 + +#Function to get the data ready for plotting +for author in authorlist: + if author[0] in top50files: + x.append(top50files[author[0]]) + y.append(author[2].split('T')[0]) + if author[1] not in a: + a[author[1]] = c1 + c1+=25 + c.append(a[author[1]]) + +#Use this to first sort the data based on the dates (y-axis) +lists = sorted(zip(y,x,c)) +new_y, new_x, new_c= list(zip(*lists)) + +weeks= list() +week = 0 +date_prev = 0 + +#Function to get the corresponding weeks for the y-axis +for day in new_y: + date = datetime.strptime(day, '%Y-%m-%d') + if date_prev == 0: + weeks.append(week) + date_prev = date + else: + diff = (date - date_prev).days + if diff < 7: + weeks.append(week) + else: + if diff%7 == 0: + week = week + diff/7 + weeks.append(week) + else: + week += math.floor(diff/7) + weeks.append(week) + date_prev = date + +plt.scatter(new_x,weeks,c=new_c,s=20) +plt.xlabel("File") +plt.ylabel("Weeks") +plt.grid(True) +plt.show() + +file = repo.split('/')[1] +plt.savefig(file+'Plot.png') From 5a37322b54f4a149decfa4bb9c66ebbb32ed32b0 Mon Sep 17 00:00:00 2001 From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com> Date: Sat, 6 Feb 2021 13:23:07 +0100 Subject: [PATCH 2/4] Update Poedjadevie_Scatterplot.py --- sre2020/Poedjadevie_Scatterplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py index 71cf762..7c5b552 100644 --- a/sre2020/Poedjadevie_Scatterplot.py +++ b/sre2020/Poedjadevie_Scatterplot.py @@ -60,7 +60,7 @@ def countfiles(authorlist, dictFiles, lsttokens, repo): # repo = 'k9mail/k-9' # put your tokens here -lstTokens = ['0b0923f7c7f008611cfe72e093bef4c0d8a8c1fe'] +lstTokens = [''] dictfiles = dict() authorlist = list() From c03d124a7438de07afae60e7b20817d6b1f78741 Mon Sep 17 00:00:00 2001 From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com> Date: Sat, 6 Feb 2021 13:23:41 +0100 Subject: [PATCH 3/4] Delete Poedjadevie_Scatterplot.py --- sre2020/Poedjadevie_Scatterplot.py | 146 ----------------------------- 1 file changed, 146 deletions(-) delete mode 100644 sre2020/Poedjadevie_Scatterplot.py diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py deleted file mode 100644 index 7c5b552..0000000 --- a/sre2020/Poedjadevie_Scatterplot.py +++ /dev/null @@ -1,146 +0,0 @@ -import json -from pip._vendor import requests -import csv - -# @dictFiles empty dictionary of files -# @lstTokens GitHub authentication tokens -def countfiles(authorlist, dictFiles, lsttokens, repo): - ipage = 1 # url page counter - ct = 0 # token counter -# loop though all the commit pages until the last returned empty page - try: - # loop though all the commit pages until the last returned empty page - while True: - if ct == len(lstTokens): - ct = 0 - spage = str(ipage) - commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \ - '&per_page=100&access_token=' + lsttokens[ct] - - ct += 1 - content = requests.get(commitsUrl) - jsonCommits = json.loads(content.content) - # break out of the while loop if there are no more commits in the pages - if len(jsonCommits) == 0: - break - - # iterate through the list of commits in a page - for shaObject in jsonCommits: - sha = shaObject['sha'] - if ct == len(lstTokens): - ct = 0 - # For each commit, use the GitHub commit API to extract the files touched by the commit - shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \ - + '?access_token=' + lstTokens[ct] - ct += 1 - content = requests.get(shaUrl) - shaDetails = json.loads(content.content) - filesjson = shaDetails['files'] - - for filenameObj in filesjson: - filename = filenameObj['filename'] - #Only include files that are written in specific back end language - if filename.endswith(('.java','.h','.kt','.js','.cpp')): - #Save the file, author and date in the authorlist - author = list() - author.append(filename) - author.append(shaDetails['commit']['author']['name']) - author.append(shaDetails['commit']['author']['date']) - authorlist.append(author) - #Use the dictfiles to store the ccount of files - dictfiles[filename] = dictfiles.get(filename, 0) + 1 - ipage += 1 - except Exception as e: - print(e) - exit(0) - -repo = 'scottyab/rootbeer' -# repo = 'Skyscanner/backpack' -# repo = 'mendhak/gpslogger' -# repo = 'k9mail/k-9' - -# put your tokens here -lstTokens = [''] - -dictfiles = dict() -authorlist = list() -countfiles(authorlist, dictfiles, lstTokens, repo) -#Sort the dictfiles in ascending order for the count value -dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1])) - -import matplotlib.pyplot as plt -from datetime import datetime -import math - -listfiles = (sorted(dictfiles.items(), key=lambda x:x[1])) -top50files = dict() - -count=1 -#Simple function to rename the files to f01 to f50 -if len(listfiles) <=50: - for file in listfiles: - if count<10: - top50files[file[0]]= 'f0' + str(count) - else: - top50files[file[0]]='f' + str(count) - count+=1 -else: - for file in listfiles[len(listfiles)-50]: - if count<10: - top50files[file[0]]='f0' + str(count) - else: - top50files[file[0]]='f' + str(count) - count+=1 - -x = list() -y = list() -c = list() -a = dict() - -c1=0 - -#Function to get the data ready for plotting -for author in authorlist: - if author[0] in top50files: - x.append(top50files[author[0]]) - y.append(author[2].split('T')[0]) - if author[1] not in a: - a[author[1]] = c1 - c1+=25 - c.append(a[author[1]]) - -#Use this to first sort the data based on the dates (y-axis) -lists = sorted(zip(y,x,c)) -new_y, new_x, new_c= list(zip(*lists)) - -weeks= list() -week = 0 -date_prev = 0 - -#Function to get the corresponding weeks for the y-axis -for day in new_y: - date = datetime.strptime(day, '%Y-%m-%d') - if date_prev == 0: - weeks.append(week) - date_prev = date - else: - diff = (date - date_prev).days - if diff < 7: - weeks.append(week) - else: - if diff%7 == 0: - week = week + diff/7 - weeks.append(week) - else: - week += math.floor(diff/7) - weeks.append(week) - date_prev = date - -plt.scatter(new_x,weeks,c=new_c,s=20) -plt.xlabel("File") -plt.ylabel("Weeks") -plt.grid(True) -plt.show() - -file = repo.split('/')[1] -plt.savefig(file+'Plot.png') From 1d34f77d6dc3e1f1e58f1c750add99464ccb6af1 Mon Sep 17 00:00:00 2001 From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com> Date: Sat, 6 Feb 2021 13:24:06 +0100 Subject: [PATCH 4/4] Add files via upload --- sre2020/Poedjadevie_Scatterplot.py | 146 +++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 sre2020/Poedjadevie_Scatterplot.py diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py new file mode 100644 index 0000000..7c5b552 --- /dev/null +++ b/sre2020/Poedjadevie_Scatterplot.py @@ -0,0 +1,146 @@ +import json +from pip._vendor import requests +import csv + +# @dictFiles empty dictionary of files +# @lstTokens GitHub authentication tokens +def countfiles(authorlist, dictFiles, lsttokens, repo): + ipage = 1 # url page counter + ct = 0 # token counter +# loop though all the commit pages until the last returned empty page + try: + # loop though all the commit pages until the last returned empty page + while True: + if ct == len(lstTokens): + ct = 0 + spage = str(ipage) + commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \ + '&per_page=100&access_token=' + lsttokens[ct] + + ct += 1 + content = requests.get(commitsUrl) + jsonCommits = json.loads(content.content) + # break out of the while loop if there are no more commits in the pages + if len(jsonCommits) == 0: + break + + # iterate through the list of commits in a page + for shaObject in jsonCommits: + sha = shaObject['sha'] + if ct == len(lstTokens): + ct = 0 + # For each commit, use the GitHub commit API to extract the files touched by the commit + shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \ + + '?access_token=' + lstTokens[ct] + ct += 1 + content = requests.get(shaUrl) + shaDetails = json.loads(content.content) + filesjson = shaDetails['files'] + + for filenameObj in filesjson: + filename = filenameObj['filename'] + #Only include files that are written in specific back end language + if filename.endswith(('.java','.h','.kt','.js','.cpp')): + #Save the file, author and date in the authorlist + author = list() + author.append(filename) + author.append(shaDetails['commit']['author']['name']) + author.append(shaDetails['commit']['author']['date']) + authorlist.append(author) + #Use the dictfiles to store the ccount of files + dictfiles[filename] = dictfiles.get(filename, 0) + 1 + ipage += 1 + except Exception as e: + print(e) + exit(0) + +repo = 'scottyab/rootbeer' +# repo = 'Skyscanner/backpack' +# repo = 'mendhak/gpslogger' +# repo = 'k9mail/k-9' + +# put your tokens here +lstTokens = [''] + +dictfiles = dict() +authorlist = list() +countfiles(authorlist, dictfiles, lstTokens, repo) +#Sort the dictfiles in ascending order for the count value +dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1])) + +import matplotlib.pyplot as plt +from datetime import datetime +import math + +listfiles = (sorted(dictfiles.items(), key=lambda x:x[1])) +top50files = dict() + +count=1 +#Simple function to rename the files to f01 to f50 +if len(listfiles) <=50: + for file in listfiles: + if count<10: + top50files[file[0]]= 'f0' + str(count) + else: + top50files[file[0]]='f' + str(count) + count+=1 +else: + for file in listfiles[len(listfiles)-50]: + if count<10: + top50files[file[0]]='f0' + str(count) + else: + top50files[file[0]]='f' + str(count) + count+=1 + +x = list() +y = list() +c = list() +a = dict() + +c1=0 + +#Function to get the data ready for plotting +for author in authorlist: + if author[0] in top50files: + x.append(top50files[author[0]]) + y.append(author[2].split('T')[0]) + if author[1] not in a: + a[author[1]] = c1 + c1+=25 + c.append(a[author[1]]) + +#Use this to first sort the data based on the dates (y-axis) +lists = sorted(zip(y,x,c)) +new_y, new_x, new_c= list(zip(*lists)) + +weeks= list() +week = 0 +date_prev = 0 + +#Function to get the corresponding weeks for the y-axis +for day in new_y: + date = datetime.strptime(day, '%Y-%m-%d') + if date_prev == 0: + weeks.append(week) + date_prev = date + else: + diff = (date - date_prev).days + if diff < 7: + weeks.append(week) + else: + if diff%7 == 0: + week = week + diff/7 + weeks.append(week) + else: + week += math.floor(diff/7) + weeks.append(week) + date_prev = date + +plt.scatter(new_x,weeks,c=new_c,s=20) +plt.xlabel("File") +plt.ylabel("Weeks") +plt.grid(True) +plt.show() + +file = repo.split('/')[1] +plt.savefig(file+'Plot.png')