From 18e5a9d5e5da239545da5fc19f5da5cf1615268e Mon Sep 17 00:00:00 2001
From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:19:17 +0100
Subject: [PATCH 1/4] Add files via upload

---
 sre2020/Poedjadevie_AuthorFileTouches.py |  79 ++++++++++++
 sre2020/Poedjadevie_CollectFiles.py      |  73 ++++++++++++
 sre2020/Poedjadevie_Scatterplot.py       | 146 +++++++++++++++++++++++
 3 files changed, 298 insertions(+)
 create mode 100644 sre2020/Poedjadevie_AuthorFileTouches.py
 create mode 100644 sre2020/Poedjadevie_CollectFiles.py
 create mode 100644 sre2020/Poedjadevie_Scatterplot.py

diff --git a/sre2020/Poedjadevie_AuthorFileTouches.py b/sre2020/Poedjadevie_AuthorFileTouches.py
new file mode 100644
index 0000000..38a033f
--- /dev/null
+++ b/sre2020/Poedjadevie_AuthorFileTouches.py
@@ -0,0 +1,79 @@
+import json
+from pip._vendor import requests
+import csv
+
+# @authorlist list that receives one [filename, author, date] entry per touched source file
+# @lsttokens GitHub authentication tokens, used round-robin (an empty token sends no credentials)
+def countfiles(authorlist, lsttokens, repo):
+    """Walk every commit of `repo` through the GitHub API and record who touched which source file."""
+    ipage = 1  # url page counter
+    ct = 0  # token counter
+    try:
+        # loop through all the commit pages until the last returned empty page
+        while True:
+            # rotate over the tokens that were passed in, not over a module-level global
+            token = lsttokens[ct % len(lsttokens)]
+            ct += 1
+            # GitHub no longer accepts ?access_token=...; credentials go in the Authorization header
+            headers = {'Authorization': 'token ' + token} if token else {}
+            commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + str(ipage) + \
+                        '&per_page=100'
+            content = requests.get(commitsUrl, headers=headers)
+            jsonCommits = json.loads(content.content)
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                sha = shaObject['sha']
+                token = lsttokens[ct % len(lsttokens)]
+                ct += 1
+                headers = {'Authorization': 'token ' + token} if token else {}
+                # For each commit, use the GitHub commit API to extract the files touched by the commit
+                shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha
+                content = requests.get(shaUrl, headers=headers)
+                shaDetails = json.loads(content.content)
+                filesjson = shaDetails['files']
+
+                for filenameObj in filesjson:
+                    filename = filenameObj['filename']
+                    # only source files with one of these extensions are recorded
+                    if filename.endswith(('.java','.h','.kt','.js','.cpp')):
+                        author = list()
+                        author.append(filename)
+                        author.append(shaDetails['commit']['author']['name'])
+                        author.append(shaDetails['commit']['author']['date'])
+                        authorlist.append(author)
+
+            ipage += 1
+    except Exception as e:
+        print(e)
+        exit(1)  # a failed crawl must not report success to the calling shell
+
+repo = 'scottyab/rootbeer'
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['']
+
+# collect one [filename, author, date] row per touched source file
+authorlist = list()
+countfiles(authorlist, lstTokens, repo)
+
+file = repo.split('/')[1]
+# change this to the path of your file
+fileOutput = file+'Author.csv'
+rows = ["Filename", "Author", "Date"]
+
+# newline='' stops the csv module from emitting blank rows on Windows;
+# utf-8 keeps non-ASCII author names writable on every platform;
+# the with-block closes the file even if a write fails
+with open(fileOutput, 'w', newline='', encoding='utf-8') as fileCSV:
+    writer = csv.writer(fileCSV)
+    writer.writerow(rows)
+    for x in authorlist:
+        print(x)
+        writer.writerow([x[0], x[1], x[2]])
diff --git a/sre2020/Poedjadevie_CollectFiles.py b/sre2020/Poedjadevie_CollectFiles.py
new file mode 100644
index 0000000..4f3679a
--- /dev/null
+++ b/sre2020/Poedjadevie_CollectFiles.py
@@ -0,0 +1,73 @@
+import json
+from pip._vendor import requests
+import csv
+
+# @dictfiles dictionary that maps each touched source file to its number of touches
+# @lsttokens GitHub authentication tokens, used round-robin (an empty token sends no credentials)
+def countfiles(dictfiles, lsttokens, repo):
+    """Walk every commit of `repo` through the GitHub API and count the touches per source file."""
+    ipage = 1  # url page counter
+    ct = 0  # token counter
+    try:
+        # loop through all the commit pages until the last returned empty page
+        while True:
+            # rotate over the tokens that were passed in, not over a module-level global
+            token = lsttokens[ct % len(lsttokens)]
+            ct += 1
+            # GitHub no longer accepts ?access_token=...; credentials go in the Authorization header
+            headers = {'Authorization': 'token ' + token} if token else {}
+            commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + str(ipage) + \
+                        '&per_page=100'
+            content = requests.get(commitsUrl, headers=headers)
+            jsonCommits = json.loads(content.content)
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                sha = shaObject['sha']
+                token = lsttokens[ct % len(lsttokens)]
+                ct += 1
+                headers = {'Authorization': 'token ' + token} if token else {}
+                # For each commit, use the GitHub commit API to extract the files touched by the commit
+                shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha
+                content = requests.get(shaUrl, headers=headers)
+                shaDetails = json.loads(content.content)
+                filesjson = shaDetails['files']
+
+                for filenameObj in filesjson:
+                    filename = filenameObj['filename']
+                    # only source files with one of these extensions are counted
+                    if filename.endswith(('.java','.h','.kt','.js','.cpp')):
+                        dictfiles[filename] = dictfiles.get(filename, 0) + 1
+            ipage += 1
+    except Exception as e:
+        print("Error receiving data")
+        print(e)
+        exit(1)  # a failed crawl must not report success to the calling shell
+
+repo = 'scottyab/rootbeer'
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['']
+
+# count how often each source file was touched across the commits of the repo
+dictfiles = dict()
+countfiles(dictfiles, lstTokens, repo)
+
+file = repo.split('/')[1]
+# change this to the path of your file
+fileOutput = file+'.csv'
+rows = ["Filename", "Touches"]
+
+# newline='' stops the csv module from emitting blank rows on Windows;
+# the with-block closes the file even if a write fails
+with open(fileOutput, 'w', newline='', encoding='utf-8') as fileCSV:
+    writer = csv.writer(fileCSV)
+    writer.writerow(rows)
+    for filename, count in dictfiles.items():
+        writer.writerow([filename, count])
diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py
new file mode 100644
index 0000000..71cf762
--- /dev/null
+++ b/sre2020/Poedjadevie_Scatterplot.py
@@ -0,0 +1,146 @@
+import json
+from pip._vendor import requests
+import csv
+
+# @dictFiles empty dictionary of files
+# @lstTokens GitHub authentication tokens
+def countfiles(authorlist, dictFiles, lsttokens, repo):
+    ipage = 1  # url page counter
+    ct = 0  # token counter
+# loop though all the commit pages until the last returned empty page
+    try:
+        # loop though all the commit pages until the last returned empty page
+        while True:
+            if ct == len(lstTokens):
+                ct = 0
+            spage = str(ipage)
+            commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \
+                        '&per_page=100&access_token=' + lsttokens[ct]
+
+            ct += 1
+            content = requests.get(commitsUrl)
+            jsonCommits = json.loads(content.content)
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+                
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                sha = shaObject['sha']
+                if ct == len(lstTokens):
+                    ct = 0
+                # For each commit, use the GitHub commit API to extract the files touched by the commit
+                shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \
+                         + '?access_token=' + lstTokens[ct]
+                ct += 1
+                content = requests.get(shaUrl)
+                shaDetails = json.loads(content.content)
+                filesjson = shaDetails['files']
+                
+                for filenameObj in filesjson:
+                    filename = filenameObj['filename']
+                    #Only include files that are written in specific back end language
+                    if filename.endswith(('.java','.h','.kt','.js','.cpp')):
+                        #Save the file, author and date in the authorlist
+                        author = list()
+                        author.append(filename)
+                        author.append(shaDetails['commit']['author']['name'])
+                        author.append(shaDetails['commit']['author']['date'])
+                        authorlist.append(author)
+                        #Use the dictfiles to store the ccount of files
+                        dictfiles[filename] = dictfiles.get(filename, 0) + 1 
+            ipage += 1
+    except Exception as e:
+        print(e)
+        exit(0)
+        
+repo = 'scottyab/rootbeer'
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['0b0923f7c7f008611cfe72e093bef4c0d8a8c1fe']
+
+dictfiles = dict()
+authorlist = list()
+countfiles(authorlist, dictfiles, lstTokens, repo)
+#Sort the dictfiles in ascending order for the count value
+dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1]))
+
+import matplotlib.pyplot as plt
+from datetime import datetime
+import math
+
+listfiles = (sorted(dictfiles.items(), key=lambda x:x[1]))
+top50files = dict()
+
+count=1
+#Simple function to rename the files to f01 to f50
+if len(listfiles) <=50:
+    for file in listfiles:
+        if count<10:
+            top50files[file[0]]= 'f0' + str(count)
+        else:
+            top50files[file[0]]='f' + str(count)
+        count+=1
+else:
+    for file in listfiles[len(listfiles)-50]:
+        if count<10:
+            top50files[file[0]]='f0' + str(count)
+        else:
+            top50files[file[0]]='f' + str(count)
+        count+=1
+
+x = list()
+y = list()
+c = list()
+a = dict()
+
+c1=0
+
+#Function to get the data ready for plotting
+for author in authorlist:
+    if author[0] in top50files:
+        x.append(top50files[author[0]])
+        y.append(author[2].split('T')[0])
+        if author[1] not in a:
+            a[author[1]] = c1
+            c1+=25
+        c.append(a[author[1]])   
+
+#Use this to first sort the data based on the dates (y-axis)
+lists = sorted(zip(y,x,c))
+new_y, new_x, new_c= list(zip(*lists))
+
+weeks= list()
+week = 0
+date_prev = 0
+
+#Function to get the corresponding weeks for the y-axis
+for day in new_y:
+    date = datetime.strptime(day, '%Y-%m-%d')
+    if date_prev == 0:
+        weeks.append(week)
+        date_prev = date
+    else:
+        diff = (date - date_prev).days
+        if diff < 7:
+            weeks.append(week)
+        else:
+            if diff%7 == 0:
+                week = week + diff/7
+                weeks.append(week)
+            else:
+                week += math.floor(diff/7)
+                weeks.append(week)
+        date_prev = date
+
+plt.scatter(new_x,weeks,c=new_c,s=20)
+plt.xlabel("File")
+plt.ylabel("Weeks")
+plt.grid(True)
+plt.show()
+
+file = repo.split('/')[1]
+plt.savefig(file+'Plot.png')

From 5a37322b54f4a149decfa4bb9c66ebbb32ed32b0 Mon Sep 17 00:00:00 2001
From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:23:07 +0100
Subject: [PATCH 2/4] Update Poedjadevie_Scatterplot.py

---
 sre2020/Poedjadevie_Scatterplot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py
index 71cf762..7c5b552 100644
--- a/sre2020/Poedjadevie_Scatterplot.py
+++ b/sre2020/Poedjadevie_Scatterplot.py
@@ -60,7 +60,7 @@ def countfiles(authorlist, dictFiles, lsttokens, repo):
 # repo = 'k9mail/k-9'
 
 # put your tokens here
-lstTokens = ['0b0923f7c7f008611cfe72e093bef4c0d8a8c1fe']
+lstTokens = ['']
 
 dictfiles = dict()
 authorlist = list()

From c03d124a7438de07afae60e7b20817d6b1f78741 Mon Sep 17 00:00:00 2001
From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:23:41 +0100
Subject: [PATCH 3/4] Delete Poedjadevie_Scatterplot.py

---
 sre2020/Poedjadevie_Scatterplot.py | 146 -----------------------------
 1 file changed, 146 deletions(-)
 delete mode 100644 sre2020/Poedjadevie_Scatterplot.py

diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py
deleted file mode 100644
index 7c5b552..0000000
--- a/sre2020/Poedjadevie_Scatterplot.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import json
-from pip._vendor import requests
-import csv
-
-# @dictFiles empty dictionary of files
-# @lstTokens GitHub authentication tokens
-def countfiles(authorlist, dictFiles, lsttokens, repo):
-    ipage = 1  # url page counter
-    ct = 0  # token counter
-# loop though all the commit pages until the last returned empty page
-    try:
-        # loop though all the commit pages until the last returned empty page
-        while True:
-            if ct == len(lstTokens):
-                ct = 0
-            spage = str(ipage)
-            commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + spage + \
-                        '&per_page=100&access_token=' + lsttokens[ct]
-
-            ct += 1
-            content = requests.get(commitsUrl)
-            jsonCommits = json.loads(content.content)
-            # break out of the while loop if there are no more commits in the pages
-            if len(jsonCommits) == 0:
-                break
-                
-            # iterate through the list of commits in a page
-            for shaObject in jsonCommits:
-                sha = shaObject['sha']
-                if ct == len(lstTokens):
-                    ct = 0
-                # For each commit, use the GitHub commit API to extract the files touched by the commit
-                shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha \
-                         + '?access_token=' + lstTokens[ct]
-                ct += 1
-                content = requests.get(shaUrl)
-                shaDetails = json.loads(content.content)
-                filesjson = shaDetails['files']
-                
-                for filenameObj in filesjson:
-                    filename = filenameObj['filename']
-                    #Only include files that are written in specific back end language
-                    if filename.endswith(('.java','.h','.kt','.js','.cpp')):
-                        #Save the file, author and date in the authorlist
-                        author = list()
-                        author.append(filename)
-                        author.append(shaDetails['commit']['author']['name'])
-                        author.append(shaDetails['commit']['author']['date'])
-                        authorlist.append(author)
-                        #Use the dictfiles to store the ccount of files
-                        dictfiles[filename] = dictfiles.get(filename, 0) + 1 
-            ipage += 1
-    except Exception as e:
-        print(e)
-        exit(0)
-        
-repo = 'scottyab/rootbeer'
-# repo = 'Skyscanner/backpack'
-# repo = 'mendhak/gpslogger'
-# repo = 'k9mail/k-9'
-
-# put your tokens here
-lstTokens = ['']
-
-dictfiles = dict()
-authorlist = list()
-countfiles(authorlist, dictfiles, lstTokens, repo)
-#Sort the dictfiles in ascending order for the count value
-dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1]))
-
-import matplotlib.pyplot as plt
-from datetime import datetime
-import math
-
-listfiles = (sorted(dictfiles.items(), key=lambda x:x[1]))
-top50files = dict()
-
-count=1
-#Simple function to rename the files to f01 to f50
-if len(listfiles) <=50:
-    for file in listfiles:
-        if count<10:
-            top50files[file[0]]= 'f0' + str(count)
-        else:
-            top50files[file[0]]='f' + str(count)
-        count+=1
-else:
-    for file in listfiles[len(listfiles)-50]:
-        if count<10:
-            top50files[file[0]]='f0' + str(count)
-        else:
-            top50files[file[0]]='f' + str(count)
-        count+=1
-
-x = list()
-y = list()
-c = list()
-a = dict()
-
-c1=0
-
-#Function to get the data ready for plotting
-for author in authorlist:
-    if author[0] in top50files:
-        x.append(top50files[author[0]])
-        y.append(author[2].split('T')[0])
-        if author[1] not in a:
-            a[author[1]] = c1
-            c1+=25
-        c.append(a[author[1]])   
-
-#Use this to first sort the data based on the dates (y-axis)
-lists = sorted(zip(y,x,c))
-new_y, new_x, new_c= list(zip(*lists))
-
-weeks= list()
-week = 0
-date_prev = 0
-
-#Function to get the corresponding weeks for the y-axis
-for day in new_y:
-    date = datetime.strptime(day, '%Y-%m-%d')
-    if date_prev == 0:
-        weeks.append(week)
-        date_prev = date
-    else:
-        diff = (date - date_prev).days
-        if diff < 7:
-            weeks.append(week)
-        else:
-            if diff%7 == 0:
-                week = week + diff/7
-                weeks.append(week)
-            else:
-                week += math.floor(diff/7)
-                weeks.append(week)
-        date_prev = date
-
-plt.scatter(new_x,weeks,c=new_c,s=20)
-plt.xlabel("File")
-plt.ylabel("Weeks")
-plt.grid(True)
-plt.show()
-
-file = repo.split('/')[1]
-plt.savefig(file+'Plot.png')

From 1d34f77d6dc3e1f1e58f1c750add99464ccb6af1 Mon Sep 17 00:00:00 2001
From: KadjelRamkisoen <78154694+KadjelRamkisoen@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:24:06 +0100
Subject: [PATCH 4/4] Add files via upload

---
 sre2020/Poedjadevie_Scatterplot.py | 146 +++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 sre2020/Poedjadevie_Scatterplot.py

diff --git a/sre2020/Poedjadevie_Scatterplot.py b/sre2020/Poedjadevie_Scatterplot.py
new file mode 100644
index 0000000..7c5b552
--- /dev/null
+++ b/sre2020/Poedjadevie_Scatterplot.py
@@ -0,0 +1,146 @@
+import json
+from pip._vendor import requests
+import csv
+
+# @authorlist list that receives one [filename, author, date] entry per touched source file
+# @dictFiles dictionary that maps each touched source file to its number of touches
+# @lsttokens GitHub authentication tokens, used round-robin (an empty token sends no credentials)
+def countfiles(authorlist, dictFiles, lsttokens, repo):
+    """Walk every commit of `repo` through the GitHub API; fill authorlist and count touches in dictFiles."""
+    ipage = 1  # url page counter
+    ct = 0  # token counter
+    try:
+        # loop through all the commit pages until the last returned empty page
+        while True:
+            # rotate over the tokens that were passed in, not over a module-level global
+            token = lsttokens[ct % len(lsttokens)]
+            ct += 1
+            # GitHub no longer accepts ?access_token=...; credentials go in the Authorization header
+            headers = {'Authorization': 'token ' + token} if token else {}
+            commitsUrl = 'https://api.github.com/repos/' + repo + '/commits?page=' + str(ipage) + \
+                        '&per_page=100'
+            content = requests.get(commitsUrl, headers=headers)
+            jsonCommits = json.loads(content.content)
+            # break out of the while loop if there are no more commits in the pages
+            if len(jsonCommits) == 0:
+                break
+
+            # iterate through the list of commits in a page
+            for shaObject in jsonCommits:
+                sha = shaObject['sha']
+                token = lsttokens[ct % len(lsttokens)]
+                ct += 1
+                headers = {'Authorization': 'token ' + token} if token else {}
+                # For each commit, use the GitHub commit API to extract the files touched by the commit
+                shaUrl = 'https://api.github.com/repos/' + repo + '/commits/' + sha
+                content = requests.get(shaUrl, headers=headers)
+                shaDetails = json.loads(content.content)
+                filesjson = shaDetails['files']
+
+                for filenameObj in filesjson:
+                    filename = filenameObj['filename']
+                    # Only include files that are written in specific back end language
+                    if filename.endswith(('.java','.h','.kt','.js','.cpp')):
+                        # Save the file, author and date in the authorlist
+                        author = list()
+                        author.append(filename)
+                        author.append(shaDetails['commit']['author']['name'])
+                        author.append(shaDetails['commit']['author']['date'])
+                        authorlist.append(author)
+                        # Count the touch in the dictionary that was passed in, not in a global
+                        dictFiles[filename] = dictFiles.get(filename, 0) + 1
+            ipage += 1
+    except Exception as e:
+        print(e)
+        exit(1)  # a failed crawl must not report success to the calling shell
+        
+repo = 'scottyab/rootbeer'
+# other repositories that can be analysed instead:
+# repo = 'Skyscanner/backpack'
+# repo = 'mendhak/gpslogger'
+# repo = 'k9mail/k-9'
+
+# put your tokens here
+lstTokens = ['']
+
+# Crawl the repository:
+#   dictfiles  maps each touched source file to its number of touches
+#   authorlist holds one [filename, author, date] entry per touch
+dictfiles = dict()
+authorlist = list()
+countfiles(authorlist, dictfiles, lstTokens, repo)
+# Sort the dictfiles in ascending order for the count value
+dictfiles = dict(sorted(dictfiles.items(), key=lambda x:x[1]))
+
+import matplotlib.pyplot as plt
+from datetime import datetime
+import math
+
+# Scatter plot of the touches: x = file label, y = week number, colour = author
+
+# (filename, count) pairs, least-touched first
+listfiles = (sorted(dictfiles.items(), key=lambda x:x[1]))
+
+# Rename the (at most) 50 most-touched files to f01 .. f50.
+# listfiles is sorted ascending, so the most-touched files are its LAST 50 entries.
+# A slice is required here: indexing the single element listfiles[len(listfiles)-50]
+# would loop over one (filename, count) tuple instead of over the files.
+# The slice also covers the short case: [-50:] of a shorter list is the whole list.
+top50files = dict()
+for count, file in enumerate(listfiles[-50:], start=1):
+    # zero-pad to two digits (f01, f02, ...) so every label has the same width
+    top50files[file[0]] = 'f' + str(count).zfill(2)
+
+x = list()  # file label (f01 .. f50) of every plotted touch
+y = list()  # commit day (YYYY-MM-DD) of every plotted touch
+c = list()  # colour value of the author of every plotted touch
+a = dict()  # author name -> colour value
+
+# c1 is the next unused colour value
+c1=0
+
+# Get the data ready for plotting
+for author in authorlist:
+    if author[0] in top50files:
+        x.append(top50files[author[0]])
+        # keep only the day part of the timestamp (the text before the 'T')
+        y.append(author[2].split('T')[0])
+        # every new author gets the next colour value, 25 apart
+        if author[1] not in a:
+            a[author[1]] = c1
+            c1+=25
+        c.append(a[author[1]])
+
+# Nothing to plot (e.g. the repository has no matching source files):
+# stop with a clear message, because unpacking zip(*[]) below would raise a ValueError
+if not x:
+    raise SystemExit("No file touches found for " + repo)
+
+# Use this to first sort the data based on the dates (y-axis)
+lists = sorted(zip(y,x,c))
+new_y, new_x, new_c= list(zip(*lists))
+
+# Get the corresponding weeks for the y-axis.
+# Every touch is measured against the FIRST (earliest) date. Measuring against the
+# previous touch drops the remainder days at every step, so a series of commits that
+# are each less than a week apart would never leave week 0.
+# Week 0 is therefore the week that starts at the earliest touch.
+first_date = datetime.strptime(new_y[0], '%Y-%m-%d')
+weeks = list()
+for day in new_y:
+    date = datetime.strptime(day, '%Y-%m-%d')
+    # whole weeks elapsed since the first touch; // keeps the week numbers integers
+    weeks.append((date - first_date).days // 7)
+
+# one dot per touch; c maps each author's colour value onto the colormap,
+# s is the marker size
+plt.scatter(new_x,weeks,c=new_c,s=20)
+plt.xlabel("File")
+plt.ylabel("Weeks")
+plt.grid(True)
+
+# Save BEFORE show(): with a blocking show() the figure is released once the window
+# is closed, so a later savefig() would write an empty image
+file = repo.split('/')[1]
+plt.savefig(file+'Plot.png')
+plt.show()