# vpCrawler.py
#
# Merges the per-vote data.json files found under ./years/<year>/<entry>/
# into two summary files: mergedData.json (bill votes keyed by bill code)
# and names.json (legislator id -> "party-state-display_name").
import os
import json
from pprint import pprint
def getDateOfBill(fullDate):
    """Return the leading calendar-date part of a timestamp string.

    Args:
        fullDate: A string that is assumed to start with a 10-character
            date laid out as "YYYY-MM-DD" (4-2-2 digits), optionally
            followed by a time component.

    Returns:
        The first ten characters of ``fullDate``. Strings shorter than
        ten characters are returned unchanged.
    """
    # Slicing never raises on short input, so no length check is needed.
    return fullDate[:10]
def main(parentPath="./years/", startingYear=2015, endYear=2017,
         mergedDataPath='mergedData.json', namesPath='names.json'):
    """Merge per-vote ``data.json`` files into two summary JSON files.

    Reads ``<parentPath>/<year>/<entry>/data.json`` for every year in
    ``range(startingYear, endYear)``. Hidden entries (names starting with
    ".") and entries that are not directories are skipped. A record is kept
    only when it has a "bill" key and no "amendment" key.

    Each kept record is stored under a bill code of the form
    ``<date>_<version>_<billType><billNumber>``, e.g. "2015-01-06_01_hr22":
    the date is the first 10 characters of the record's "date", and the
    version is a zero-padded counter (starting at "01") of records seen
    for the same bill on the same date.

    Args:
        parentPath: Directory that contains one sub-directory per year.
        startingYear: First year to read (inclusive).
        endYear: Year at which to stop (exclusive).
        mergedDataPath: Output file for the merged bill records.
        namesPath: Output file for the legislator id -> label mapping.

    Raises:
        OSError: If a year directory or a ``data.json`` file is missing.
        KeyError: If a kept record lacks one of the fields read below.
        IndexError, ValueError: If "requires" is not of the form "a/b".
    """
    mergedData = {}
    # billID -> how many records have been seen for that bill on that date.
    billIDCounter = {}
    names = {}

    for year in range(startingYear, endYear):
        yearPath = os.path.join(parentPath, str(year))
        # os.listdir returns entries in arbitrary order; sorting makes the
        # version suffix assigned to each record reproducible between runs.
        for directory in sorted(os.listdir(yearPath)):
            if directory.startswith('.'):  # Ignore hidden files/dirs
                continue
            votePath = os.path.join(yearPath, directory)
            if not os.path.isdir(votePath):
                # A stray regular file cannot contain a data.json.
                continue

            with open(os.path.join(votePath, 'data.json')) as f:
                data = json.load(f)
            if "bill" not in data or "amendment" in data:
                continue

            billDate = getDateOfBill(data["date"])
            billName = str(data["bill"]["type"]) + str(data["bill"]["number"])
            billID = billDate + billName
            billIDCounter[billID] = billIDCounter.get(billID, 0) + 1
            billCode = "{}_{:02d}_{}".format(
                billDate, billIDCounter[billID], billName)

            result = data["result_text"]
            # "requires" is a fraction written as text, e.g. "1/2".
            fraction = data["requires"].split("/")
            mergedData[billCode] = {
                "bill": data["bill"],
                "votes": data["votes"],
                "result": ("Agreed" in result or "Passed" in result
                           or "Overridden" in result),
                "requires": float(fraction[0]) / float(fraction[1]),
                "vote_id": data["vote_id"],
            }

            # NOTE(review): assumes every voter entry is a dict carrying
            # "id", "party", "state" and "display_name" - confirm against
            # the data source.
            for vote_type in data["votes"]:
                for legislator in data["votes"][vote_type]:
                    names[legislator["id"]] = (
                        legislator["party"] + "-" + legislator["state"]
                        + "-" + legislator["display_name"])

    with open(mergedDataPath, 'w') as f:
        json.dump(mergedData, f, sort_keys=True, indent=4,
                  separators=(',', ': '))
    with open(namesPath, 'w') as f:
        json.dump(names, f)
if __name__ == "__main__":
    # Run the merge only when executed as a script, not when imported.
    main()