-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscrape.py
More file actions
35 lines (25 loc) · 782 Bytes
/
scrape.py
File metadata and controls
35 lines (25 loc) · 782 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys
import requests
import json
from lxml import html
# Merge previously saved content into the scraped data structure and write
# the combined result to data/combined.json.

# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module is reloaded to get it back before forcing UTF-8 as the default
# codec.  Python 3 has no reload() builtin and already defaults to UTF-8,
# so the hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

print("Building data structure.")
# Imported here (after the progress message) to keep the original order of
# side effects.
from lib.scraper import Scraper
data = Scraper.build()
print("Building data structure complete.")

# Saved content, looked up below by the string form of each entry's pid.
# The context manager closes the handle that the original left open.
with open('data/results3.json', 'r') as results_file:
    content = json.load(results_file)

# `data` is used as a two-level mapping (president -> category) whose leaves
# are iterated as sequences of dict entries carrying 'pid' and 'content'.
# NOTE(review): assumes each category holds a list-like sequence, as the
# original integer indexing implies - confirm against Scraper.build().
for categories in data.values():
    for entries in categories.values():
        for entry in entries:
            # Entries that already carry content are left untouched.
            if entry['content'] is not None:
                continue
            key = str(entry['pid'])
            if key in content:
                entry['content'] = content[key]
                print("Success: " + key)
            else:
                print("Not Found: " + key)

# Entries are dicts mutated in place, so `data` now holds the merged result.
with open("data/combined.json", "w") as combined_file:
    combined_file.write(json.dumps(data))