-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiffbot_script.py
More file actions
38 lines (31 loc) · 1.01 KB
/
diffbot_script.py
File metadata and controls
38 lines (31 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import requests
def main():
    """Fetch the article text for every URL and write it to corpus.txt.

    The URLs come from prepare_urls(). Each one is sent to the
    http://api.diffbot.com/v3/article endpoint through request(), and the
    returned texts are concatenated in input order, with no separator,
    into corpus.txt (overwriting any previous content).

    The API token is read from the DIFFBOT_TOKEN environment variable when
    it is set; otherwise the token embedded below is used.
    """
    import os  # local import: only needed for the token override below

    # NOTE(review): the endpoint is plain http, so the token travels
    # unencrypted -- confirm whether an https endpoint can be used instead.
    request_url = "http://api.diffbot.com/v3/article"

    # NOTE(review): a credential is committed in source. The literal is kept
    # only as a fallback so existing runs keep working; prefer setting
    # DIFFBOT_TOKEN and rotating this value.
    token = os.environ.get("DIFFBOT_TOKEN",
                           "640f89858861a736e8c6dcdc9f454411")

    texts = []
    for url in prepare_urls():
        params = {'token': token,
                  'url': url,
                  'discussion': False}
        texts.append(request(request_url, params))

    # Open the output only after every request has succeeded, so a failure
    # part-way through does not truncate an existing corpus.txt. The explicit
    # encoding keeps non-ASCII article text from failing on locales whose
    # default encoding cannot represent it.
    with open("corpus.txt", "w", encoding="utf-8") as file:
        file.write("".join(texts))
def prepare_urls():
    """Return the URLs listed in pages.txt as a list of strings.

    pages.txt is read from the current working directory and is expected to
    hold one URL per line. Surrounding whitespace (including a trailing
    carriage return) is removed from each line, and lines that are empty
    after stripping are skipped so they are not turned into empty URLs.

    Returns:
        list of str: the URLs in file order.

    Raises:
        OSError: pages.txt cannot be opened.
    """
    # The context manager closes the file even if reading fails.
    with open("pages.txt") as pages:
        stripped = (line.strip() for line in pages)
        return [url for url in stripped if url]
def request(url, params, timeout=30):
    """Perform an HTTP GET and return the first result object's text.

    Args:
        url: Endpoint to query.
        params: Query-string parameters forwarded to requests.get.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests would wait indefinitely on a stalled
            connection.

    Returns:
        The "text" value of the first entry in the "objects" list of the
        JSON response.

    Raises:
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx
            status.
        requests.exceptions.Timeout: no response arrived within `timeout`.
        KeyError, IndexError: the JSON body has no "objects" entry, the
            list is empty, or the first object has no "text".
    """
    response = requests.get(url, params=params, timeout=timeout)
    # Fail on HTTP errors here rather than on a confusing JSON/key error
    # further down.
    response.raise_for_status()
    first_object = response.json()['objects'][0]
    return first_object["text"]
# Script entry point: build the corpus only when this file is executed
# directly (not when it is imported), then report completion.
if __name__ == "__main__":
    main()
    print("Complete")