-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
73 lines (50 loc) · 1.81 KB
/
main.py
File metadata and controls
73 lines (50 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from google import search
import os, errno, uuid
import requests
from urllib2 import HTTPError
#from bs4 import BeautifulSoup
from bing_search_api import BingSearchAPI
# api = BingSearchAPI('YOU_API_KEY')
# result = api.searchImage('sunshine')
# print(result.json)
def get_search(text = None, stop = 1000, lang='en'):
    """Run a Google search for *text* and return the result iterable.

    Thin wrapper around google.search; *stop* caps the number of results
    and *lang* selects the result language.
    """
    results = search(text, stop=stop, lang=lang)
    return results
def process_search(search=None, folder=None):
    """Download every URL yielded by *search* into *folder*.

    Creates *folder* if needed, then names each download with a short
    random prefix (last segment of a uuid4) plus the URL's basename with
    any query string stripped. A no-op if either argument is None.
    Stops and reports on the first HTTPError raised while iterating.
    """
    if search is None or folder is None:
        return
    try:
        os.makedirs(folder)
    except OSError as exc:  # tolerate a pre-existing directory only
        if exc.errno == errno.EEXIST and os.path.isdir(folder):
            pass
        else:
            raise
    try:
        for url in search:
            # random prefix avoids collisions between same-named files
            local_filename = folder + "/" + str(uuid.uuid4()).split('-')[4] + '_' + url.split('/')[-1].split('?')[0]
            print(url, local_filename)
            download_url(url, local_filename)
    except HTTPError as e:  # was `except HTTPError, e:` — Py2-only syntax
        print("HTTP Error Occurred: ", e)
def download_url(url, local_filename):
    """Stream *url* to *local_filename* in 512 KiB chunks.

    SSL, invalid-schema, and connection errors are reported and
    swallowed (best-effort download); other exceptions propagate.
    """
    # Have I already gotten the url
    try:
        r = requests.get(url, stream=True)
        # `with` guarantees the file is closed even if iter_content raises
        # mid-stream (the original leaked the handle in that case).
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=512 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    except requests.exceptions.SSLError:  # was Py2-only `except X, e` syntax
        print("SSL Error for", url)
    except requests.exceptions.InvalidSchema:
        print("Invalid Schema:", url)
    except requests.exceptions.ConnectionError:
        print("Connection Error:", url)
def main():
    """Search for Java/C# PDF resumes and download the hits into ./out."""
    results = get_search('filetype:pdf resume java c#')
    process_search(results, folder="./out")


if __name__ == "__main__":
    main()