-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebScrapingProject.py
More file actions
63 lines (41 loc) · 1.7 KB
/
WebScrapingProject.py
File metadata and controls
63 lines (41 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import requests
from bs4 import BeautifulSoup
import sqlite3
import pandas
def connect():
    """Create the ``COUNTRY_INFO`` table in ``country.db`` if it is missing.

    Opens the SQLite database file ``country.db`` (relative to the current
    working directory), issues ``CREATE TABLE IF NOT EXISTS`` for the
    four-column ``COUNTRY_INFO`` table, prints a confirmation message and
    closes the connection.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement
            fails.
    """
    conn = sqlite3.connect("country.db")
    try:
        conn.execute(
            "CREATE TABLE IF NOT EXISTS COUNTRY_INFO "
            "(NAME TEXT, CAPITAL TEXT, POPULATION INT, AREA FLOAT)"
        )
        # Explicit commit so the DDL is persisted whatever transaction mode
        # the connection happens to be in; it is a no-op otherwise.
        conn.commit()
        print("Table created successfully")
    finally:
        # Always release the database handle, even if the statement failed.
        conn.close()
def insert_into_table(values):
    """Insert one record into ``COUNTRY_INFO`` and commit it.

    Args:
        values: Sequence of four items bound, in order, to the NAME,
            CAPITAL, POPULATION and AREA columns.

    Raises:
        sqlite3.Error: If the insert fails, e.g. the table does not exist
            or ``values`` does not supply exactly four bindings.
    """
    conn = sqlite3.connect("country.db")
    try:
        conn.execute(
            "INSERT INTO COUNTRY_INFO (NAME, CAPITAL, POPULATION, AREA) "
            "VALUES (?, ?, ?, ?)",
            values,
        )
        conn.commit()
    finally:
        # Close the connection on failure too, so a bad row does not leak
        # an open handle on the database file.
        conn.close()
def display_info():
    """Print every row of ``COUNTRY_INFO``, one tuple per line.

    Reads all rows from the ``COUNTRY_INFO`` table in ``country.db`` and
    prints each one with ``print``.

    Raises:
        sqlite3.Error: If the query fails, e.g. the table does not exist.
    """
    conn = sqlite3.connect("country.db")
    try:
        table_data = conn.execute("SELECT * FROM COUNTRY_INFO").fetchall()
        for record in table_data:
            print(record)
    finally:
        # Close the connection even when the query raises.
        conn.close()
if __name__ == "__main__":
    # Script entry point: download the country listing, store every record
    # in the SQLite table, print the stored rows, and export a CSV file.
    url = "https://scrapethissite.com/pages/simple/"
    connect()

    # Without a timeout requests waits indefinitely on an unresponsive
    # server; raise_for_status() stops us from parsing an HTTP error page.
    req = requests.get(url, timeout=30)
    req.raise_for_status()

    soup = BeautifulSoup(req.content, "html.parser")
    all_countries = soup.find_all("div", {"class": "col-md-4 country"})

    scraped_info_list = []
    for country in all_countries:
        # Insertion order of the dict matches the table's column order
        # (NAME, CAPITAL, POPULATION, AREA), so its values can be bound
        # positionally by insert_into_table().
        country_dict = {
            "country_name": country.find("h3", {"class": "country-name"}).text.strip(),
            "country_capital": country.find("span", {"class": "country-capital"}).text,
            "country_pop": country.find("span", {"class": "country-population"}).text,
            "country_area": country.find("span", {"class": "country-area"}).text,
        }
        print(country_dict.values())
        insert_into_table(tuple(country_dict.values()))
        scraped_info_list.append(country_dict)

    # Dump the table once, after all inserts, instead of re-printing the
    # whole table after every single country.
    display_info()

    dataframe = pandas.DataFrame(scraped_info_list)
    dataframe.to_csv("country.csv")