# MyCapPython/webscraper.py at main · yatharth2403/MyCapPython · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# This script scrapes the name, capital, population and area of every country
# listed at the URL below and stores the results in a CSV file.

import requests
from bs4 import BeautifulSoup
import pandas

# Not read anywhere in the visible code.
# NOTE(review): this looks like a mypy configuration option that was pasted
# into the script — confirm before removing.
ignore_missing_imports = True

url = "https://scrapethissite.com/pages/simple/"
scrapped_info_list = []

# Always pass a timeout: without one, requests.get() can block indefinitely
# if the server never responds.
req = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page and
# silently writing an empty CSV.
req.raise_for_status()
content = req.content

soup = BeautifulSoup(content, "html.parser")

# Every <div class="col-md-4"> on the page is treated as one country entry.
all_countries = soup.find_all("div", {"class": "col-md-4"})

for country in all_countries:
    # Only the name is stripped of surrounding whitespace; the other three
    # fields are stored exactly as they appear in their <span> elements.
    country_dict = {
        "name": country.find("h3", {"class": "country-name"}).text.strip(),
        "capital": country.find("span", {"class": "country-capital"}).text,
        "population": country.find("span", {"class": "country-population"}).text,
        "area": country.find("span", {"class": "country-area"}).text,
    }
    scrapped_info_list.append(country_dict)

# One row per country; columns follow the dictionary keys above. The default
# row index is written as the first CSV column.
dataFrame = pandas.DataFrame(scrapped_info_list)
dataFrame.to_csv("Countries.csv")