-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOpen Scraper.py
More file actions
79 lines (56 loc) · 2.49 KB
/
Open Scraper.py
File metadata and controls
79 lines (56 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
# coding: utf-8
# In[10]:
import email
import getpass
import smtplib
import ssl
from datetime import date
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup
# In[11]:
def puncClean(soup):
    """Repair mis-decoded punctuation and strip tabs/non-breaking spaces.

    UTF-8 curly quotes and em dashes are three bytes long (E2 80 xx). When
    such text is decoded with a single-byte codec each one turns into a
    three-character run starting with U+00E2 ("a" with circumflex). Each run
    is matched in full, in both its Latin-1 and Windows-1252 spelling, and
    replaced with plain punctuation: quotes become straight quotes, the em
    dash stays an em dash.

    Args:
        soup: The text to clean (a ``str``; despite the name, not a
            BeautifulSoup object).

    Returns:
        The cleaned string.
    """
    a_circ = "\u00e2"
    # (third char under Latin-1, third char under Windows-1252, replacement)
    tails = (
        ("\u0099", "\u2122", "'"),       # U+2019 right single quote
        ("\u009c", "\u0153", '"'),       # U+201C left double quote
        ("\u009d", "\u009d", '"'),       # U+201D right double quote
        ("\u0098", "\u02dc", "'"),       # U+2018 left single quote
        ("\u0094", "\u201d", "\u2014"),  # U+2014 em dash
    )
    for latin1_tail, cp1252_tail, plain in tails:
        soup = soup.replace(a_circ + "\u0080" + latin1_tail, plain)
        soup = soup.replace(a_circ + "\u20ac" + cp1252_tail, plain)

    # Fallbacks kept from the previous implementation for text whose trailing
    # characters were already lost: a bare U+00E2 before "s" is treated as an
    # apostrophe, any other bare U+00E2 as a double quote.
    # NOTE(review): this also rewrites a legitimate U+00E2 in the text.
    soup = soup.replace(a_circ + "s", "'s")
    soup = soup.replace(a_circ, '"')

    # Drop layout characters that carry no meaning in a plain-text e-mail.
    soup = soup.replace("\t", "")
    soup = soup.replace("\xa0", "")
    return soup
# In[12]:
# Scrape the briefing page and clean the text extracted from it.
url = 'https://www.djcustomnews.com/tntcaviso/mb.html'
# The timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed and mailed.
req = requests.get(url, timeout=30)
req.raise_for_status()
page = req.text
soup = BeautifulSoup(page, 'html.parser')

# The two <td> cells of interest carry no id; they are located by their exact
# inline style, which differs between them only in the line-height value.
_COPY_CELL_STYLE = (
    "font-size:14px;line-height:{line_height};"
    "font-family:'Helvetica Neue', Helvetica, Arial, 'Roboto', sans-serif;"
    "color:#000000;"
    "padding-top:5px;padding-bottom:5px;padding-right:0;padding-left:0;"
    "-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;"
    "mso-table-lspace:0pt;mso-table-rspace:0pt;"
    "text-align:auto;"
)


def _copy_cell_attrs(line_height):
    """Return the attribute filter for a 'padding-copy' cell of that line height."""
    return {
        'align': 'left',
        'class': 'padding-copy',
        'style': _COPY_CELL_STYLE.format(line_height=line_height),
    }


marketwrap_soup = soup.find('td', attrs=_copy_cell_attrs('16px'))
headlines_soup = soup.find('td', attrs=_copy_cell_attrs('20px'))

# find() returns None when nothing matches; fail with a clear message rather
# than an AttributeError on .get_text() if the page markup has changed.
if headlines_soup is None or marketwrap_soup is None:
    raise RuntimeError(
        "Could not locate the headlines/market-wrap cells in " + url
        + "; the page layout may have changed."
    )

text = (
    "Headlines:\n"
    + puncClean(headlines_soup.get_text())
    + puncClean(marketwrap_soup.get_text())
)
print(text)
# In[14]:
# Date used in the subject line, formatted like "March 05, 2024".
today = date.today()
date1 = today.strftime("%B %d, %Y")

# NOTE(review): my_address and real_address hold placeholder text, not real
# addresses - presumably they must be filled in before running.
my_address = 'send from Office email'
test_address = 'stefanrobb@ibutt.com'
real_address = 'send to emails seperated by a ,'
# getpass reads the password without echoing it to the terminal.
password = getpass.getpass("Type your password and press enter:")
port = 587
smtp_server = "smtp-mail.outlook.com"

# Build the whole message before connecting so the SMTP session is only
# open for the time it takes to authenticate and send.
msg = MIMEMultipart()  # create a message
message = text
print(message)

# setup the parameters of the message
msg['From'] = my_address
msg['To'] = real_address
msg['Subject'] = date1 + ": Morning Briefing"

# add in the message body
msg.attach(MIMEText(message, 'plain'))

# The with-block ends the SMTP session (QUIT) even if login or sending
# raises, so the connection is never left open.
with smtplib.SMTP(host=smtp_server, port=port) as s:
    # Upgrade to TLS with certificate and hostname verification before
    # sending the credentials.
    s.starttls(context=ssl.create_default_context())
    s.login(my_address, password)
    # send the message via the server set up above
    s.send_message(msg)

del msg