Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions advisory_parser/parsers/chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
# License: LGPLv3+

import re
import logging
from datetime import datetime

from advisory_parser.exceptions import AdvisoryParserTextException
from advisory_parser.flaw import Flaw
from .utils import get_text_from_url, CVE_REGEX

logger = logging.getLogger(__name__)

# Chromium does not publish CVSS scores with their CVEs so these values are
# best-effort guesses based on impact.
CVSS3_MAP = {
Expand All @@ -18,6 +21,13 @@
"low": "4.3/CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:N/A:L",
}

# Constants for Chrome advisory parsing
# Marker text that must appear in the advisory; the parser raises if it is absent.
SECURITY_FIXES_HEADER = "Security Fixes"
# Separator marking the end of the blog post; only the text before it is parsed.
LABELS_FOOTER = "Labels:\nStable updates"
# strptime format of the advisory's public date line (weekday, month day, year).
DATE_FORMAT = "%A, %B %d, %Y"
# Pattern for the fixed-in Chrome version, e.g. 46.0.2490.71
CHROME_VERSION_REGEX = r"\d{2,3}\.\d\.\d{4}\.\d{2,3}"
# Prefix to which an upstream bug ID is appended when building the flaw description.
CHROMIUM_ISSUE_BASE_URL = "https://code.google.com/p/chromium/issues/detail?id="


def parse_chrome_advisory(url):
advisory_text = get_text_from_url(url)
Expand All @@ -26,28 +36,28 @@ def parse_chrome_advisory(url):
# https://chromereleases.googleblog.com/2018/04/stable-channel-update-for-desktop.html
advisory_text = re.sub(r"(.)\[", r"\1\n[", advisory_text)

if "Security Fixes" not in advisory_text:
if SECURITY_FIXES_HEADER not in advisory_text:
raise AdvisoryParserTextException("No security fixes found in {}".format(url))

# Throw away parts of the text after the blog post
flaws_text = advisory_text.split("Labels:\nStable updates")[0].strip()
flaws_text = advisory_text.split(LABELS_FOOTER)[0].strip()

# Parse out public date
match = re.search("^Stable Channel Update for Desktop\n(.+)", flaws_text, re.MULTILINE)
if not match:
raise AdvisoryParserTextException("Could not find public date in {}".format(url))

try:
public_date = datetime.strptime(match.group(1), "%A, %B %d, %Y")
public_date = datetime.strptime(match.group(1), DATE_FORMAT)
except ValueError:
raise AdvisoryParserTextException(
"Could not parse public date ({}) from {}".format(match.group(1), url)
)

# Find Chrome version, e.g. 46.0.2490.71
try:
fixed_in = re.search(r"\d{2,3}\.\d\.\d{4}\.\d{2,3}", flaws_text).group(0)
except ValueError:
fixed_in = re.search(CHROME_VERSION_REGEX, flaws_text).group(0)
except (AttributeError, ValueError):
raise AdvisoryParserTextException("Could not find fixed-in version in {}".format(url))

# Filter out lines that contain CVEs
Expand All @@ -72,7 +82,7 @@ def parse_chrome_advisory(url):
else:
match = re.search(r"(Critical|High|Medium|Low)", metadata)
if not match:
print("Could not find impact; skipping: {}".format(line))
logger.warning("Could not find impact; skipping: %s", line)
continue
else:
impact = match.group(1)
Expand Down Expand Up @@ -110,7 +120,7 @@ def parse_chrome_advisory(url):

description += "\n\nUpstream bug(s):\n"
for bug in bug_ids:
description += "\nhttps://code.google.com/p/chromium/issues/detail?id=" + bug
description += "\n" + CHROMIUM_ISSUE_BASE_URL + bug

com_url = (
url if "blogspot.com" in url else re.sub(r"blogspot\.[^/]*/", "blogspot.com/", url)
Expand Down
28 changes: 19 additions & 9 deletions advisory_parser/parsers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,44 @@
# License: LGPLv3+

import re
import gzip
import logging
from urllib.error import HTTPError, URLError
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup

from advisory_parser.exceptions import AdvisoryParserGetContentException

logger = logging.getLogger(__name__)

CVE_REGEX = re.compile(r"CVE-\d{4}-\d{4,}")


def get_request(url):
    """Fetch ``url`` and return the (decompressed) response body as bytes.

    Args:
        url: Address of the advisory page to download.

    Returns:
        The raw response body. A gzip-encoded response is transparently
        decompressed before being returned.

    Raises:
        AdvisoryParserGetContentException: If the URL is malformed, the
            server answers with an HTTP error status, the connection cannot
            be established or breaks while reading, or a gzip-encoded body
            cannot be decompressed.
    """
    # Local import: only needed to recognise a corrupt gzip payload below.
    import zlib

    headers = {
        "User-Agent": "Advisory-Parser/1.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        # Advertise only what is actually decoded below; offering "deflate"
        # without handling it could hand compressed bytes to the caller.
        "Accept-Encoding": "gzip",
    }
    try:
        # Request() itself raises ValueError for a malformed URL, so it must
        # be built inside the try block for the ValueError handler to apply.
        request = Request(url, None, headers)
        # The context manager closes the response even if read() fails.
        with urlopen(request, timeout=30) as res:
            data = res.read()
            encoding = (res.headers.get("Content-Encoding") or "").strip().lower()
    except HTTPError as e:
        error_msg = "Failed to GET {} with status code: {}".format(url, e.code)
        raise AdvisoryParserGetContentException(error_msg) from e
    except URLError as e:
        error_msg = "Failed to establish connection to {}: {}".format(url, e.reason)
        raise AdvisoryParserGetContentException(error_msg) from e
    except OSError as e:
        # Read timeouts and connection resets after the connection was made
        # are plain OSErrors rather than URLErrors.
        error_msg = "Failed to read response from {}: {}".format(url, e)
        raise AdvisoryParserGetContentException(error_msg) from e
    except ValueError as e:
        raise AdvisoryParserGetContentException(
            "Invalid URL specified: {}".format(url)
        ) from e

    # Handle gzip-compressed responses
    if encoding == "gzip":
        try:
            data = gzip.decompress(data)
        except (OSError, EOFError, zlib.error) as e:
            error_msg = "Failed to decompress response from {}: {}".format(url, e)
            raise AdvisoryParserGetContentException(error_msg) from e

    return data


def get_text_from_url(url):
Expand Down