diff --git a/.env_template b/.env_template
index 49ea0bd..56b5add 100644
--- a/.env_template
+++ b/.env_template
@@ -2,3 +2,4 @@ YOUTUBE_API_KEY=
 MONGO_URI=
 MONGO_DB=
 STAGE=
+DAILY_SUN_URL=
\ No newline at end of file
diff --git a/README.md b/README.md
index 5df5eb3..839e973 100644
--- a/README.md
+++ b/README.md
@@ -22,4 +22,6 @@ To start the project, run the following command in the terminal
 
 ## Setting up the database
 
-Add /graphql to the url to access the interactive GraphQL platform
\ No newline at end of file
+Create a Mongo database named `score_db` and another named `daily_sun_db`. A partnership with the Daily Sun has given us access to their articles, which we copy and paginate for the frontend.
+
+Add /graphql to the url to access the interactive GraphQL platform
diff --git a/app.py b/app.py
index 5a01798..67aec02 100644
--- a/app.py
+++ b/app.py
@@ -5,6 +5,11 @@ from flask_graphql import GraphQLView
 from graphene import Schema
 
 from src.schema import Query, Mutation
+from src.scrapers.games_scraper import fetch_game_schedule
+from src.scrapers.youtube_stats import fetch_videos
+from src.scrapers.daily_sun_scrape import fetch_news
+from src.services.article_service import ArticleService
 from src.utils.team_loader import TeamLoader
+import argparse
 import signal
 import sys
@@ -83,6 +88,30 @@ def create_context():
         ),
     )
 
+# Setup command line arguments
+def parse_args():
+    parser = argparse.ArgumentParser(description="Skip scraping tasks, for dev purposes.")
+    parser.add_argument(
+        "--no-scrape",
+        action="store_true",
+        help="Skips scraping tasks if set, useful for frontend development.",
+    )
+    parser.add_argument(
+        "--no-daily-sun",
+        action="store_true",
+        help="Skips using the Daily Sun page for alerts",
+    )
+    return parser.parse_args()
+
+# Only parse arguments when running directly (not when imported by gunicorn)
+if __name__ == "__main__":
+    args = parse_args()
+else:
+    # Default args when imported by gunicorn
+    class DefaultArgs:
+        no_scrape = False
+        no_daily_sun = False
+    args = DefaultArgs()
 
 def signal_handler(sig, frame):
     sys.exit(0)
@@ -91,5 +120,40 @@
 signal.signal(signal.SIGINT, signal_handler)
 signal.signal(signal.SIGTERM, signal_handler)
 
+# Only run scraping tasks if not disabled
+if not args.no_scrape:
+    from flask_apscheduler import APScheduler
+    scheduler = APScheduler()
+    scheduler.init_app(app)
+    scheduler.start()
+
+    @scheduler.task("interval", id="scrape_schedules", seconds=43200) # 12 hours
+    def scrape_schedules():
+        logging.info("Scraping game schedules...")
+        fetch_game_schedule()
+
+    @scheduler.task("interval", id="scrape_videos", seconds=43200) # 12 hours
+    def scrape_videos():
+        logging.info("Scraping YouTube videos...")
+        fetch_videos()
+
+    scrape_schedules()
+    scrape_videos()
+
+if not args.no_daily_sun and not args.no_scrape:
+    @scheduler.task("interval", id="scrape_daily_sun", seconds=3600)
+    def scrape_daily_sun():
+        logging.info("Getting Daily Sun Sports News...")
+        fetch_news()
+
+    @scheduler.task("interval", id="cleanse_daily_sun_db", seconds=604800) # 1 week
+    def cleanse_daily_sun_db():
+        logging.info("Cleaning the Daily Sun database from old articles...")
+        ArticleService.cleanse_old_articles()
+
+    scrape_daily_sun()
+    cleanse_daily_sun_db()
+
+
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=8000)
diff --git a/src/database.py b/src/database.py
index d475437..834808d 100644
--- a/src/database.py
+++ b/src/database.py
@@ -48,6 +48,7 @@ def keep_connection_alive():
 
 # Access the database
 db = client[os.getenv("MONGO_DB", "score_db")]
+daily_sun_db = client[os.getenv("DAILY_SUN_DB", "daily_sun_db")]
 
 
 def setup_database_indexes():
@@ -65,6 +66,31 @@ def setup_database_indexes():
 
         # Index for sorting operations
         game_collection.create_index([("date", -1)], background=True)
+
+        # Index to have unique games so we won't add duplicates
+        game_collection.create_index(
+            [
+                ("sport", 1),
+                ("gender", 1),
+                ("date", 1),
+                ("opponent_id", 1),
+                ("state", 1),
+            ],
+            unique=True,
+            background=True
+        )
+
+        # Additional index for tournament games (without opponent_id)
+        game_collection.create_index(
+            [
+                ("sport", 1),
+                ("gender", 1),
+                ("date", 1),
+                ("city", 1),
+                ("state", 1),
+            ],
+            background=True
+        )
 
         print("✅ MongoDB indexes created successfully")
     except Exception as e:
diff --git a/src/models/__init__.py b/src/models/__init__.py
index ab83d25..efbf4e5 100644
--- a/src/models/__init__.py
+++ b/src/models/__init__.py
@@ -1,3 +1,4 @@
 from .game import Game
 from .team import Team
-from .youtube_video import YoutubeVideo
\ No newline at end of file
+from .youtube_video import YoutubeVideo
+from .article import Article
\ No newline at end of file
diff --git a/src/models/article.py b/src/models/article.py
new file mode 100644
index 0000000..bfcb8e1
--- /dev/null
+++ b/src/models/article.py
@@ -0,0 +1,56 @@
+from bson.objectid import ObjectId
+from datetime import datetime
+
+class Article:
+    """
+    A model representing a news article.
+
+    Attributes:
+    - title: The title of the article
+    - image: The filename of the article's main image
+    - sports_type: The specific sport category
+    - published_at: The publication date
+    - url: The URL to the full article
+    - slug: Unique identifier from the source
+    - created_at: When the article was added to our DB
+    """
+    def __init__(self, title, sports_type, published_at, url, slug, image=None, id=None, created_at=None):
+        self.id = id if id else str(ObjectId())
+        self.title = title
+        self.image = image
+        self.sports_type = sports_type
+        self.published_at = published_at
+        self.url = url
+        self.slug = slug
+        self.created_at = created_at if created_at else datetime.now()
+
+    def to_dict(self):
+        """
+        Converts the Article object to a dictionary format for MongoDB storage.
+        """
+        return {
+            "_id": self.id,
+            "title": self.title,
+            "image": self.image,
+            "sports_type": self.sports_type,
+            "published_at": self.published_at,
+            "url": self.url,
+            "slug": self.slug,
+            "created_at": self.created_at
+        }
+
+    @staticmethod
+    def from_dict(data):
+        """
+        Converts a MongoDB document to an Article object.
+        """
+        return Article(
+            id=data.get("_id"),
+            title=data.get("title"),
+            image=data.get("image"),
+            sports_type=data.get("sports_type"),
+            published_at=data.get("published_at"),
+            url=data.get("url"),
+            slug=data.get("slug"),
+            created_at=data.get("created_at")
+        )
\ No newline at end of file
diff --git a/src/models/game.py b/src/models/game.py
index f3ebcf6..73a7968 100644
--- a/src/models/game.py
+++ b/src/models/game.py
@@ -17,6 +17,7 @@ class Game:
     - `time` The time of the game. (optional)
     - `box_score` The scoring summary of the game (optional)
     - `score_breakdown` The scoring breakdown of the game (optional)
+    - `ticket_link` The ticket link for the game (optional)
     """
 
     def __init__(
@@ -35,6 +36,7 @@ def __init__(
         score_breakdown=None,
         team=None,
         utc_date=None,
+        ticket_link=None,
     ):
         self.id = id if id else str(ObjectId())
         self.city = city
@@ -50,6 +52,7 @@ def __init__(
         self.score_breakdown = score_breakdown
         self.team = team
         self.utc_date = utc_date
+        self.ticket_link = ticket_link
 
     def to_dict(self):
         """
@@ -70,6 +73,7 @@ def to_dict(self):
             "score_breakdown": self.score_breakdown,
             "team": self.team,
             "utc_date": self.utc_date,
+            "ticket_link": self.ticket_link,
         }
 
     @staticmethod
@@ -92,4 +96,5 @@ def from_dict(data) -> None:
             score_breakdown=data.get("score_breakdown"),
             team=data.get("team"),
             utc_date=data.get("utc_date"),
+            ticket_link=data.get("ticket_link"),
         )
diff --git a/src/models/youtube_video.py b/src/models/youtube_video.py
index e45a965..cdd11af 100644
--- a/src/models/youtube_video.py
+++ b/src/models/youtube_video.py
@@ -12,10 +12,11 @@ class YoutubeVideo:
     - `thumbnail` The thumbnail of the video, as a URL string pointing to a `.jpg` file.
     - `url` The URL of the video.
     - `published_at` The date and time the video was published.
+    - `duration` The duration of the video.
     """
 
     def __init__(
-        self, title, description, thumbnail, b64_thumbnail, url, published_at, id=None
+        self, title, description, thumbnail, b64_thumbnail, url, published_at, duration=None, id=None
     ):
         self.id = id if id else str(ObjectId())
         self.title = title
@@ -24,6 +25,7 @@ def __init__(
         self.b64_thumbnail = b64_thumbnail
         self.url = url
         self.published_at = published_at
+        self.duration = duration
 
     def to_dict(self):
         """
@@ -37,6 +39,7 @@ def to_dict(self):
             "b64_thumbnail": self.b64_thumbnail,
             "url": self.url,
             "published_at": self.published_at,
+            "duration": self.duration,
         }
 
     @staticmethod
@@ -52,4 +55,5 @@ def from_dict(data):
             b64_thumbnail=data.get("b64_thumbnail"),
             url=data.get("url"),
             published_at=data.get("published_at"),
+            duration=data.get("duration"),
         )
diff --git a/src/mutations/__init__.py b/src/mutations/__init__.py
index 3fd3a8a..3df8e4d 100644
--- a/src/mutations/__init__.py
+++ b/src/mutations/__init__.py
@@ -1,3 +1,4 @@
 from .create_game import CreateGame
 from .create_team import CreateTeam
-from .create_youtube_video import CreateYoutubeVideo
\ No newline at end of file
+from .create_youtube_video import CreateYoutubeVideo
+from .create_article import CreateArticle
\ No newline at end of file
diff --git a/src/mutations/create_article.py b/src/mutations/create_article.py
new file mode 100644
index 0000000..19b8920
--- /dev/null
+++ b/src/mutations/create_article.py
@@ -0,0 +1,26 @@
+from graphene import Mutation, String, Field
+from src.types import ArticleType
+from src.services.article_service import ArticleService
+
+class CreateArticle(Mutation):
+    class Arguments:
+        title = String(required=True)
+        sports_type = String(required=True)
+        published_at = String(required=True)
+        url = String(required=True)
+        slug = String(required=True)
+        image = String(required=False)
+
+    article = Field(lambda: ArticleType)
+
+    def mutate(self, info, title, sports_type, published_at, url, slug, image=None):
+        article_data = {
+            "title": title,
+            "sports_type": sports_type,
+            "published_at": published_at, # Already in ISO 8601 format
+            "url": url,
+            "slug": slug,
+            "image": image
+        }
+        new_article = ArticleService.create_article(article_data)
+        return CreateArticle(article=new_article)
\ No newline at end of file
diff --git a/src/mutations/create_game.py b/src/mutations/create_game.py
index 205a153..3a52345 100644
--- a/src/mutations/create_game.py
+++ b/src/mutations/create_game.py
@@ -17,6 +17,7 @@ class Arguments:
         box_score = String(required=False)
         score_breakdown = String(required=False)
         utc_date = String(required=False)
+        ticket_link = String(required=False)
 
     game = Field(lambda: GameType)
 
@@ -34,7 +35,8 @@ def mutate(
         time=None,
         box_score=None,
         score_breakdown=None,
-        utc_date=None
+        utc_date=None,
+        ticket_link=None
     ):
         game_data = {
             "city": city,
@@ -48,7 +50,8 @@ def mutate(
             "time": time,
             "box_score": box_score,
             "score_breakdown": score_breakdown,
-            "utc_date": utc_date
+            "utc_date": utc_date,
+            "ticket_link": ticket_link
         }
         new_game = GameService.create_game(game_data)
         return CreateGame(game=new_game)
\ No newline at end of file
diff --git a/src/mutations/create_youtube_video.py b/src/mutations/create_youtube_video.py
index 9f39bf7..ed2cb40 100644
--- a/src/mutations/create_youtube_video.py
+++ b/src/mutations/create_youtube_video.py
@@ -8,13 +8,14 @@ class Arguments:
         title = String(required=True)
         description = String(required=True)
         thumbnail = String(required=True)
-        b64_thumbnail = String(required=True)
+        b64_thumbnail = String(required=False)
         url = String(required=True)
         published_at = String(required=True)
+        duration = String(required=True)
 
     youtube_video = Field(lambda: YoutubeVideoType)
 
-    def mutate(self, info, id, title, description, thumbnail, url, published_at):
+    def mutate(self, info, id, title, description, thumbnail, url, published_at, duration, b64_thumbnail=None):
         video_data = {
             "id": id,
             "title": title,
@@ -23,6 +24,7 @@ def mutate(self, info, id, title, description, thumbnail, url, published_at):
             "b64_thumbnail": b64_thumbnail,
             "url": url,
             "published_at": published_at,
+            "duration": duration,
         }
         new_video = YoutubeVideoService.create_video(video_data)
         return CreateYoutubeVideo(youtube_video=new_video)
\ No newline at end of file
diff --git a/src/queries/__init__.py b/src/queries/__init__.py
index f345409..fdf2f41 100644
--- a/src/queries/__init__.py
+++ b/src/queries/__init__.py
@@ -1,3 +1,4 @@
 from .game_query import GameQuery
 from .team_query import TeamQuery
 from .youtube_video_query import YoutubeVideoQuery
+from .article_query import ArticleQuery
\ No newline at end of file
diff --git a/src/queries/article_query.py b/src/queries/article_query.py
new file mode 100644
index 0000000..52e6cbc
--- /dev/null
+++ b/src/queries/article_query.py
@@ -0,0 +1,12 @@
+from graphene import ObjectType, List, String
+from src.services.article_service import ArticleService
+from src.types import ArticleType
+
+class ArticleQuery(ObjectType):
+    articles = List(ArticleType, sports_type=String())
+
+    def resolve_articles(self, info, sports_type=None):
+        """
+        Resolver for retrieving news articles, optionally filtered by sports_type.
+        """
+        return ArticleService.get_articles(sports_type)
\ No newline at end of file
diff --git a/src/queries/game_query.py b/src/queries/game_query.py
index 4aa8a55..3c04116 100644
--- a/src/queries/game_query.py
+++ b/src/queries/game_query.py
@@ -1,4 +1,4 @@
-from graphene import ObjectType, String, Field, List, Int
+from graphene import ObjectType, String, Field, List, Int, DateTime
 from src.services.game_service import GameService
 from src.types import GameType
 
@@ -20,12 +20,14 @@ class GameQuery(ObjectType):
         sport=String(required=True),
         state=String(required=True),
         time=String(required=True),
+        ticket_link=String(required=False),
     )
     games_by_sport = List(GameType, sport=String(required=True))
     games_by_gender = List(GameType, gender=String(required=True))
     games_by_sport_gender = List(
         GameType, sport=String(required=True), gender=String(required=True)
     )
+    games_by_date = List(GameType, startDate=DateTime(required=True), endDate=DateTime(required=True))
 
     def resolve_games(self, info, limit=100, offset=0):
         """
@@ -40,13 +42,13 @@ def resolve_game(self, info, id):
         return GameService.get_game_by_id(id)
 
     def resolve_game_by_data(
-        self, info, city, date, gender, opponent_id, sport, state, time, location=None
+        self, info, city, date, gender, opponent_id, sport, state, time, location=None, ticket_link=None
     ):
         """
         Resolver for retrieving a game by its data.
         """
         return GameService.get_game_by_data(
-            city, date, gender, location, opponent_id, sport, state, time
+            city, date, gender, location, opponent_id, sport, state, time, ticket_link
         )
 
     def resolve_games_by_sport(self, info, sport):
@@ -66,3 +68,9 @@ def resolve_games_by_sport_gender(self, info, sport, gender):
         Resolver for retrieving all games by its sport and gender.
         """
         return GameService.get_games_by_sport_gender(sport, gender)
+
+    def resolve_games_by_date(self, info, startDate, endDate):
+        """
+        Resolver for retrieving games by date.
+        """
+        return GameService.get_games_by_date(startDate, endDate)
diff --git a/src/repositories/__init__.py b/src/repositories/__init__.py
index 1c18bb7..f9c6252 100644
--- a/src/repositories/__init__.py
+++ b/src/repositories/__init__.py
@@ -1,3 +1,4 @@
 from .game_repository import GameRepository
 from .team_repository import TeamRepository
 from .youtube_video_repository import YoutubeVideoRepository
+from .article_repository import ArticleRepository
\ No newline at end of file
diff --git a/src/repositories/article_repository.py b/src/repositories/article_repository.py
new file mode 100644
index 0000000..440f856
--- /dev/null
+++ b/src/repositories/article_repository.py
@@ -0,0 +1,85 @@
+from src.database import daily_sun_db
+from src.models.article import Article
+from pymongo import UpdateOne
+from datetime import datetime, timedelta, timezone
+
+class ArticleRepository:
+    @staticmethod
+    def upsert(article):
+        """
+        Upsert an article into the 'news_articles' collection in MongoDB.
+        """
+        article_collection = daily_sun_db["news_articles"]
+        article_dict = article.to_dict()
+        # Remove _id from the update to avoid MongoDB error
+        article_dict.pop("_id", None)
+
+        article_collection.update_one(
+            {"slug": article.slug},
+            {"$set": article_dict},
+            upsert=True
+        )
+
+    @staticmethod
+    def bulk_upsert(articles):
+        """
+        Bulk upsert articles into the 'news_articles' collection based on slug.
+        """
+        if not articles:
+            return
+
+        article_collection = daily_sun_db["news_articles"]
+        operations = []
+        for article in articles:
+            article_dict = article.to_dict()
+            # Remove _id from the update to avoid MongoDB error
+            article_dict.pop("_id", None)
+
+            operations.append(
+                UpdateOne(
+                    {"slug": article.slug},
+                    {"$set": article_dict},
+                    upsert=True
+                )
+            )
+
+        if operations:
+            article_collection.bulk_write(operations)
+
+    @staticmethod
+    def find_recent(limit_days=3):
+        """
+        Retrieve articles from the last N days, sorted by published_at descending.
+        """
+        article_collection = daily_sun_db["news_articles"]
+        # Calculate threshold as ISO 8601 string
+        threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
+        query = {"published_at": {"$gte": threshold}}
+        articles = article_collection.find(query).sort("published_at", -1)
+        return [Article.from_dict(article) for article in articles]
+
+    @staticmethod
+    def find_by_sports_type(sports_type, limit_days=3):
+        """
+        Retrieve articles by sports_type from the last N days, sorted by published_at descending.
+        """
+        article_collection = daily_sun_db["news_articles"]
+        # Calculate threshold as ISO 8601 string
+        threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
+        query = {
+            "sports_type": sports_type,
+            "published_at": {"$gte": threshold}
+        }
+        articles = article_collection.find(query).sort("published_at", -1)
+        return [Article.from_dict(article) for article in articles]
+
+    @staticmethod
+    def delete_not_recent(limit_days=3):
+        """
+        Delete articles older than N days.
+        """
+        article_collection = daily_sun_db["news_articles"]
+        # Calculate threshold as ISO 8601 string
+        threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
+        query = {"published_at": {"$lt": threshold}}
+        article_collection.delete_many(query)
\ No newline at end of file
diff --git a/src/repositories/game_repository.py b/src/repositories/game_repository.py
index bfe5d08..95e679b 100644
--- a/src/repositories/game_repository.py
+++ b/src/repositories/game_repository.py
@@ -130,6 +130,56 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state):
 
         return [Game.from_dict(game) for game in games]
 
+    @staticmethod
+    def find_by_tournament_key_fields(city, date, gender, location, sport, state):
+        """
+        Find tournament games by location and date (excluding opponent_id).
+        This is used when we need to find a tournament game that might have a placeholder team.
+        Uses flexible matching to handle TBD/TBA values.
+        """
+        game_collection = db["game"]
+
+        # Build flexible query that can handle TBD/TBA values
+        query = {
+            "date": date,
+            "gender": gender,
+            "sport": sport,
+        }
+
+        # For city, state, and location, use flexible matching
+        # This allows finding games even when TBD/TBA values change to real values
+        city_conditions = []
+        if city:
+            city_conditions.append(city)
+        else:
+            city_conditions = [None]
+
+        state_conditions = []
+        if state:
+            state_conditions.append(state)
+        else:
+            state_conditions = [None]
+
+        location_conditions = []
+        if location:
+            location_conditions.append(location)
+        else:
+            location_conditions = [None]
+
+        query["city"] = {"$in": city_conditions}
+        query["state"] = {"$in": state_conditions}
+        query["location"] = {"$in": location_conditions}
+
+        games = list(game_collection.find(query))
+
+        if not games:
+            return None
+
+        if len(games) == 1:
+            return Game.from_dict(games[0])
+
+        return [Game.from_dict(game) for game in games]
+
     @staticmethod
     def find_by_sport(sport):
         """
@@ -156,3 +206,52 @@ def find_by_sport_gender(sport, gender):
         game_collection = db["game"]
         games = game_collection.find({"sport": sport, "gender": gender})
         return [Game.from_dict(game) for game in games]
+
+    @staticmethod
+    def find_games_by_sport_gender_after_date(sport, gender, after_date=None):
+        """
+        Find games for a specific sport and gender, optionally after a specific date.
+        This method returns raw game data without team information.
+        """
+        game_collection = db["game"]
+
+        query = {
+            "sport": sport,
+            "gender": gender
+        }
+
+        if after_date:
+            query["utc_date"] = {"$gt": after_date}
+
+        games = game_collection.find(query)
+        return [Game.from_dict(game) for game in games]
+
+    @staticmethod
+    def find_by_date(startDate, endDate):
+        """
+        Retrieve all games from the 'game' collection in MongoDB for games
+        between certain dates.
+        """
+        game_collection = db["game"]
+
+        start_str = startDate.isoformat()
+        end_str = endDate.isoformat()
+
+        query = {
+            "utc_date": {
+                "$gte": start_str,
+                "$lte": end_str
+            }
+        }
+
+        games = game_collection.find(query)
+        return [Game.from_dict(game) for game in games]
+
+    @staticmethod
+    def delete_games_by_ids(game_ids):
+        """
+        Delete games by their IDs.
+        """
+        game_collection = db["game"]
+        result = game_collection.delete_many({"_id": {"$in": game_ids}})
+        return result.deleted_count
diff --git a/src/schema.py b/src/schema.py
index 2cbbe69..0f3ae99 100644
--- a/src/schema.py
+++ b/src/schema.py
@@ -1,9 +1,9 @@
 from graphene import ObjectType, Schema, Mutation
-from src.mutations import CreateGame, CreateTeam, CreateYoutubeVideo
-from src.queries import GameQuery, TeamQuery, YoutubeVideoQuery
+from src.mutations import CreateGame, CreateTeam, CreateYoutubeVideo, CreateArticle
+from src.queries import GameQuery, TeamQuery, YoutubeVideoQuery, ArticleQuery
 
 
-class Query(TeamQuery, GameQuery, YoutubeVideoQuery, ObjectType):
+class Query(TeamQuery, GameQuery, YoutubeVideoQuery, ArticleQuery, ObjectType):
     pass
 
@@ -11,6 +11,7 @@ class Mutation(ObjectType):
     create_game = CreateGame.Field(description="Creates a new game.")
     create_team = CreateTeam.Field(description="Creates a new team.")
     create_youtube_video = CreateYoutubeVideo.Field(description="Creates a new youtube video.")
+    create_article = CreateArticle.Field(description="Creates a new article.")
 
 
 schema = Schema(query=Query, mutation=Mutation)
diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py
new file mode 100644
index 0000000..4b5cd24
--- /dev/null
+++ b/src/scrapers/daily_sun_scrape.py
@@ -0,0 +1,81 @@
+import os
+import requests
+from datetime import datetime, timedelta, timezone
+from dotenv import load_dotenv
+from ..services import ArticleService
+from ..utils.constants import ARTICLE_IMG_TAG
+from ..utils.helpers import extract_sport_type_from_title
+import logging
+from bs4 import BeautifulSoup
+import base64
+
+load_dotenv()
+
+
+def fetch_news():
+    try:
+        url = os.getenv("DAILY_SUN_URL")
+        response = requests.get(
+            url,
+            headers={
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+            }
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        # Current date and 3-day threshold (in UTC)
+        current_date = datetime.now(timezone.utc)
+        three_days_ago = current_date - timedelta(days=3)
+
+        # Process articles
+        articles_to_store = []
+        for article in data.get("articles", []):
+            published_at_dt = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S")
+            # Assume the timezone is UTC and convert to ISO 8601 format string
+            published_at_dt = published_at_dt.replace(tzinfo=timezone.utc)
+            published_at = published_at_dt.isoformat().replace('+00:00', 'Z')
+
+            if published_at_dt >= three_days_ago:
+                # Extract sport type from title
+                title = article["headline"]
+                sports_type = extract_sport_type_from_title(title)
+                article_url = f"https://cornellsun.com/article/{article['slug']}"
+
+                article_image = None
+                try:
+                    response = requests.get(
+                        article_url,
+                        headers={
+                            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+                        }
+                    )
+                    response.raise_for_status()
+                    soup = BeautifulSoup(response.content, 'html.parser')
+                    img_tag = soup.select_one(ARTICLE_IMG_TAG)
+                    if img_tag and img_tag.get('src'):
+                        article_image=img_tag.get('src')
+                except Exception as e:
+                    logging.error(f"Error fetching news: {str(e)}")
+                article_doc = {
+                    "title": article["headline"],
+                    "image": article_image,
+                    "sports_type": sports_type,
+                    "published_at": published_at,
+                    "url": article_url,
+                    "slug": article["slug"],
+                    "created_at": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
+                }
+                articles_to_store.append(article_doc)
+
+
+        if articles_to_store:
+            ArticleService.create_articles_bulk(articles_to_store)
+            logging.info(f"Stored/Updated {len(articles_to_store)} recent articles")
+        else:
+            logging.info("No recent articles to store")
+        return True
+
+    except Exception as e:
+        logging.error(f"Error fetching news: {str(e)}")
+        return False
diff --git a/src/scrapers/game_details_scrape.py b/src/scrapers/game_details_scrape.py
index 8fce04a..5b6fc6d 100644
--- a/src/scrapers/game_details_scrape.py
+++ b/src/scrapers/game_details_scrape.py
@@ -31,16 +31,24 @@ def extract_teams_and_scores(box_score_section, sport):
     period_scores = []
 
     for row in score_table.find(TAG_TBODY).find_all(TAG_TR):
-        team_name_cell = row.find(TAG_TH) if sport == 'ice hockey' else row.find(TAG_TD)
+        # Check if team name is in