From 3d88699951eade0abe1cc9bf7adaafd196cde3da Mon Sep 17 00:00:00 2001
From: Mateo
Date: Mon, 24 Mar 2025 20:23:11 -0400
Subject: [PATCH 01/30] implement articles from daily sun

---
 README.md                              |  4 +-
 app.py                                 | 24 ++++++++-
 src/database.py                        |  3 +-
 src/models/__init__.py                 |  3 +-
 src/models/article.py                  | 56 ++++++++++++++++++++
 src/mutations/__init__.py              |  3 +-
 src/mutations/create_article.py        | 27 ++++++++++
 src/queries/__init__.py                |  1 +
 src/queries/article_query.py           | 12 +++++
 src/repositories/__init__.py           |  1 +
 src/repositories/article_repository.py | 69 ++++++++++++++++++++++++
 src/schema.py                          |  7 +--
 src/scrapers/daily_sun_scrape.py       | 59 +++++++++++++++++++++
 src/services/__init__.py               |  3 +-
 src/services/article_service.py        | 73 ++++++++++++++++++++++++
 src/types.py                           | 29 +++++++++-
 16 files changed, 363 insertions(+), 11 deletions(-)
 create mode 100644 src/models/article.py
 create mode 100644 src/mutations/create_article.py
 create mode 100644 src/queries/article_query.py
 create mode 100644 src/repositories/article_repository.py
 create mode 100644 src/scrapers/daily_sun_scrape.py
 create mode 100644 src/services/article_service.py

diff --git a/README.md b/README.md
index 5df5eb3..839e973 100644
--- a/README.md
+++ b/README.md
@@ -22,4 +22,6 @@ To start the project, run the following command in the terminal
 
 ## Setting up the database
 
-Add /graphql to the url to access the interactive GraphQL platform
\ No newline at end of file
+Create a Mongo database named `score_db` and another named `daily_sun_db`. A partnership with the Daily Sun gives us access to their articles, which we copy and paginate for the frontend.
+
+Add /graphql to the url to access the interactive GraphQL platform

diff --git a/app.py b/app.py
index 5debd7e..860dc89 100644
--- a/app.py
+++ b/app.py
@@ -7,6 +7,8 @@
 from src.schema import Query, Mutation
 from src.scrapers.games_scraper import fetch_game_schedule
 from src.scrapers.youtube_stats import fetch_videos
+from src.scrapers.daily_sun_scrape import fetch_news
+from src.services.article_service import ArticleService
 from src.utils.team_loader import TeamLoader
 
 app = Flask(__name__)
@@ -42,6 +44,11 @@ def parse_args():
         action="store_true",
         help="Skips scraping tasks if set, useful for frontend development.",
     )
+    parser.add_argument(
+        "--no-daily-sun",
+        action="store_true",
+        help="Skips the Daily Sun article scraping tasks if set.",
+    )
     return parser.parse_args()
 
 args = parse_args()
@@ -52,7 +59,7 @@ def scrape_schedules():
         logging.info("Scraping game schedules...")
         fetch_game_schedule()
 
-    @scheduler.task("interval", id="scrape_schedules", seconds=43200)
+    @scheduler.task("interval", id="scrape_videos", seconds=43200)  # 12 hours
     def scrape_videos():
         logging.info("Scraping YouTube videos...")
         fetch_videos()
@@ -60,5 +67,20 @@ def scrape_videos():
     scrape_schedules()
     scrape_videos()
 
+if not args.no_daily_sun:
+    @scheduler.task("interval", id="scrape_daily_sun", seconds=3600)
+    def scrape_daily_sun():
+        logging.info("Getting Daily Sun Sports News...")
+        fetch_news()
+
+    @scheduler.task("interval", id="cleanse_daily_sun_db", seconds=604800)  # 1 week
+    def cleanse_daily_sun_db():
+        logging.info("Cleaning the Daily Sun database of old articles...")
+        ArticleService.cleanse_old_articles()
+
+    scrape_daily_sun()
+    cleanse_daily_sun_db()
+
+
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=8000)
\ No newline at end of file

diff --git a/src/database.py b/src/database.py
index 19801e0..b9c25e5 100644
--- a/src/database.py
+++ b/src/database.py
@@ -6,7 +6,7 @@ if
os.getenv("STAGE") == "local": file_name = "ca-certificate.crt" - use_tls = os.getenv("MONGO_URI") != "mongodb://localhost:27017/" + use_tls = "localhost" not in os.getenv("MONGO_URI") else: file_name = "/etc/ssl/ca-certificate.crt" use_tls = True @@ -17,3 +17,4 @@ client = MongoClient(os.getenv("MONGO_URI")) db = client[os.getenv("MONGO_DB", "score_db")] +daily_sun_db = client[os.getenv("DAILY_SUN_DB", "daily_sun_db")] diff --git a/src/models/__init__.py b/src/models/__init__.py index ab83d25..efbf4e5 100644 --- a/src/models/__init__.py +++ b/src/models/__init__.py @@ -1,3 +1,4 @@ from .game import Game from .team import Team -from .youtube_video import YoutubeVideo \ No newline at end of file +from .youtube_video import YoutubeVideo +from .article import Article \ No newline at end of file diff --git a/src/models/article.py b/src/models/article.py new file mode 100644 index 0000000..bfcb8e1 --- /dev/null +++ b/src/models/article.py @@ -0,0 +1,56 @@ +from bson.objectid import ObjectId +from datetime import datetime + +class Article: + """ + A model representing a news article. + + Attributes: + - title: The title of the article + - image: The filename of the article's main image + - sports_type: The specific sport category + - published_at: The publication date + - url: The URL to the full article + - slug: Unique identifier from the source + - created_at: When the article was added to our DB + """ + def __init__(self, title, sports_type, published_at, url, slug, image=None, id=None, created_at=None): + self.id = id if id else str(ObjectId()) + self.title = title + self.image = image + self.sports_type = sports_type + self.published_at = published_at + self.url = url + self.slug = slug + self.created_at = created_at if created_at else datetime.now() + + def to_dict(self): + """ + Converts the Article object to a dictionary format for MongoDB storage. + """ + return { + "_id": self.id, + "title": self.title, + "image": self.image, + "sports_type": self.sports_type, + "published_at": self.published_at, + "url": self.url, + "slug": self.slug, + "created_at": self.created_at + } + + @staticmethod + def from_dict(data): + """ + Converts a MongoDB document to an Article object. 
+ """ + return Article( + id=data.get("_id"), + title=data.get("title"), + image=data.get("image"), + sports_type=data.get("sports_type"), + published_at=data.get("published_at"), + url=data.get("url"), + slug=data.get("slug"), + created_at=data.get("created_at") + ) \ No newline at end of file diff --git a/src/mutations/__init__.py b/src/mutations/__init__.py index 3fd3a8a..3df8e4d 100644 --- a/src/mutations/__init__.py +++ b/src/mutations/__init__.py @@ -1,3 +1,4 @@ from .create_game import CreateGame from .create_team import CreateTeam -from .create_youtube_video import CreateYoutubeVideo \ No newline at end of file +from .create_youtube_video import CreateYoutubeVideo +from .create_article import CreateArticle \ No newline at end of file diff --git a/src/mutations/create_article.py b/src/mutations/create_article.py new file mode 100644 index 0000000..1e0a03b --- /dev/null +++ b/src/mutations/create_article.py @@ -0,0 +1,27 @@ +from graphene import Mutation, String, Field +from src.types import ArticleType +from src.services.article_service import ArticleService + +class CreateArticle(Mutation): + class Arguments: + title = String(required=True) + sports_type = String(required=True) + published_at = String(required=True) + url = String(required=True) + slug = String(required=True) + image = String(required=False) + + article = Field(lambda: ArticleType) + + def mutate(self, info, title, sports_type, published_at, url, slug, image=None): + from datetime import datetime + article_data = { + "title": title, + "sports_type": sports_type, + "published_at": datetime.fromisoformat(published_at), + "url": url, + "slug": slug, + "image": image + } + new_article = ArticleService.create_article(article_data) + return CreateArticle(article=new_article) \ No newline at end of file diff --git a/src/queries/__init__.py b/src/queries/__init__.py index f345409..fdf2f41 100644 --- a/src/queries/__init__.py +++ b/src/queries/__init__.py @@ -1,3 +1,4 @@ from .game_query import GameQuery from .team_query import TeamQuery from .youtube_video_query import YoutubeVideoQuery +from .article_query import ArticleQuery \ No newline at end of file diff --git a/src/queries/article_query.py b/src/queries/article_query.py new file mode 100644 index 0000000..52e6cbc --- /dev/null +++ b/src/queries/article_query.py @@ -0,0 +1,12 @@ +from graphene import ObjectType, List, String +from src.services.article_service import ArticleService +from src.types import ArticleType + +class ArticleQuery(ObjectType): + articles = List(ArticleType, sports_type=String()) + + def resolve_articles(self, info, sports_type=None): + """ + Resolver for retrieving news articles, optionally filtered by sports_type. 
+ """ + return ArticleService.get_articles(sports_type) \ No newline at end of file diff --git a/src/repositories/__init__.py b/src/repositories/__init__.py index 1c18bb7..f9c6252 100644 --- a/src/repositories/__init__.py +++ b/src/repositories/__init__.py @@ -1,3 +1,4 @@ from .game_repository import GameRepository from .team_repository import TeamRepository from .youtube_video_repository import YoutubeVideoRepository +from .article_repository import ArticleRepository \ No newline at end of file diff --git a/src/repositories/article_repository.py b/src/repositories/article_repository.py new file mode 100644 index 0000000..1a30dc5 --- /dev/null +++ b/src/repositories/article_repository.py @@ -0,0 +1,69 @@ +from src.database import daily_sun_db +from src.models.article import Article +from pymongo import UpdateOne +from datetime import datetime, timedelta + +class ArticleRepository: + @staticmethod + def upsert(article): + """ + Upsert an article into the 'news_articles' collection in MongoDB. + """ + article_collection = daily_sun_db["news_articles"] + article_collection.update_one( + {"slug": article.slug}, + {"$set": article.to_dict()}, + upsert=True + ) + + @staticmethod + def bulk_upsert(articles): + """ + Bulk upsert articles into the 'news_articles' collection based on slug. + """ + if not articles: + return + + article_collection = daily_sun_db["news_articles"] + operations = [ + UpdateOne( + {"slug": article.slug}, + {"$set": article.to_dict()}, + upsert=True + ) + for article in articles + ] + if operations: + article_collection.bulk_write(operations) + + @staticmethod + def find_recent(limit_days=3): + """ + Retrieve articles from the last N days, sorted by published_at descending. + """ + article_collection = daily_sun_db["news_articles"] + query = {"published_at": {"$gte": datetime.now() - timedelta(days=limit_days)}} + articles = article_collection.find(query).sort("published_at", -1) + return [Article.from_dict(article) for article in articles] + + @staticmethod + def find_by_sports_type(sports_type, limit_days=3): + """ + Retrieve articles by sports_type from the last N days, sorted by published_at descending. + """ + article_collection = daily_sun_db["news_articles"] + query = { + "sports_type": sports_type, + "published_at": {"$gte": datetime.now() - timedelta(days=limit_days)} + } + articles = article_collection.find(query).sort("published_at", -1) + return [Article.from_dict(article) for article in articles] + + @staticmethod + def delete_not_recent(limit_days=3): + """ + Delete articles older than N days, sorted by published_at descending. 
+ """ + article_collection = daily_sun_db["news_articles"] + query = {"published_at": {"$lt": datetime.now() - timedelta(days=limit_days)}} + article_collection.delete_many(query) \ No newline at end of file diff --git a/src/schema.py b/src/schema.py index 2cbbe69..0f3ae99 100644 --- a/src/schema.py +++ b/src/schema.py @@ -1,9 +1,9 @@ from graphene import ObjectType, Schema, Mutation -from src.mutations import CreateGame, CreateTeam, CreateYoutubeVideo -from src.queries import GameQuery, TeamQuery, YoutubeVideoQuery +from src.mutations import CreateGame, CreateTeam, CreateYoutubeVideo, CreateArticle +from src.queries import GameQuery, TeamQuery, YoutubeVideoQuery, ArticleQuery -class Query(TeamQuery, GameQuery, YoutubeVideoQuery, ObjectType): +class Query(TeamQuery, GameQuery, YoutubeVideoQuery, ArticleQuery, ObjectType): pass @@ -11,6 +11,7 @@ class Mutation(ObjectType): create_game = CreateGame.Field(description="Creates a new game.") create_team = CreateTeam.Field(description="Creates a new team.") create_youtube_video = CreateYoutubeVideo.Field(description="Creates a new youtube video.") + create_article = CreateArticle.Field(description="Creates a new article.") schema = Schema(query=Query, mutation=Mutation) diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py new file mode 100644 index 0000000..cd11c5a --- /dev/null +++ b/src/scrapers/daily_sun_scrape.py @@ -0,0 +1,59 @@ +import os +import requests +from datetime import datetime, timedelta +from dotenv import load_dotenv +from ..services import ArticleService +import logging + +load_dotenv() + + +def fetch_news(): + try: + url = os.getenv("DAILY_SUN_URL") + response = requests.get( + url, + headers={ + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + } + ) + response.raise_for_status() + data = response.json() + + # Current date and 3-day threshold + current_date = datetime.now() + three_days_ago = current_date - timedelta(days=3) + + # Process articles + articles_to_store = [] + for article in data.get("articles", []): + published_at = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S") + + if published_at >= three_days_ago: + sports_type = next( + (tag["name"] for tag in article["tags"] if tag["name"] not in ["Sports", "Top Stories"]), + "General" + ) + article_url = f"https://cornellsun.com/article/{article['slug']}" + + article_doc = { + "title": article["headline"], + "image": article["dominantMedia"]["title"] if article["dominantMedia"] else None, + "sports_type": sports_type, + "published_at": published_at, + "url": article_url, + "slug": article["slug"], + "created_at": datetime.now() + } + articles_to_store.append(article_doc) + + if articles_to_store: + ArticleService.create_articles_bulk(articles_to_store) + logging.info(f"Stored/Updated {len(articles_to_store)} recent articles") + else: + logging.info("No recent articles to store") + return True + + except Exception as e: + logging.error(f"Error fetching news: {str(e)}") + return False diff --git a/src/services/__init__.py b/src/services/__init__.py index 2ed3e7a..29b5c31 100644 --- a/src/services/__init__.py +++ b/src/services/__init__.py @@ -1,3 +1,4 @@ from .game_service import GameService from .team_service import TeamService -from .youtube_video_service import YoutubeVideoService \ No newline at end of file +from .youtube_video_service import YoutubeVideoService +from .article_service import ArticleService \ No newline at end of file diff 
--git a/src/services/article_service.py b/src/services/article_service.py
new file mode 100644
index 0000000..77da243
--- /dev/null
+++ b/src/services/article_service.py
@@ -0,0 +1,73 @@
+from src.database import daily_sun_db
+from src.models.article import Article
+from src.repositories.article_repository import ArticleRepository
+from datetime import datetime, timedelta
+import logging
+
+class ArticleService:
+    @staticmethod
+    def get_articles(sports_type=None):
+        """
+        Retrieve all articles from the last 3 days, optionally filtered by sports_type, sorted by published_at descending.
+        """
+        try:
+            if sports_type:
+                return ArticleRepository.find_by_sports_type(sports_type)
+            return ArticleRepository.find_recent()
+        except Exception as e:
+            logging.error(f"Error retrieving articles: {str(e)}")
+            return []
+
+    @staticmethod
+    def create_article(article_data):
+        """
+        Create a single article, store it in MongoDB, and return it.
+        """
+        try:
+            article = Article(
+                title=article_data["title"],
+                sports_type=article_data["sports_type"],
+                published_at=article_data["published_at"],
+                url=article_data["url"],
+                slug=article_data["slug"],
+                image=article_data.get("image")
+            )
+            # upsert does not return the document, so hand back the model instance
+            ArticleRepository.upsert(article)
+            return article
+        except Exception as e:
+            logging.error(f"Error creating article: {str(e)}")
+            return None
+
+    @staticmethod
+    def create_articles_bulk(articles_data):
+        """
+        Create or update multiple articles in bulk and store them in MongoDB.
+        """
+        try:
+            if not articles_data:
+                return
+            articles = [
+                Article(
+                    title=data["title"],
+                    sports_type=data["sports_type"],
+                    published_at=data["published_at"],
+                    url=data["url"],
+                    slug=data["slug"],
+                    image=data.get("image")
+                )
+                for data in articles_data
+            ]
+            ArticleRepository.bulk_upsert(articles)
+        except Exception as e:
+            logging.error(f"Error creating articles in bulk: {str(e)}")
+            raise
+
+    @staticmethod
+    def cleanse_old_articles():
+        """
+        Remove articles older than 5 days (the 3-day display window plus a buffer) from the database.
+        """
+        try:
+            ArticleRepository.delete_not_recent(limit_days=5)  # provide a buffer from the 3-day threshold
+        except Exception as e:
+            logging.error(f"Error cleansing old articles: {str(e)}")
+            raise
\ No newline at end of file

diff --git a/src/types.py b/src/types.py
index 830e1e2..88ceb36 100644
--- a/src/types.py
+++ b/src/types.py
@@ -1,5 +1,5 @@
 from graphene import ObjectType, Field, String, List, Int
-from src.services import TeamService
+from datetime import datetime
 
 class TeamType(ObjectType):
     """
@@ -159,4 +159,29 @@ class YoutubeVideoType(ObjectType):
 
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
-            setattr(self, key, value)
\ No newline at end of file
+            setattr(self, key, value)
+
+class ArticleType(ObjectType):
+    """
+    A GraphQL type representing a news article.
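+
+    published_at is stored as a datetime but exposed as a String; the
+    constructor below converts it with datetime.isoformat(), so, for
+    example, datetime(2025, 3, 24, 20, 23) serializes as
+    "2025-03-24T20:23:00".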
+ + Attributes: + - title: The title of the article + - image: The filename of the article's main image + - sports_type: The specific sport category + - published_at: The publication date + - url: The URL to the full article + """ + id = String() + title = String(required=True) + image = String() + sports_type = String(required=True) + published_at = String(required=True) + url = String(required=True) + + def __init__(self, **kwargs): + for key, value in kwargs.items(): + if key == "published_at" and isinstance(value, datetime): + setattr(self, key, value.isoformat()) + else: + setattr(self, key, value) \ No newline at end of file From cc9ebd1b3bf9ef36b7fc78e1423afbea26c7644a Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 27 Mar 2025 00:48:11 -0400 Subject: [PATCH 02/30] update env template --- .env_template | 1 + 1 file changed, 1 insertion(+) diff --git a/.env_template b/.env_template index 49ea0bd..56b5add 100644 --- a/.env_template +++ b/.env_template @@ -2,3 +2,4 @@ YOUTUBE_API_KEY= MONGO_URI= MONGO_DB= STAGE= +DAILY_SUN_URL= \ No newline at end of file From 9d1792c7144cd841087a7df5d1662ae47da58291 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Fri, 12 Sep 2025 21:19:18 -0400 Subject: [PATCH 03/30] Added logic to prevent adding duplicate games when scraping --- src/database.py | 17 ++++++++++++++++- src/scrapers/games_scraper.py | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/database.py b/src/database.py index d475437..1ec4096 100644 --- a/src/database.py +++ b/src/database.py @@ -48,7 +48,7 @@ def keep_connection_alive(): # Access the database db = client[os.getenv("MONGO_DB", "score_db")] - +print("Total games in DB:", db["game"].count_documents({})) def setup_database_indexes(): """Set up MongoDB indexes for optimal query performance""" @@ -65,6 +65,21 @@ def setup_database_indexes(): # Index for sorting operations game_collection.create_index([("date", -1)], background=True) + + # Index to have unique games so we won't add duplicates + game_collection.create_index( + [ + ("sport", 1), + ("gender", 1), + ("date", 1), + ("opponent_id", 1), + ("city", 1), + ("state", 1), + ("location", 1), + ], + unique=True, + background=True + ) print("✅ MongoDB indexes created successfully") except Exception as e: diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index e174a65..a92e87c 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -7,7 +7,7 @@ from src.utils.helpers import get_dominant_color import base64 import re -import html +from src.database import db import threading @@ -272,5 +272,20 @@ def process_game_data(game_data): "score_breakdown": game_data["score_breakdown"], "utc_date": utc_date_str } + + # update the game if it exists, otherwise insert it as a new game. 
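+    # The filter below mirrors the unique-index key fields, which is what
+    # makes the write idempotent: the first scrape inserts the game, and
+    # every later scrape updates the same document in place instead of
+    # raising a DuplicateKeyError.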
+ db.game.update_one( + { + "sport": game_data["sport"], + "gender": game_data["gender"], + "date": game_data["date"], + "opponent_id": game_data["opponent_id"], + "city": game_data["city"], + "state": game_data["state"], + "location": game_data["location"], + }, + {"$set": game_data}, + upsert=True + ) GameService.create_game(game_data) \ No newline at end of file From d205803c58c124ea8074ab2075189a6bdb634846 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Fri, 12 Sep 2025 21:49:40 -0400 Subject: [PATCH 04/30] Reworked service methods to check for duplicates and fixed game scraping issue --- src/scrapers/games_scraper.py | 15 --------------- src/services/game_service.py | 28 ++++++++++++++++++++++++++++ src/services/team_service.py | 21 +++++++++++++-------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index a92e87c..cd71dd0 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -273,19 +273,4 @@ def process_game_data(game_data): "utc_date": utc_date_str } - # update the game if it exists, otherwise insert it as a new game. - db.game.update_one( - { - "sport": game_data["sport"], - "gender": game_data["gender"], - "date": game_data["date"], - "opponent_id": game_data["opponent_id"], - "city": game_data["city"], - "state": game_data["state"], - "location": game_data["location"], - }, - {"$set": game_data}, - upsert=True - ) - GameService.create_game(game_data) \ No newline at end of file diff --git a/src/services/game_service.py b/src/services/game_service.py index 5463835..ae8381e 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -33,6 +33,34 @@ def create_game(data): opponent_id = data.get("opponent_id") if not TeamService.get_team_by_id(opponent_id): raise ValueError(f"Opponent team with id {opponent_id} does not exist.") + + existing = GameService.get_game_by_key_fields( + data["city"], + data["date"], + data["gender"], + data["location"], + data["opponent_id"], + data["sport"], + data["state"], + ) + + #check if game already exists + if existing: + if isinstance(existing, list) and existing: + existing = existing[0] + + # update existing game + updates = { + "time": data.get("time"), + "result": data.get("result"), + "box_score": data.get("box_score"), + "score_breakdown": data.get("score_breakdown"), + "utc_date": data.get("utc_date"), + } + GameService.update_game(existing.id, updates) + return existing + + # create new game if it doesn't exist game = Game(**data) GameRepository.insert(game) return game diff --git a/src/services/team_service.py b/src/services/team_service.py index 57598f8..7127d21 100644 --- a/src/services/team_service.py +++ b/src/services/team_service.py @@ -1,7 +1,6 @@ from src.repositories import TeamRepository from src.models.team import Team - class TeamService: @staticmethod def get_all_teams(): @@ -13,14 +12,20 @@ def get_all_teams(): @staticmethod def create_team(team_data): """ - Create a new team. - - Args: - team_data (dict): The data for the new team. - - Returns: - Team: The created team. + Create a new team, or update it if it already exists. 
""" + name = team_data.get("name") + if not name: + raise ValueError("Team name is required to create a team.") + + existing = TeamService.get_team_by_name(name) + if existing: + if isinstance(existing, list) and existing: + existing = existing[0] + + TeamService.update_team(existing.id, team_data) + return existing + team = Team(**team_data) TeamRepository.insert(team) return team From e1f080c42d66a3c62645570fb4db0adce2aa5a6e Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Sat, 20 Sep 2025 19:04:43 -0400 Subject: [PATCH 05/30] Added tournament handling and TBD/TBA updates for games --- src/database.py | 14 ++++- src/repositories/game_repository.py | 78 ++++++++++++++++++++++++++ src/scrapers/games_scraper.py | 35 ++++++++++-- src/services/game_service.py | 86 ++++++++++++++++++++--------- src/utils/helpers.py | 42 +++++++++++++- 5 files changed, 220 insertions(+), 35 deletions(-) diff --git a/src/database.py b/src/database.py index 1ec4096..5ea74a4 100644 --- a/src/database.py +++ b/src/database.py @@ -73,13 +73,23 @@ def setup_database_indexes(): ("gender", 1), ("date", 1), ("opponent_id", 1), - ("city", 1), ("state", 1), - ("location", 1), ], unique=True, background=True ) + + # Additional index for tournament games (without opponent_id) + game_collection.create_index( + [ + ("sport", 1), + ("gender", 1), + ("date", 1), + ("city", 1), + ("state", 1), + ], + background=True + ) print("✅ MongoDB indexes created successfully") except Exception as e: diff --git a/src/repositories/game_repository.py b/src/repositories/game_repository.py index bfe5d08..4f05b2f 100644 --- a/src/repositories/game_repository.py +++ b/src/repositories/game_repository.py @@ -130,6 +130,56 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state): return [Game.from_dict(game) for game in games] + @staticmethod + def find_by_tournament_key_fields(city, date, gender, location, sport, state): + """ + Find tournament games by location and date (excluding opponent_id). + This is used when we need to find a tournament game that might have a placeholder team. + Uses flexible matching to handle TBD/TBA values. + """ + game_collection = db["game"] + + # Build flexible query that can handle TBD/TBA values + query = { + "date": date, + "gender": gender, + "sport": sport, + } + + # For city, state, and location, use flexible matching + # This allows finding games even when TBD/TBA values change to real values + city_conditions = [] + if city: + city_conditions.append(city) + else: + city_conditions = [None] + + state_conditions = [] + if state: + state_conditions.append(state) + else: + state_conditions = [None] + + location_conditions = [] + if location: + location_conditions.append(location) + else: + location_conditions = [None] + + query["city"] = {"$in": city_conditions} + query["state"] = {"$in": state_conditions} + query["location"] = {"$in": location_conditions} + + games = list(game_collection.find(query)) + + if not games: + return None + + if len(games) == 1: + return Game.from_dict(games[0]) + + return [Game.from_dict(game) for game in games] + @staticmethod def find_by_sport(sport): """ @@ -156,3 +206,31 @@ def find_by_sport_gender(sport, gender): game_collection = db["game"] games = game_collection.find({"sport": sport, "gender": gender}) return [Game.from_dict(game) for game in games] + + @staticmethod + def find_games_by_sport_gender_after_date(sport, gender, after_date=None): + """ + Find games for a specific sport and gender, optionally after a specific date. 
+ This method returns raw game data without team information. + """ + game_collection = db["game"] + + query = { + "sport": sport, + "gender": gender + } + + if after_date: + query["utc_date"] = {"$gt": after_date} + + games = game_collection.find(query) + return [Game.from_dict(game) for game in games] + + @staticmethod + def delete_games_by_ids(game_ids): + """ + Delete games by their IDs. + """ + game_collection = db["game"] + result = game_collection.delete_many({"_id": {"$in": game_ids}}) + return result.deleted_count diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index cd71dd0..da43692 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -4,7 +4,7 @@ from src.utils.convert_to_utc import convert_to_utc from src.utils.constants import * from src.scrapers.game_details_scrape import scrape_game -from src.utils.helpers import get_dominant_color +from src.utils.helpers import get_dominant_color, normalize_game_data, is_tournament_placeholder_team, is_cornell_loss import base64 import re from src.database import db @@ -164,6 +164,8 @@ def process_game_data(game_data): Args: game_data (dict): A dictionary containing the game data. """ + + game_data = normalize_game_data(game_data) location_data = game_data["location"].split("\n") geo_location = location_data[0] if (",") not in geo_location: @@ -232,16 +234,28 @@ def process_game_data(game_data): if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final): game_data["score_breakdown"] = game_data["score_breakdown"][::-1] - # finds any existing game with the same key fields regardless of time - curr_game = GameService.get_game_by_key_fields( + # Try to find by tournament key fields to handle placeholder teams + curr_game = GameService.get_game_by_tournament_key_fields( city, game_data["date"], game_data["gender"], location, - team.id, game_data["sport"], state ) + + # If no tournament game found, try the regular lookup with opponent_id + if not curr_game: + curr_game = GameService.get_game_by_key_fields( + city, + game_data["date"], + game_data["gender"], + location, + team.id, + game_data["sport"], + state + ) + if isinstance(curr_game, list): if curr_game: curr_game = curr_game[0] @@ -253,8 +267,19 @@ def process_game_data(game_data): "result": game_data["result"], "box_score": game_data["box_score"], "score_breakdown": game_data["score_breakdown"], - "utc_date": utc_date_str + "utc_date": utc_date_str, + "city": city, + "location": location, + "state": state } + + current_team = TeamService.get_team_by_id(curr_game.opponent_id) + if current_team and is_tournament_placeholder_team(current_team.name): + updates["opponent_id"] = team.id + + if is_cornell_loss(game_data["result"]) and game_data["utc_date"]: + GameService.handle_tournament_loss(game_data["sport"], game_data["gender"], game_data["utc_date"]) + GameService.update_game(curr_game.id, updates) return diff --git a/src/services/game_service.py b/src/services/game_service.py index ae8381e..6352dd0 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -1,6 +1,7 @@ from src.repositories.game_repository import GameRepository from src.models.game import Game from src.services.team_service import TeamService +from src.utils.helpers import is_tournament_placeholder_team class GameService: @@ -33,34 +34,7 @@ def create_game(data): opponent_id = data.get("opponent_id") if not TeamService.get_team_by_id(opponent_id): raise ValueError(f"Opponent team with id {opponent_id} does not 
exist.") - - existing = GameService.get_game_by_key_fields( - data["city"], - data["date"], - data["gender"], - data["location"], - data["opponent_id"], - data["sport"], - data["state"], - ) - #check if game already exists - if existing: - if isinstance(existing, list) and existing: - existing = existing[0] - - # update existing game - updates = { - "time": data.get("time"), - "result": data.get("result"), - "box_score": data.get("box_score"), - "score_breakdown": data.get("score_breakdown"), - "utc_date": data.get("utc_date"), - } - GameService.update_game(existing.id, updates) - return existing - - # create new game if it doesn't exist game = Game(**data) GameRepository.insert(game) return game @@ -97,6 +71,16 @@ def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, sta city, date, gender, location, opponent_id, sport, state ) + @staticmethod + def get_game_by_tournament_key_fields(city, date, gender, location, sport, state): + """ + Retrieve a tournament game by location and date (excluding opponent_id). + This is used when we need to find a tournament game that might have a placeholder team. + """ + return GameRepository.find_by_tournament_key_fields( + city, date, gender, location, sport, state + ) + @staticmethod def get_games_by_sport(sport): """ @@ -117,3 +101,51 @@ def get_games_by_sport_gender(sport, gender): Retrieves all game by its sport and gender. """ return GameRepository.find_by_sport_gender(sport, gender) + + @staticmethod + def get_tournament_games_by_sport_gender(sport, gender, after_date=None): + """ + Find tournament games (with placeholder team names) for a specific sport and gender. + Optionally filter by games after a specific date. + """ + games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date) + tournament_games = [] + + for game in games: + team = TeamService.get_team_by_id(game.opponent_id) + if team and is_tournament_placeholder_team(team.name): + tournament_games.append(game) + + return tournament_games + + @staticmethod + def delete_tournament_games_by_sport_gender(sport, gender, after_date=None): + """ + Delete tournament games (with placeholder team names) for a specific sport and gender. + Optionally filter by games after a specific date. + """ + games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date) + tournament_game_ids = [] + + for game in games: + team = TeamService.get_team_by_id(game.opponent_id) + if team and is_tournament_placeholder_team(team.name): + tournament_game_ids.append(game.id) + + if tournament_game_ids: + return GameRepository.delete_games_by_ids(tournament_game_ids) + return 0 + + @staticmethod + def handle_tournament_loss(sport, gender, loss_date): + """ + Handle when a Cornell team loses in a tournament by deleting future tournament games. 
+ + Args: + sport (str): The sport of the team that lost + gender (str): The gender of the team that lost + loss_date (datetime): The date when the team lost + """ + deleted_count = GameService.delete_tournament_games_by_sport_gender(sport, gender, loss_date) + print(f"Deleted {deleted_count} future tournament games for {gender} {sport} after loss on {loss_date}") + return deleted_count diff --git a/src/utils/helpers.py b/src/utils/helpers.py index 0866f79..cb3d759 100644 --- a/src/utils/helpers.py +++ b/src/utils/helpers.py @@ -54,4 +54,44 @@ def get_dominant_color(image_url, white_threshold=200, black_threshold=50): return hex_color except Exception as e: logging.error(f"Error in get_dominant_color for {image_url}: {e}") - return default_color \ No newline at end of file + return default_color + +def normalize_game_data(data: dict): + """ + Normalize placeholder values like TBA/TBD into None. + """ + placeholders = {"TBA", "TBD", "tba", "tbd"} + + for field in ["time", "city", "state"]: + if data.get(field) in placeholders: + data[field] = None + + return data + +def is_tournament_placeholder_team(team_name: str): + """ + Check if a team name is a tournament placeholder. + """ + + placeholder_team_names = [ + "First Round", "Second Round", "Third Round", "Quarterfinals", + "College Cup Semifinals", "College Cup Championship Game", + "ECAC Hockey First Round", "ECAC Hockey Quarterfinals", + "ECAC Hockey Semifinals", "ECAC Hockey Championship Game", + "Regional Semifinals", "Regional Championship", "National Semifinals", + "TBD", "National Championship", "NCAA Wrestling Championships", "NCAA Northeast Regional CHampionships", + "NCAA Cross Country Championships", + ] + return team_name in placeholder_team_names + +def is_cornell_loss(result: str): + """ + Check if the result indicates a Cornell loss. 
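+
+    Illustrative behavior: is_cornell_loss("L, 2-3") returns True and
+    is_cornell_loss("W, 4-1") returns False. The matching is substring-based,
+    so a result phrased like "Cornell defeated Yale" also registers as a
+    loss and may need review upstream.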
+ """ + + if not result: + return False + + # Common loss indicators in result strings + loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"] + return any(indicator in result for indicator in loss_indicators) \ No newline at end of file From e2ac05f1c076dde746f864127ca880cac6114ef0 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Sat, 20 Sep 2025 19:10:14 -0400 Subject: [PATCH 06/30] remove comments that trigger alerts --- src/services/game_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/services/game_service.py b/src/services/game_service.py index 6352dd0..95d31ed 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -147,5 +147,4 @@ def handle_tournament_loss(sport, gender, loss_date): loss_date (datetime): The date when the team lost """ deleted_count = GameService.delete_tournament_games_by_sport_gender(sport, gender, loss_date) - print(f"Deleted {deleted_count} future tournament games for {gender} {sport} after loss on {loss_date}") return deleted_count From 756ec32d0ee21db6caa8a261e8072c2654491ecd Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Sat, 20 Sep 2025 19:12:32 -0400 Subject: [PATCH 07/30] remove db game count logging --- src/database.py | 1 - src/services/team_service.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/database.py b/src/database.py index 5ea74a4..177a1ee 100644 --- a/src/database.py +++ b/src/database.py @@ -48,7 +48,6 @@ def keep_connection_alive(): # Access the database db = client[os.getenv("MONGO_DB", "score_db")] -print("Total games in DB:", db["game"].count_documents({})) def setup_database_indexes(): """Set up MongoDB indexes for optimal query performance""" diff --git a/src/services/team_service.py b/src/services/team_service.py index 7127d21..c961534 100644 --- a/src/services/team_service.py +++ b/src/services/team_service.py @@ -13,6 +13,11 @@ def get_all_teams(): def create_team(team_data): """ Create a new team, or update it if it already exists. + + Args: + team_data (dict): The data for the new team. + Returns: + Team: The created team. """ name = team_data.get("name") if not name: From 907fe84f41b2b296c67e2456659794384fe91ef9 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Wed, 24 Sep 2025 17:44:36 -0400 Subject: [PATCH 08/30] Revert "Merge pull request #36 from cuappdev/fix-duplicate-games" This reverts commit 721f0f8a06eb0e3dc8ada6db08d8d4aad685f716, reversing changes made to bd146590bf0e0690cbfb39ab34adbdb88ed7af68. 
--- src/database.py | 26 +--------- src/repositories/game_repository.py | 78 ----------------------------- src/scrapers/games_scraper.py | 39 +++------------ src/services/game_service.py | 59 ---------------------- src/services/team_service.py | 18 ++----- src/utils/helpers.py | 42 +--------------- 6 files changed, 13 insertions(+), 249 deletions(-) diff --git a/src/database.py b/src/database.py index 177a1ee..d475437 100644 --- a/src/database.py +++ b/src/database.py @@ -49,6 +49,7 @@ def keep_connection_alive(): # Access the database db = client[os.getenv("MONGO_DB", "score_db")] + def setup_database_indexes(): """Set up MongoDB indexes for optimal query performance""" try: @@ -64,31 +65,6 @@ def setup_database_indexes(): # Index for sorting operations game_collection.create_index([("date", -1)], background=True) - - # Index to have unique games so we won't add duplicates - game_collection.create_index( - [ - ("sport", 1), - ("gender", 1), - ("date", 1), - ("opponent_id", 1), - ("state", 1), - ], - unique=True, - background=True - ) - - # Additional index for tournament games (without opponent_id) - game_collection.create_index( - [ - ("sport", 1), - ("gender", 1), - ("date", 1), - ("city", 1), - ("state", 1), - ], - background=True - ) print("✅ MongoDB indexes created successfully") except Exception as e: diff --git a/src/repositories/game_repository.py b/src/repositories/game_repository.py index 4f05b2f..bfe5d08 100644 --- a/src/repositories/game_repository.py +++ b/src/repositories/game_repository.py @@ -130,56 +130,6 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state): return [Game.from_dict(game) for game in games] - @staticmethod - def find_by_tournament_key_fields(city, date, gender, location, sport, state): - """ - Find tournament games by location and date (excluding opponent_id). - This is used when we need to find a tournament game that might have a placeholder team. - Uses flexible matching to handle TBD/TBA values. - """ - game_collection = db["game"] - - # Build flexible query that can handle TBD/TBA values - query = { - "date": date, - "gender": gender, - "sport": sport, - } - - # For city, state, and location, use flexible matching - # This allows finding games even when TBD/TBA values change to real values - city_conditions = [] - if city: - city_conditions.append(city) - else: - city_conditions = [None] - - state_conditions = [] - if state: - state_conditions.append(state) - else: - state_conditions = [None] - - location_conditions = [] - if location: - location_conditions.append(location) - else: - location_conditions = [None] - - query["city"] = {"$in": city_conditions} - query["state"] = {"$in": state_conditions} - query["location"] = {"$in": location_conditions} - - games = list(game_collection.find(query)) - - if not games: - return None - - if len(games) == 1: - return Game.from_dict(games[0]) - - return [Game.from_dict(game) for game in games] - @staticmethod def find_by_sport(sport): """ @@ -206,31 +156,3 @@ def find_by_sport_gender(sport, gender): game_collection = db["game"] games = game_collection.find({"sport": sport, "gender": gender}) return [Game.from_dict(game) for game in games] - - @staticmethod - def find_games_by_sport_gender_after_date(sport, gender, after_date=None): - """ - Find games for a specific sport and gender, optionally after a specific date. - This method returns raw game data without team information. 
- """ - game_collection = db["game"] - - query = { - "sport": sport, - "gender": gender - } - - if after_date: - query["utc_date"] = {"$gt": after_date} - - games = game_collection.find(query) - return [Game.from_dict(game) for game in games] - - @staticmethod - def delete_games_by_ids(game_ids): - """ - Delete games by their IDs. - """ - game_collection = db["game"] - result = game_collection.delete_many({"_id": {"$in": game_ids}}) - return result.deleted_count diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index da43692..e174a65 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -4,10 +4,10 @@ from src.utils.convert_to_utc import convert_to_utc from src.utils.constants import * from src.scrapers.game_details_scrape import scrape_game -from src.utils.helpers import get_dominant_color, normalize_game_data, is_tournament_placeholder_team, is_cornell_loss +from src.utils.helpers import get_dominant_color import base64 import re -from src.database import db +import html import threading @@ -164,8 +164,6 @@ def process_game_data(game_data): Args: game_data (dict): A dictionary containing the game data. """ - - game_data = normalize_game_data(game_data) location_data = game_data["location"].split("\n") geo_location = location_data[0] if (",") not in geo_location: @@ -234,28 +232,16 @@ def process_game_data(game_data): if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final): game_data["score_breakdown"] = game_data["score_breakdown"][::-1] - # Try to find by tournament key fields to handle placeholder teams - curr_game = GameService.get_game_by_tournament_key_fields( + # finds any existing game with the same key fields regardless of time + curr_game = GameService.get_game_by_key_fields( city, game_data["date"], game_data["gender"], location, + team.id, game_data["sport"], state ) - - # If no tournament game found, try the regular lookup with opponent_id - if not curr_game: - curr_game = GameService.get_game_by_key_fields( - city, - game_data["date"], - game_data["gender"], - location, - team.id, - game_data["sport"], - state - ) - if isinstance(curr_game, list): if curr_game: curr_game = curr_game[0] @@ -267,19 +253,8 @@ def process_game_data(game_data): "result": game_data["result"], "box_score": game_data["box_score"], "score_breakdown": game_data["score_breakdown"], - "utc_date": utc_date_str, - "city": city, - "location": location, - "state": state + "utc_date": utc_date_str } - - current_team = TeamService.get_team_by_id(curr_game.opponent_id) - if current_team and is_tournament_placeholder_team(current_team.name): - updates["opponent_id"] = team.id - - if is_cornell_loss(game_data["result"]) and game_data["utc_date"]: - GameService.handle_tournament_loss(game_data["sport"], game_data["gender"], game_data["utc_date"]) - GameService.update_game(curr_game.id, updates) return @@ -297,5 +272,5 @@ def process_game_data(game_data): "score_breakdown": game_data["score_breakdown"], "utc_date": utc_date_str } - + GameService.create_game(game_data) \ No newline at end of file diff --git a/src/services/game_service.py b/src/services/game_service.py index 95d31ed..5463835 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -1,7 +1,6 @@ from src.repositories.game_repository import GameRepository from src.models.game import Game from src.services.team_service import TeamService -from src.utils.helpers import is_tournament_placeholder_team class GameService: @@ -34,7 +33,6 @@ def 
create_game(data): opponent_id = data.get("opponent_id") if not TeamService.get_team_by_id(opponent_id): raise ValueError(f"Opponent team with id {opponent_id} does not exist.") - game = Game(**data) GameRepository.insert(game) return game @@ -71,16 +69,6 @@ def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, sta city, date, gender, location, opponent_id, sport, state ) - @staticmethod - def get_game_by_tournament_key_fields(city, date, gender, location, sport, state): - """ - Retrieve a tournament game by location and date (excluding opponent_id). - This is used when we need to find a tournament game that might have a placeholder team. - """ - return GameRepository.find_by_tournament_key_fields( - city, date, gender, location, sport, state - ) - @staticmethod def get_games_by_sport(sport): """ @@ -101,50 +89,3 @@ def get_games_by_sport_gender(sport, gender): Retrieves all game by its sport and gender. """ return GameRepository.find_by_sport_gender(sport, gender) - - @staticmethod - def get_tournament_games_by_sport_gender(sport, gender, after_date=None): - """ - Find tournament games (with placeholder team names) for a specific sport and gender. - Optionally filter by games after a specific date. - """ - games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date) - tournament_games = [] - - for game in games: - team = TeamService.get_team_by_id(game.opponent_id) - if team and is_tournament_placeholder_team(team.name): - tournament_games.append(game) - - return tournament_games - - @staticmethod - def delete_tournament_games_by_sport_gender(sport, gender, after_date=None): - """ - Delete tournament games (with placeholder team names) for a specific sport and gender. - Optionally filter by games after a specific date. - """ - games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date) - tournament_game_ids = [] - - for game in games: - team = TeamService.get_team_by_id(game.opponent_id) - if team and is_tournament_placeholder_team(team.name): - tournament_game_ids.append(game.id) - - if tournament_game_ids: - return GameRepository.delete_games_by_ids(tournament_game_ids) - return 0 - - @staticmethod - def handle_tournament_loss(sport, gender, loss_date): - """ - Handle when a Cornell team loses in a tournament by deleting future tournament games. - - Args: - sport (str): The sport of the team that lost - gender (str): The gender of the team that lost - loss_date (datetime): The date when the team lost - """ - deleted_count = GameService.delete_tournament_games_by_sport_gender(sport, gender, loss_date) - return deleted_count diff --git a/src/services/team_service.py b/src/services/team_service.py index c961534..57598f8 100644 --- a/src/services/team_service.py +++ b/src/services/team_service.py @@ -1,6 +1,7 @@ from src.repositories import TeamRepository from src.models.team import Team + class TeamService: @staticmethod def get_all_teams(): @@ -12,25 +13,14 @@ def get_all_teams(): @staticmethod def create_team(team_data): """ - Create a new team, or update it if it already exists. - + Create a new team. + Args: team_data (dict): The data for the new team. + Returns: Team: The created team. 
""" - name = team_data.get("name") - if not name: - raise ValueError("Team name is required to create a team.") - - existing = TeamService.get_team_by_name(name) - if existing: - if isinstance(existing, list) and existing: - existing = existing[0] - - TeamService.update_team(existing.id, team_data) - return existing - team = Team(**team_data) TeamRepository.insert(team) return team diff --git a/src/utils/helpers.py b/src/utils/helpers.py index cb3d759..0866f79 100644 --- a/src/utils/helpers.py +++ b/src/utils/helpers.py @@ -54,44 +54,4 @@ def get_dominant_color(image_url, white_threshold=200, black_threshold=50): return hex_color except Exception as e: logging.error(f"Error in get_dominant_color for {image_url}: {e}") - return default_color - -def normalize_game_data(data: dict): - """ - Normalize placeholder values like TBA/TBD into None. - """ - placeholders = {"TBA", "TBD", "tba", "tbd"} - - for field in ["time", "city", "state"]: - if data.get(field) in placeholders: - data[field] = None - - return data - -def is_tournament_placeholder_team(team_name: str): - """ - Check if a team name is a tournament placeholder. - """ - - placeholder_team_names = [ - "First Round", "Second Round", "Third Round", "Quarterfinals", - "College Cup Semifinals", "College Cup Championship Game", - "ECAC Hockey First Round", "ECAC Hockey Quarterfinals", - "ECAC Hockey Semifinals", "ECAC Hockey Championship Game", - "Regional Semifinals", "Regional Championship", "National Semifinals", - "TBD", "National Championship", "NCAA Wrestling Championships", "NCAA Northeast Regional CHampionships", - "NCAA Cross Country Championships", - ] - return team_name in placeholder_team_names - -def is_cornell_loss(result: str): - """ - Check if the result indicates a Cornell loss. - """ - - if not result: - return False - - # Common loss indicators in result strings - loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"] - return any(indicator in result for indicator in loss_indicators) \ No newline at end of file + return default_color \ No newline at end of file From 1de4e00579970911e650dddc319d9d85ae472a1f Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Wed, 24 Sep 2025 17:44:54 -0400 Subject: [PATCH 09/30] revert pr --- binary_segregation.py | 94 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 binary_segregation.py diff --git a/binary_segregation.py b/binary_segregation.py new file mode 100644 index 0000000..fbc538d --- /dev/null +++ b/binary_segregation.py @@ -0,0 +1,94 @@ +def getMaxCost(s): + """ + Calculate the maximum possible cost to segregate a binary string. + + Rules: + - A "1" can be moved to the right until it reaches the end or another "1" + - Cost = 1 + number of places moved + - Each "1" must be moved to its maximum possible position + + The strategy is to maximize the total cost by ensuring each '1' moves as far as possible. + We can do this by moving '1's one step at a time to maximize the number of operations. 
+ + Args: + s (str): Binary string containing only '0' and '1' + + Returns: + int: Maximum possible cost to segregate the string + """ + n = len(s) + ones_count = s.count('1') + + if ones_count == 0: + return 0 + + # Convert to list for easier manipulation + arr = list(s) + total_cost = 0 + + # Move each '1' to its maximum possible position + # We'll process from left to right and move each '1' as far right as possible + for i in range(n): + if arr[i] == '1': + # Find the rightmost position this '1' can move to + # It can move until it hits another '1' or the end + j = i + while j < n - 1 and arr[j + 1] != '1': + # Move this '1' one position to the right + arr[j], arr[j + 1] = arr[j + 1], arr[j] + j += 1 + total_cost += 1 + 1 # Cost = 1 + distance (distance = 1 for each step) + + return total_cost + + +def getMaxCostOptimized(s): + """ + Optimized version that calculates cost without explicitly tracking positions. + """ + n = len(s) + total_cost = 0 + ones_count = 0 + + # Process from right to left + for i in range(n - 1, -1, -1): + if s[i] == '1': + # This '1' can move to position (n - 1 - ones_count) + # Distance = (n - 1 - ones_count) - i + distance = (n - 1 - ones_count) - i + if distance > 0: + total_cost += 1 + distance + ones_count += 1 + + return total_cost + + +# Test with the provided example +if __name__ == "__main__": + # Test case from the problem + s = "110100" + result = getMaxCost(s) + print(f"Input: {s}") + print(f"Maximum cost: {result}") + + # Test with optimized version + result_opt = getMaxCostOptimized(s) + print(f"Optimized result: {result_opt}") + + # Additional test cases + test_cases = [ + "110100", # Expected: 13 + "111000", # All 1s at start + "000111", # All 1s at end + "101010", # Alternating + "100000", # Single 1 at start + "000001", # Single 1 at end + "111111", # All 1s + "000000", # All 0s + ] + + print("\nTesting additional cases:") + for test in test_cases: + cost = getMaxCost(test) + cost_opt = getMaxCostOptimized(test) + print(f"{test}: {cost} (both methods: {cost == cost_opt})") From bbe2408952872197c75dff80561166cef2180a98 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Wed, 24 Sep 2025 17:45:17 -0400 Subject: [PATCH 10/30] revert pr --- binary_segregation.py | 94 ------------------------------------------- 1 file changed, 94 deletions(-) delete mode 100644 binary_segregation.py diff --git a/binary_segregation.py b/binary_segregation.py deleted file mode 100644 index fbc538d..0000000 --- a/binary_segregation.py +++ /dev/null @@ -1,94 +0,0 @@ -def getMaxCost(s): - """ - Calculate the maximum possible cost to segregate a binary string. - - Rules: - - A "1" can be moved to the right until it reaches the end or another "1" - - Cost = 1 + number of places moved - - Each "1" must be moved to its maximum possible position - - The strategy is to maximize the total cost by ensuring each '1' moves as far as possible. - We can do this by moving '1's one step at a time to maximize the number of operations. 
- - Args: - s (str): Binary string containing only '0' and '1' - - Returns: - int: Maximum possible cost to segregate the string - """ - n = len(s) - ones_count = s.count('1') - - if ones_count == 0: - return 0 - - # Convert to list for easier manipulation - arr = list(s) - total_cost = 0 - - # Move each '1' to its maximum possible position - # We'll process from left to right and move each '1' as far right as possible - for i in range(n): - if arr[i] == '1': - # Find the rightmost position this '1' can move to - # It can move until it hits another '1' or the end - j = i - while j < n - 1 and arr[j + 1] != '1': - # Move this '1' one position to the right - arr[j], arr[j + 1] = arr[j + 1], arr[j] - j += 1 - total_cost += 1 + 1 # Cost = 1 + distance (distance = 1 for each step) - - return total_cost - - -def getMaxCostOptimized(s): - """ - Optimized version that calculates cost without explicitly tracking positions. - """ - n = len(s) - total_cost = 0 - ones_count = 0 - - # Process from right to left - for i in range(n - 1, -1, -1): - if s[i] == '1': - # This '1' can move to position (n - 1 - ones_count) - # Distance = (n - 1 - ones_count) - i - distance = (n - 1 - ones_count) - i - if distance > 0: - total_cost += 1 + distance - ones_count += 1 - - return total_cost - - -# Test with the provided example -if __name__ == "__main__": - # Test case from the problem - s = "110100" - result = getMaxCost(s) - print(f"Input: {s}") - print(f"Maximum cost: {result}") - - # Test with optimized version - result_opt = getMaxCostOptimized(s) - print(f"Optimized result: {result_opt}") - - # Additional test cases - test_cases = [ - "110100", # Expected: 13 - "111000", # All 1s at start - "000111", # All 1s at end - "101010", # Alternating - "100000", # Single 1 at start - "000001", # Single 1 at end - "111111", # All 1s - "000000", # All 0s - ] - - print("\nTesting additional cases:") - for test in test_cases: - cost = getMaxCost(test) - cost_opt = getMaxCostOptimized(test) - print(f"{test}: {cost} (both methods: {cost == cost_opt})") From a5f0e7e698cc3b3c5365c3bcae36a331fddb5df7 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Tue, 30 Sep 2025 17:39:08 -0400 Subject: [PATCH 11/30] Fix gunicorn argument parsing and MongoDB _id field error - Fix argument parsing to only run when script is executed directly, not when imported by gunicorn - Fix MongoDB _id field modification error in Daily Sun scraper by removing _id from upsert operations - Maintain all functionality while eliminating error messages --- app.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 69a1992..8534134 100644 --- a/app.py +++ b/app.py @@ -102,8 +102,6 @@ def parse_args(): ) return parser.parse_args() -args = parse_args() - def signal_handler(sig, frame): sys.exit(0) @@ -111,6 +109,16 @@ def signal_handler(sig, frame): signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) +# Only parse arguments when running directly (not when imported by gunicorn) +if __name__ == "__main__": + args = parse_args() +else: + # Default args when imported by gunicorn + class DefaultArgs: + no_scrape = False + no_daily_sun = False + args = DefaultArgs() + # Only run scraping tasks if not disabled if not args.no_scrape: from flask_apscheduler import APScheduler From c2d8df252f397d180ea97457905f2e6d0080aac6 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Tue, 30 Sep 2025 17:41:52 -0400 Subject: [PATCH 12/30] Move signal handlers after argument parsing to fix 
initialization order --- app.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 69a1992..8fed869 100644 --- a/app.py +++ b/app.py @@ -102,7 +102,15 @@ def parse_args(): ) return parser.parse_args() -args = parse_args() +# Only parse arguments when running directly (not when imported by gunicorn) +if __name__ == "__main__": + args = parse_args() +else: + # Default args when imported by gunicorn + class DefaultArgs: + no_scrape = False + no_daily_sun = False + args = DefaultArgs() def signal_handler(sig, frame): sys.exit(0) From 7d35699b449185bdc82737d9e13aa9f7a481aa33 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Fri, 12 Sep 2025 21:19:18 -0400 Subject: [PATCH 13/30] Added logic to prevent adding duplicate games when scraping --- src/database.py | 15 +++++++++++++++ src/scrapers/games_scraper.py | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/database.py b/src/database.py index 85d0dee..8b7c682 100644 --- a/src/database.py +++ b/src/database.py @@ -66,6 +66,21 @@ def setup_database_indexes(): # Index for sorting operations game_collection.create_index([("date", -1)], background=True) + + # Index to have unique games so we won't add duplicates + game_collection.create_index( + [ + ("sport", 1), + ("gender", 1), + ("date", 1), + ("opponent_id", 1), + ("city", 1), + ("state", 1), + ("location", 1), + ], + unique=True, + background=True + ) print("✅ MongoDB indexes created successfully") except Exception as e: diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index e174a65..a92e87c 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -7,7 +7,7 @@ from src.utils.helpers import get_dominant_color import base64 import re -import html +from src.database import db import threading @@ -272,5 +272,20 @@ def process_game_data(game_data): "score_breakdown": game_data["score_breakdown"], "utc_date": utc_date_str } + + # update the game if it exists, otherwise insert it as a new game. + db.game.update_one( + { + "sport": game_data["sport"], + "gender": game_data["gender"], + "date": game_data["date"], + "opponent_id": game_data["opponent_id"], + "city": game_data["city"], + "state": game_data["state"], + "location": game_data["location"], + }, + {"$set": game_data}, + upsert=True + ) GameService.create_game(game_data) \ No newline at end of file From b66d59f91badc5431616dbb1581ee8a6fd0f7b1a Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Fri, 12 Sep 2025 21:49:40 -0400 Subject: [PATCH 14/30] Reworked service methods to check for duplicates and fixed game scraping issue --- src/scrapers/games_scraper.py | 15 --------------- src/services/game_service.py | 28 ++++++++++++++++++++++++++++ src/services/team_service.py | 21 +++++++++++++-------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index a92e87c..cd71dd0 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -273,19 +273,4 @@ def process_game_data(game_data): "utc_date": utc_date_str } - # update the game if it exists, otherwise insert it as a new game. 
- db.game.update_one( - { - "sport": game_data["sport"], - "gender": game_data["gender"], - "date": game_data["date"], - "opponent_id": game_data["opponent_id"], - "city": game_data["city"], - "state": game_data["state"], - "location": game_data["location"], - }, - {"$set": game_data}, - upsert=True - ) - GameService.create_game(game_data) \ No newline at end of file diff --git a/src/services/game_service.py b/src/services/game_service.py index 5463835..ae8381e 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -33,6 +33,34 @@ def create_game(data): opponent_id = data.get("opponent_id") if not TeamService.get_team_by_id(opponent_id): raise ValueError(f"Opponent team with id {opponent_id} does not exist.") + + existing = GameService.get_game_by_key_fields( + data["city"], + data["date"], + data["gender"], + data["location"], + data["opponent_id"], + data["sport"], + data["state"], + ) + + #check if game already exists + if existing: + if isinstance(existing, list) and existing: + existing = existing[0] + + # update existing game + updates = { + "time": data.get("time"), + "result": data.get("result"), + "box_score": data.get("box_score"), + "score_breakdown": data.get("score_breakdown"), + "utc_date": data.get("utc_date"), + } + GameService.update_game(existing.id, updates) + return existing + + # create new game if it doesn't exist game = Game(**data) GameRepository.insert(game) return game diff --git a/src/services/team_service.py b/src/services/team_service.py index 57598f8..7127d21 100644 --- a/src/services/team_service.py +++ b/src/services/team_service.py @@ -1,7 +1,6 @@ from src.repositories import TeamRepository from src.models.team import Team - class TeamService: @staticmethod def get_all_teams(): @@ -13,14 +12,20 @@ def get_all_teams(): @staticmethod def create_team(team_data): """ - Create a new team. - - Args: - team_data (dict): The data for the new team. - - Returns: - Team: The created team. + Create a new team, or update it if it already exists. 
""" + name = team_data.get("name") + if not name: + raise ValueError("Team name is required to create a team.") + + existing = TeamService.get_team_by_name(name) + if existing: + if isinstance(existing, list) and existing: + existing = existing[0] + + TeamService.update_team(existing.id, team_data) + return existing + team = Team(**team_data) TeamRepository.insert(team) return team From 42d139437ce1bfdd4230834e0154347486ea104b Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Sat, 20 Sep 2025 19:04:43 -0400 Subject: [PATCH 15/30] Added tournament handling and TBD/TBA updates for games --- src/database.py | 14 ++++- src/repositories/game_repository.py | 78 ++++++++++++++++++++++++++ src/scrapers/games_scraper.py | 35 ++++++++++-- src/services/game_service.py | 86 ++++++++++++++++++++--------- src/utils/helpers.py | 42 +++++++++++++- 5 files changed, 220 insertions(+), 35 deletions(-) diff --git a/src/database.py b/src/database.py index 8b7c682..834808d 100644 --- a/src/database.py +++ b/src/database.py @@ -74,13 +74,23 @@ def setup_database_indexes(): ("gender", 1), ("date", 1), ("opponent_id", 1), - ("city", 1), ("state", 1), - ("location", 1), ], unique=True, background=True ) + + # Additional index for tournament games (without opponent_id) + game_collection.create_index( + [ + ("sport", 1), + ("gender", 1), + ("date", 1), + ("city", 1), + ("state", 1), + ], + background=True + ) print("✅ MongoDB indexes created successfully") except Exception as e: diff --git a/src/repositories/game_repository.py b/src/repositories/game_repository.py index bfe5d08..4f05b2f 100644 --- a/src/repositories/game_repository.py +++ b/src/repositories/game_repository.py @@ -130,6 +130,56 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state): return [Game.from_dict(game) for game in games] + @staticmethod + def find_by_tournament_key_fields(city, date, gender, location, sport, state): + """ + Find tournament games by location and date (excluding opponent_id). + This is used when we need to find a tournament game that might have a placeholder team. + Uses flexible matching to handle TBD/TBA values. + """ + game_collection = db["game"] + + # Build flexible query that can handle TBD/TBA values + query = { + "date": date, + "gender": gender, + "sport": sport, + } + + # For city, state, and location, use flexible matching + # This allows finding games even when TBD/TBA values change to real values + city_conditions = [] + if city: + city_conditions.append(city) + else: + city_conditions = [None] + + state_conditions = [] + if state: + state_conditions.append(state) + else: + state_conditions = [None] + + location_conditions = [] + if location: + location_conditions.append(location) + else: + location_conditions = [None] + + query["city"] = {"$in": city_conditions} + query["state"] = {"$in": state_conditions} + query["location"] = {"$in": location_conditions} + + games = list(game_collection.find(query)) + + if not games: + return None + + if len(games) == 1: + return Game.from_dict(games[0]) + + return [Game.from_dict(game) for game in games] + @staticmethod def find_by_sport(sport): """ @@ -156,3 +206,31 @@ def find_by_sport_gender(sport, gender): game_collection = db["game"] games = game_collection.find({"sport": sport, "gender": gender}) return [Game.from_dict(game) for game in games] + + @staticmethod + def find_games_by_sport_gender_after_date(sport, gender, after_date=None): + """ + Find games for a specific sport and gender, optionally after a specific date. 
+ This method returns raw game data without team information. + """ + game_collection = db["game"] + + query = { + "sport": sport, + "gender": gender + } + + if after_date: + query["utc_date"] = {"$gt": after_date} + + games = game_collection.find(query) + return [Game.from_dict(game) for game in games] + + @staticmethod + def delete_games_by_ids(game_ids): + """ + Delete games by their IDs. + """ + game_collection = db["game"] + result = game_collection.delete_many({"_id": {"$in": game_ids}}) + return result.deleted_count diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index cd71dd0..da43692 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -4,7 +4,7 @@ from src.utils.convert_to_utc import convert_to_utc from src.utils.constants import * from src.scrapers.game_details_scrape import scrape_game -from src.utils.helpers import get_dominant_color +from src.utils.helpers import get_dominant_color, normalize_game_data, is_tournament_placeholder_team, is_cornell_loss import base64 import re from src.database import db @@ -164,6 +164,8 @@ def process_game_data(game_data): Args: game_data (dict): A dictionary containing the game data. """ + + game_data = normalize_game_data(game_data) location_data = game_data["location"].split("\n") geo_location = location_data[0] if (",") not in geo_location: @@ -232,16 +234,28 @@ def process_game_data(game_data): if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final): game_data["score_breakdown"] = game_data["score_breakdown"][::-1] - # finds any existing game with the same key fields regardless of time - curr_game = GameService.get_game_by_key_fields( + # Try to find by tournament key fields to handle placeholder teams + curr_game = GameService.get_game_by_tournament_key_fields( city, game_data["date"], game_data["gender"], location, - team.id, game_data["sport"], state ) + + # If no tournament game found, try the regular lookup with opponent_id + if not curr_game: + curr_game = GameService.get_game_by_key_fields( + city, + game_data["date"], + game_data["gender"], + location, + team.id, + game_data["sport"], + state + ) + if isinstance(curr_game, list): if curr_game: curr_game = curr_game[0] @@ -253,8 +267,19 @@ def process_game_data(game_data): "result": game_data["result"], "box_score": game_data["box_score"], "score_breakdown": game_data["score_breakdown"], - "utc_date": utc_date_str + "utc_date": utc_date_str, + "city": city, + "location": location, + "state": state } + + current_team = TeamService.get_team_by_id(curr_game.opponent_id) + if current_team and is_tournament_placeholder_team(current_team.name): + updates["opponent_id"] = team.id + + if is_cornell_loss(game_data["result"]) and game_data["utc_date"]: + GameService.handle_tournament_loss(game_data["sport"], game_data["gender"], game_data["utc_date"]) + GameService.update_game(curr_game.id, updates) return diff --git a/src/services/game_service.py b/src/services/game_service.py index ae8381e..6352dd0 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -1,6 +1,7 @@ from src.repositories.game_repository import GameRepository from src.models.game import Game from src.services.team_service import TeamService +from src.utils.helpers import is_tournament_placeholder_team class GameService: @@ -33,34 +34,7 @@ def create_game(data): opponent_id = data.get("opponent_id") if not TeamService.get_team_by_id(opponent_id): raise ValueError(f"Opponent team with id {opponent_id} does not 
exist.") - - existing = GameService.get_game_by_key_fields( - data["city"], - data["date"], - data["gender"], - data["location"], - data["opponent_id"], - data["sport"], - data["state"], - ) - #check if game already exists - if existing: - if isinstance(existing, list) and existing: - existing = existing[0] - - # update existing game - updates = { - "time": data.get("time"), - "result": data.get("result"), - "box_score": data.get("box_score"), - "score_breakdown": data.get("score_breakdown"), - "utc_date": data.get("utc_date"), - } - GameService.update_game(existing.id, updates) - return existing - - # create new game if it doesn't exist game = Game(**data) GameRepository.insert(game) return game @@ -97,6 +71,16 @@ def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, sta city, date, gender, location, opponent_id, sport, state ) + @staticmethod + def get_game_by_tournament_key_fields(city, date, gender, location, sport, state): + """ + Retrieve a tournament game by location and date (excluding opponent_id). + This is used when we need to find a tournament game that might have a placeholder team. + """ + return GameRepository.find_by_tournament_key_fields( + city, date, gender, location, sport, state + ) + @staticmethod def get_games_by_sport(sport): """ @@ -117,3 +101,51 @@ def get_games_by_sport_gender(sport, gender): Retrieves all game by its sport and gender. """ return GameRepository.find_by_sport_gender(sport, gender) + + @staticmethod + def get_tournament_games_by_sport_gender(sport, gender, after_date=None): + """ + Find tournament games (with placeholder team names) for a specific sport and gender. + Optionally filter by games after a specific date. + """ + games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date) + tournament_games = [] + + for game in games: + team = TeamService.get_team_by_id(game.opponent_id) + if team and is_tournament_placeholder_team(team.name): + tournament_games.append(game) + + return tournament_games + + @staticmethod + def delete_tournament_games_by_sport_gender(sport, gender, after_date=None): + """ + Delete tournament games (with placeholder team names) for a specific sport and gender. + Optionally filter by games after a specific date. + """ + games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date) + tournament_game_ids = [] + + for game in games: + team = TeamService.get_team_by_id(game.opponent_id) + if team and is_tournament_placeholder_team(team.name): + tournament_game_ids.append(game.id) + + if tournament_game_ids: + return GameRepository.delete_games_by_ids(tournament_game_ids) + return 0 + + @staticmethod + def handle_tournament_loss(sport, gender, loss_date): + """ + Handle when a Cornell team loses in a tournament by deleting future tournament games. 
+
+        Args:
+            sport (str): The sport of the team that lost
+            gender (str): The gender of the team that lost
+            loss_date (datetime): The date when the team lost
+        """
+        deleted_count = GameService.delete_tournament_games_by_sport_gender(sport, gender, loss_date)
+        print(f"Deleted {deleted_count} future tournament games for {gender} {sport} after loss on {loss_date}")
+        return deleted_count
diff --git a/src/utils/helpers.py b/src/utils/helpers.py
index 0866f79..cb3d759 100644
--- a/src/utils/helpers.py
+++ b/src/utils/helpers.py
@@ -54,4 +54,44 @@ def get_dominant_color(image_url, white_threshold=200, black_threshold=50):
         return hex_color
     except Exception as e:
         logging.error(f"Error in get_dominant_color for {image_url}: {e}")
-        return default_color
\ No newline at end of file
+        return default_color
+
+def normalize_game_data(data: dict):
+    """
+    Normalize placeholder values like TBA/TBD into None.
+    """
+    placeholders = {"TBA", "TBD", "tba", "tbd"}
+
+    for field in ["time", "city", "state"]:
+        if data.get(field) in placeholders:
+            data[field] = None
+
+    return data
+
+def is_tournament_placeholder_team(team_name: str):
+    """
+    Check if a team name is a tournament placeholder.
+    """
+
+    placeholder_team_names = [
+        "First Round", "Second Round", "Third Round", "Quarterfinals",
+        "College Cup Semifinals", "College Cup Championship Game",
+        "ECAC Hockey First Round", "ECAC Hockey Quarterfinals",
+        "ECAC Hockey Semifinals", "ECAC Hockey Championship Game",
+        "Regional Semifinals", "Regional Championship", "National Semifinals",
+        "TBD", "National Championship", "NCAA Wrestling Championships", "NCAA Northeast Regional Championships",
+        "NCAA Cross Country Championships",
+    ]
+    return team_name in placeholder_team_names
+
+def is_cornell_loss(result: str):
+    """
+    Check if the result indicates a Cornell loss.
+    """
+
+    if not result:
+        return False
+
+    # Common loss indicators in result strings
+    loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"]
+    return any(indicator in result for indicator in loss_indicators)
\ No newline at end of file

From 00f8c7d047f9c5403a0ed0466284eb4636f93c46 Mon Sep 17 00:00:00 2001
From: Kevin Biliguun
Date: Sat, 20 Sep 2025 19:10:14 -0400
Subject: [PATCH 16/30] remove comments that trigger alerts

---
 src/services/game_service.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/services/game_service.py b/src/services/game_service.py
index 6352dd0..95d31ed 100644
--- a/src/services/game_service.py
+++ b/src/services/game_service.py
@@ -147,5 +147,4 @@ def handle_tournament_loss(sport, gender, loss_date):
         loss_date (datetime): The date when the team lost
         """
         deleted_count = GameService.delete_tournament_games_by_sport_gender(sport, gender, loss_date)
-        print(f"Deleted {deleted_count} future tournament games for {gender} {sport} after loss on {loss_date}")
         return deleted_count

From 310de060776d61896abb1117681ba6fec81ee6b0 Mon Sep 17 00:00:00 2001
From: Kevin Biliguun
Date: Sat, 20 Sep 2025 19:12:32 -0400
Subject: [PATCH 17/30] remove db game count logging

---
 src/services/team_service.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/services/team_service.py b/src/services/team_service.py
index 7127d21..c961534 100644
--- a/src/services/team_service.py
+++ b/src/services/team_service.py
@@ -13,6 +13,11 @@ def get_all_teams():
     def create_team(team_data):
         """
         Create a new team, or update it if it already exists.
+
+        Args:
+            team_data (dict): The data for the new team.
+        Returns:
+            Team: The created team.
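+                If a team with the same name already exists, it is updated and returned instead.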
""" name = team_data.get("name") if not name: From d94130b65df346a03c6faf504c14fb318c384962 Mon Sep 17 00:00:00 2001 From: claiireyu Date: Wed, 1 Oct 2025 17:31:52 -0400 Subject: [PATCH 18/30] Added ticket links to game objects --- app.py | 2 +- src/models/game.py | 5 +++++ src/mutations/create_game.py | 7 +++++-- src/queries/game_query.py | 5 +++-- src/scrapers/games_scraper.py | 18 +++++++++++++++--- src/types.py | 11 ++++++----- src/utils/constants.py | 7 ++++++- 7 files changed, 41 insertions(+), 14 deletions(-) diff --git a/app.py b/app.py index 8fed869..eec4429 100644 --- a/app.py +++ b/app.py @@ -155,4 +155,4 @@ def cleanse_daily_sun_db(): if __name__ == "__main__": - app.run(debug=True, host="0.0.0.0", port=8000) + app.run(debug=True, host="0.0.0.0", port=8001) diff --git a/src/models/game.py b/src/models/game.py index f3ebcf6..73a7968 100644 --- a/src/models/game.py +++ b/src/models/game.py @@ -17,6 +17,7 @@ class Game: - `time` The time of the game. (optional) - `box_score` The scoring summary of the game (optional) - `score_breakdown` The scoring breakdown of the game (optional) + - 'ticket_link' The ticket link for the game (optional) """ def __init__( @@ -35,6 +36,7 @@ def __init__( score_breakdown=None, team=None, utc_date=None, + ticket_link=None, ): self.id = id if id else str(ObjectId()) self.city = city @@ -50,6 +52,7 @@ def __init__( self.score_breakdown = score_breakdown self.team = team self.utc_date = utc_date + self.ticket_link = ticket_link def to_dict(self): """ @@ -70,6 +73,7 @@ def to_dict(self): "score_breakdown": self.score_breakdown, "team": self.team, "utc_date": self.utc_date, + "ticket_link": self.ticket_link, } @staticmethod @@ -92,4 +96,5 @@ def from_dict(data) -> None: score_breakdown=data.get("score_breakdown"), team=data.get("team"), utc_date=data.get("utc_date"), + ticket_link=data.get("ticket_link"), ) diff --git a/src/mutations/create_game.py b/src/mutations/create_game.py index 205a153..3a52345 100644 --- a/src/mutations/create_game.py +++ b/src/mutations/create_game.py @@ -17,6 +17,7 @@ class Arguments: box_score = String(required=False) score_breakdown = String(required=False) utc_date = String(required=False) + ticket_link = String(required=False) game = Field(lambda: GameType) @@ -34,7 +35,8 @@ def mutate( time=None, box_score=None, score_breakdown=None, - utc_date=None + utc_date=None, + ticket_link=None ): game_data = { "city": city, @@ -48,7 +50,8 @@ def mutate( "time": time, "box_score": box_score, "score_breakdown": score_breakdown, - "utc_date": utc_date + "utc_date": utc_date, + "ticket_link": ticket_link } new_game = GameService.create_game(game_data) return CreateGame(game=new_game) \ No newline at end of file diff --git a/src/queries/game_query.py b/src/queries/game_query.py index 4aa8a55..631aba8 100644 --- a/src/queries/game_query.py +++ b/src/queries/game_query.py @@ -20,6 +20,7 @@ class GameQuery(ObjectType): sport=String(required=True), state=String(required=True), time=String(required=True), + ticket_link=String(required=False), ) games_by_sport = List(GameType, sport=String(required=True)) games_by_gender = List(GameType, gender=String(required=True)) @@ -40,13 +41,13 @@ def resolve_game(self, info, id): return GameService.get_game_by_id(id) def resolve_game_by_data( - self, info, city, date, gender, opponent_id, sport, state, time, location=None + self, info, city, date, gender, opponent_id, sport, state, time, location=None, ticket_link=None ): """ Resolver for retrieving a game by its data. 
""" return GameService.get_game_by_data( - city, date, gender, location, opponent_id, sport, state, time + city, date, gender, location, opponent_id, sport, state, time, ticket_link ) def resolve_games_by_sport(self, info, sport): diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index e174a65..2b3dd00 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -153,7 +153,14 @@ def parse_schedule_page(url, sport, gender): else: game_data["box_score"] = None game_data["score_breakdown"] = None - + + ticket_link_tag = game_item.select_one(GAME_TICKET_LINK) + ticket_link = ( + ticket_link_tag["href"] if ticket_link_tag else None + ) + game_data["ticket_link"] = ( + ticket_link if ticket_link else None + ) process_game_data(game_data) @@ -253,7 +260,11 @@ def process_game_data(game_data): "result": game_data["result"], "box_score": game_data["box_score"], "score_breakdown": game_data["score_breakdown"], - "utc_date": utc_date_str + "utc_date": utc_date_str, + "city": city, + "location": location, + "state": state, + "ticket_link": game_data["ticket_link"] } GameService.update_game(curr_game.id, updates) return @@ -270,7 +281,8 @@ def process_game_data(game_data): "time": game_time, "box_score": game_data["box_score"], "score_breakdown": game_data["score_breakdown"], - "utc_date": utc_date_str + "utc_date": utc_date_str, + "ticket_link": game_data["ticket_link"] } GameService.create_game(game_data) \ No newline at end of file diff --git a/src/types.py b/src/types.py index 548e190..284f127 100644 --- a/src/types.py +++ b/src/types.py @@ -88,6 +88,7 @@ class GameType(ObjectType): - `time`: The time of the game. (optional) - `box_score`: The box score of the game. - `score_breakdown`: The score breakdown of the game. + - `ticket_link`: The ticket link of the game. 
(optional) """ id = String(required=False) @@ -104,11 +105,11 @@ class GameType(ObjectType): score_breakdown = List(List(String), required=False) team = Field(TeamType, required=False) utc_date = String(required=False) - + ticket_link = String(required=False) def __init__( - self, id, city, date, gender, location, opponent_id, result, sport, state, time, box_score=None, score_breakdown=None, utc_date=None + self, id, city, date, gender, location, opponent_id, result, sport, state, time, box_score=None, score_breakdown=None, utc_date=None, ticket_link=None ): - self.id = id + self.id = id self.city = city self.date = date self.gender = gender @@ -121,7 +122,7 @@ def __init__( self.box_score = box_score self.score_breakdown = score_breakdown self.utc_date = utc_date - + self.ticket_link = ticket_link @staticmethod def team_to_team_type(team_obj): if team_obj is None: @@ -138,7 +139,7 @@ def resolve_team(parent, info): # getting team id - team could be None in older data team_id = parent.team if parent.team is not None else parent.opponent_id if team_id and isinstance(team_id, str): - # promise to get team object once the dataloader is ready + # promise to get team object once the dataloader is ready promise = info.context["team_loader"].load(team_id) return promise.then(GameType.team_to_team_type) return None diff --git a/src/utils/constants.py b/src/utils/constants.py index c65b20f..e6f6295 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -40,6 +40,9 @@ # The tag for the box score BOX_SCORE_TAG = ".sidearm-schedule-game-links-boxscore a" +# The tag for the game ticket link +GAME_TICKET_LINK = ".sidearm-schedule-game-links-tickets a" + # HTML Tags TAG_TABLE = 'table' TAG_SECTION = 'section' @@ -125,4 +128,6 @@ CHANNEL_ID = "UClSQOi2gnn9bi7mcgQrAVKA" # The maximum number of videos to retrieve -VIDEO_LIMIT = 20 \ No newline at end of file +VIDEO_LIMIT = 20 + + From 1309c3f5e989cfa9d0b78cbd533997a32f0c0ada Mon Sep 17 00:00:00 2001 From: claiireyu Date: Wed, 1 Oct 2025 17:46:37 -0400 Subject: [PATCH 19/30] Fixed port from 8001 to 8000 --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index eec4429..8fed869 100644 --- a/app.py +++ b/app.py @@ -155,4 +155,4 @@ def cleanse_daily_sun_db(): if __name__ == "__main__": - app.run(debug=True, host="0.0.0.0", port=8001) + app.run(debug=True, host="0.0.0.0", port=8000) From 580a3f37222200a2db7841187e38b0663bf12a23 Mon Sep 17 00:00:00 2001 From: claiireyu Date: Wed, 8 Oct 2025 18:00:05 -0400 Subject: [PATCH 20/30] Added Daily Sun Images --- src/scrapers/daily_sun_scrape.py | 21 ++++++++++++++++++++- src/utils/constants.py | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py index cd11c5a..8e377c4 100644 --- a/src/scrapers/daily_sun_scrape.py +++ b/src/scrapers/daily_sun_scrape.py @@ -3,7 +3,10 @@ from datetime import datetime, timedelta from dotenv import load_dotenv from ..services import ArticleService +from ..utils.constants import ARTICLE_IMG_TAG import logging +from bs4 import BeautifulSoup +import base64 load_dotenv() @@ -36,9 +39,24 @@ def fetch_news(): ) article_url = f"https://cornellsun.com/article/{article['slug']}" + article_image = None + try: + response = requests.get( + article_url, + headers={ + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + } + ) + response.raise_for_status() + soup = 
BeautifulSoup(response.content, 'html.parser') + img_tag = soup.select_one(ARTICLE_IMG_TAG) + if img_tag and img_tag.get('src'): + article_image=img_tag.get('src') + except Exception as e: + logging.error(f"Error fetching news: {str(e)}") article_doc = { "title": article["headline"], - "image": article["dominantMedia"]["title"] if article["dominantMedia"] else None, + "image": article_image, "sports_type": sports_type, "published_at": published_at, "url": article_url, @@ -46,6 +64,7 @@ def fetch_news(): "created_at": datetime.now() } articles_to_store.append(article_doc) + if articles_to_store: ArticleService.create_articles_bulk(articles_to_store) diff --git a/src/utils/constants.py b/src/utils/constants.py index e6f6295..81f0414 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -130,4 +130,4 @@ # The maximum number of videos to retrieve VIDEO_LIMIT = 20 - +ARTICLE_IMG_TAG = ".dom-art-container img" From 7cfaaf96bb82a0614f8a9d442d3f30c2c30c9051 Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Wed, 22 Oct 2025 18:17:26 -0400 Subject: [PATCH 21/30] Added find by date query to support advanced filters for frontend --- src/queries/game_query.py | 9 ++++++++- src/repositories/game_repository.py | 21 +++++++++++++++++++++ src/services/game_service.py | 7 +++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/queries/game_query.py b/src/queries/game_query.py index 631aba8..3c04116 100644 --- a/src/queries/game_query.py +++ b/src/queries/game_query.py @@ -1,4 +1,4 @@ -from graphene import ObjectType, String, Field, List, Int +from graphene import ObjectType, String, Field, List, Int, DateTime from src.services.game_service import GameService from src.types import GameType @@ -27,6 +27,7 @@ class GameQuery(ObjectType): games_by_sport_gender = List( GameType, sport=String(required=True), gender=String(required=True) ) + games_by_date = List(GameType, startDate=DateTime(required=True), endDate=DateTime(required=True)) def resolve_games(self, info, limit=100, offset=0): """ @@ -67,3 +68,9 @@ def resolve_games_by_sport_gender(self, info, sport, gender): Resolver for retrieving all games by its sport and gender. """ return GameService.get_games_by_sport_gender(sport, gender) + + def resolve_games_by_date(self, info, startDate, endDate): + """ + Resolver for retrieving games by date. + """ + return GameService.get_games_by_date(startDate, endDate) diff --git a/src/repositories/game_repository.py b/src/repositories/game_repository.py index 4f05b2f..95e679b 100644 --- a/src/repositories/game_repository.py +++ b/src/repositories/game_repository.py @@ -225,6 +225,27 @@ def find_games_by_sport_gender_after_date(sport, gender, after_date=None): games = game_collection.find(query) return [Game.from_dict(game) for game in games] + + @staticmethod + def find_by_date(startDate, endDate): + """ + Retrieve all games from the 'game' collection in MongoDB for games + between certain dates. 
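+        Both bounds are serialized with isoformat() and compared against stored utc_date strings.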
+        """
+        game_collection = db["game"]
+
+        start_str = startDate.isoformat()
+        end_str = endDate.isoformat()
+
+        query = {
+            "utc_date": {
+                "$gte": start_str,
+                "$lte": end_str
+            }
+        }
+
+        games = game_collection.find(query)
+        return [Game.from_dict(game) for game in games]
 
     @staticmethod
     def delete_games_by_ids(game_ids):
diff --git a/src/services/game_service.py b/src/services/game_service.py
index 95d31ed..2351543 100644
--- a/src/services/game_service.py
+++ b/src/services/game_service.py
@@ -101,6 +101,13 @@ def get_games_by_sport_gender(sport, gender):
         Retrieves all game by its sport and gender.
         """
         return GameRepository.find_by_sport_gender(sport, gender)
+
+    @staticmethod
+    def get_games_by_date(startDate, endDate):
+        """
+        Retrieves all games between these two dates.
+        """
+        return GameRepository.find_by_date(startDate, endDate)
 
     @staticmethod
     def get_tournament_games_by_sport_gender(sport, gender, after_date=None):

From 0869246932062772691ba6ccc95f09fd9865a41e Mon Sep 17 00:00:00 2001
From: claiireyu
Date: Wed, 22 Oct 2025 23:24:52 -0400
Subject: [PATCH 22/30] Add duration field to YoutubeVideo model and related mutations

- Updated YoutubeVideo model to include duration attribute.
- Modified CreateYoutubeVideo mutation to accept duration as an argument.
- Implemented get_video_duration function to fetch video duration from YouTube API.
- Updated process_video_item to include video duration in the processed data.
- Adjusted YoutubeVideoType to reflect the new duration field.
---
 src/models/youtube_video.py           |  6 ++-
 src/mutations/create_youtube_video.py |  4 +-
 src/scrapers/youtube_stats.py         | 54 ++++++++++++++++++++++++++-
 src/services/youtube_video_service.py |  1 +
 src/types.py                          |  2 +
 5 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/src/models/youtube_video.py b/src/models/youtube_video.py
index e45a965..cdd11af 100644
--- a/src/models/youtube_video.py
+++ b/src/models/youtube_video.py
@@ -12,10 +12,11 @@ class YoutubeVideo:
     - `thumbnail` The thumbnail of the video, as a URL string pointing to a `.jpg` file.
     - `url` The URL of the video.
     - `published_at` The date and time the video was published.
+    - `duration` The duration of the video.
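+      Stored as a human-readable string such as "2:05" or "1:23:45".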
""" def __init__( - self, title, description, thumbnail, b64_thumbnail, url, published_at, id=None + self, title, description, thumbnail, b64_thumbnail, url, published_at, duration=None, id=None ): self.id = id if id else str(ObjectId()) self.title = title @@ -24,6 +25,7 @@ def __init__( self.b64_thumbnail = b64_thumbnail self.url = url self.published_at = published_at + self.duration = duration def to_dict(self): """ @@ -37,6 +39,7 @@ def to_dict(self): "b64_thumbnail": self.b64_thumbnail, "url": self.url, "published_at": self.published_at, + "duration": self.duration, } @staticmethod @@ -52,4 +55,5 @@ def from_dict(data): b64_thumbnail=data.get("b64_thumbnail"), url=data.get("url"), published_at=data.get("published_at"), + duration=data.get("duration"), ) diff --git a/src/mutations/create_youtube_video.py b/src/mutations/create_youtube_video.py index 9f39bf7..156df6d 100644 --- a/src/mutations/create_youtube_video.py +++ b/src/mutations/create_youtube_video.py @@ -11,10 +11,11 @@ class Arguments: b64_thumbnail = String(required=True) url = String(required=True) published_at = String(required=True) + duration = String(required=True) youtube_video = Field(lambda: YoutubeVideoType) - def mutate(self, info, id, title, description, thumbnail, url, published_at): + def mutate(self, info, id, title, description, thumbnail, b64_thumbnail, url, published_at, duration): video_data = { "id": id, "title": title, @@ -23,6 +24,7 @@ def mutate(self, info, id, title, description, thumbnail, url, published_at): "b64_thumbnail": b64_thumbnail, "url": url, "published_at": published_at, + "duration": duration, } new_video = YoutubeVideoService.create_video(video_data) return CreateYoutubeVideo(youtube_video=new_video) \ No newline at end of file diff --git a/src/scrapers/youtube_stats.py b/src/scrapers/youtube_stats.py index ee8a5a7..a7dff13 100644 --- a/src/scrapers/youtube_stats.py +++ b/src/scrapers/youtube_stats.py @@ -6,6 +6,7 @@ import base64 import os import html +from bs4 import BeautifulSoup load_dotenv() YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") @@ -25,6 +26,54 @@ def fetch_videos(): process_video_item(item) +def get_video_duration(video_id): + """ + Gets video duration using YouTube API + """ + try: + url = f"https://www.googleapis.com/youtube/v3/videos?key={YOUTUBE_API_KEY}&id={video_id}&part=contentDetails" + response = requests.get(url) + response.raise_for_status() + data = response.json() + + if data.get("items"): + duration_iso = data["items"][0]["contentDetails"]["duration"] + return convert_iso_duration(duration_iso) + return None + except Exception as e: + print(f"Error getting duration for video {video_id}: {e}") + return None + + +def convert_iso_duration(iso_duration): + """ + Converts ISO 8601 duration (PT2M5S) to readable format (2:05) + Examples: + - PT2M5S -> 2:05 + - PT1H23M45S -> 1:23:45 + - PT30S -> 0:30 + """ + import re + + # Remove PT prefix + duration = iso_duration.replace('PT', '') + + # Extract hours, minutes, seconds + hours = re.search(r'(\d+)H', duration) + minutes = re.search(r'(\d+)M', duration) + seconds = re.search(r'(\d+)S', duration) + + h = int(hours.group(1)) if hours else 0 + m = int(minutes.group(1)) if minutes else 0 + s = int(seconds.group(1)) if seconds else 0 + + # Format as MM:SS or HH:MM:SS + if h > 0: + return f"{h}:{m:02d}:{s:02d}" + else: + return f"{m}:{s:02d}" + + def process_video_item(item): """ Extracts the required data from a video item and @@ -55,14 +104,17 @@ def process_video_item(item): published_at = snippet.get("publishedAt") 
video_url = f"https://www.youtube.com/watch?v={video_id}" + duration = get_video_duration(video_id) + video_data = { - "id": video_id, # use video id for easy retrieval + "id": video_id, "title": title, "description": description, "thumbnail": thumbnail, "b64_thumbnail": encoded_thumbnail, "url": video_url, "published_at": published_at, + "duration": duration, } process_video_data(video_data) diff --git a/src/services/youtube_video_service.py b/src/services/youtube_video_service.py index 5052975..0d34c33 100644 --- a/src/services/youtube_video_service.py +++ b/src/services/youtube_video_service.py @@ -30,6 +30,7 @@ def create_video(data): b64_thumbnail=data.get("b64_thumbnail"), url=data.get("url"), published_at=data.get("published_at"), + duration=data.get("duration"), ) YoutubeVideoRepository.insert(video) return video diff --git a/src/types.py b/src/types.py index 284f127..871e57d 100644 --- a/src/types.py +++ b/src/types.py @@ -155,6 +155,7 @@ class YoutubeVideoType(ObjectType): - thumbnail: The URL of the video's thumbnail. - url: The URL to the video. - published_at: The date and time the video was published. + - duration: The duration of the video (optional). """ id = String(required=False) title = String(required=True) @@ -163,6 +164,7 @@ class YoutubeVideoType(ObjectType): b64_thumbnail = String(required=True) url = String(required=True) published_at = String(required=True) + duration = String(required=False) def __init__(self, **kwargs): for key, value in kwargs.items(): From 5348a2883025325d6f67169753d0ce603c4f183f Mon Sep 17 00:00:00 2001 From: claiireyu Date: Mon, 27 Oct 2025 18:00:52 -0400 Subject: [PATCH 23/30] Update youtube_stats.py --- src/scrapers/youtube_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scrapers/youtube_stats.py b/src/scrapers/youtube_stats.py index a7dff13..94eac38 100644 --- a/src/scrapers/youtube_stats.py +++ b/src/scrapers/youtube_stats.py @@ -41,7 +41,7 @@ def get_video_duration(video_id): return convert_iso_duration(duration_iso) return None except Exception as e: - print(f"Error getting duration for video {video_id}: {e}") + print(f"Error getting video duration: {e}") return None From c89babefa5ff76e69f7b6697afb34ef6ae5b8fa9 Mon Sep 17 00:00:00 2001 From: claiireyu Date: Sun, 16 Nov 2025 22:05:45 -0500 Subject: [PATCH 24/30] Refactor article date handling to use ISO 8601 format --- src/mutations/create_article.py | 3 +-- src/repositories/article_repository.py | 14 ++++++++++---- src/scrapers/daily_sun_scrape.py | 15 +++++++++------ src/types.py | 5 +---- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/mutations/create_article.py b/src/mutations/create_article.py index 1e0a03b..19b8920 100644 --- a/src/mutations/create_article.py +++ b/src/mutations/create_article.py @@ -14,11 +14,10 @@ class Arguments: article = Field(lambda: ArticleType) def mutate(self, info, title, sports_type, published_at, url, slug, image=None): - from datetime import datetime article_data = { "title": title, "sports_type": sports_type, - "published_at": datetime.fromisoformat(published_at), + "published_at": published_at, # Already in ISO 8601 format "url": url, "slug": slug, "image": image diff --git a/src/repositories/article_repository.py b/src/repositories/article_repository.py index 0e324e9..440f856 100644 --- a/src/repositories/article_repository.py +++ b/src/repositories/article_repository.py @@ -1,7 +1,7 @@ from src.database import daily_sun_db from src.models.article import Article from pymongo import UpdateOne 
-from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone class ArticleRepository: @staticmethod @@ -52,7 +52,9 @@ def find_recent(limit_days=3): Retrieve articles from the last N days, sorted by published_at descending. """ article_collection = daily_sun_db["news_articles"] - query = {"published_at": {"$gte": datetime.now() - timedelta(days=limit_days)}} + # Calculate threshold as ISO 8601 string + threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z') + query = {"published_at": {"$gte": threshold}} articles = article_collection.find(query).sort("published_at", -1) return [Article.from_dict(article) for article in articles] @@ -62,9 +64,11 @@ def find_by_sports_type(sports_type, limit_days=3): Retrieve articles by sports_type from the last N days, sorted by published_at descending. """ article_collection = daily_sun_db["news_articles"] + # Calculate threshold as ISO 8601 string + threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z') query = { "sports_type": sports_type, - "published_at": {"$gte": datetime.now() - timedelta(days=limit_days)} + "published_at": {"$gte": threshold} } articles = article_collection.find(query).sort("published_at", -1) return [Article.from_dict(article) for article in articles] @@ -75,5 +79,7 @@ def delete_not_recent(limit_days=3): Delete articles older than N days, sorted by published_at descending. """ article_collection = daily_sun_db["news_articles"] - query = {"published_at": {"$lt": datetime.now() - timedelta(days=limit_days)}} + # Calculate threshold as ISO 8601 string + threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z') + query = {"published_at": {"$lt": threshold}} article_collection.delete_many(query) \ No newline at end of file diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py index 8e377c4..7750f4a 100644 --- a/src/scrapers/daily_sun_scrape.py +++ b/src/scrapers/daily_sun_scrape.py @@ -1,6 +1,6 @@ import os import requests -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from dotenv import load_dotenv from ..services import ArticleService from ..utils.constants import ARTICLE_IMG_TAG @@ -23,16 +23,19 @@ def fetch_news(): response.raise_for_status() data = response.json() - # Current date and 3-day threshold - current_date = datetime.now() + # Current date and 3-day threshold (in UTC) + current_date = datetime.now(timezone.utc) three_days_ago = current_date - timedelta(days=3) # Process articles articles_to_store = [] for article in data.get("articles", []): - published_at = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S") + published_at_dt = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S") + # Assume the timezone is UTC and convert to ISO 8601 format string + published_at_dt = published_at_dt.replace(tzinfo=timezone.utc) + published_at = published_at_dt.isoformat().replace('+00:00', 'Z') - if published_at >= three_days_ago: + if published_at_dt >= three_days_ago: sports_type = next( (tag["name"] for tag in article["tags"] if tag["name"] not in ["Sports", "Top Stories"]), "General" @@ -61,7 +64,7 @@ def fetch_news(): "published_at": published_at, "url": article_url, "slug": article["slug"], - "created_at": datetime.now() + "created_at": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } articles_to_store.append(article_doc) diff --git 
a/src/types.py b/src/types.py index 871e57d..96f36e2 100644 --- a/src/types.py +++ b/src/types.py @@ -190,7 +190,4 @@ class ArticleType(ObjectType): def __init__(self, **kwargs): for key, value in kwargs.items(): - if key == "published_at" and isinstance(value, datetime): - setattr(self, key, value.isoformat()) - else: - setattr(self, key, value) \ No newline at end of file + setattr(self, key, value) \ No newline at end of file From a36601424a9cc993a37683538e22576d3ef0a305 Mon Sep 17 00:00:00 2001 From: claiireyu Date: Sun, 16 Nov 2025 22:29:41 -0500 Subject: [PATCH 25/30] Implement sport type extraction from article titles --- src/scrapers/daily_sun_scrape.py | 8 +++--- src/utils/helpers.py | 46 +++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py index 8e377c4..d64dd76 100644 --- a/src/scrapers/daily_sun_scrape.py +++ b/src/scrapers/daily_sun_scrape.py @@ -4,6 +4,7 @@ from dotenv import load_dotenv from ..services import ArticleService from ..utils.constants import ARTICLE_IMG_TAG +from ..utils.helpers import extract_sport_type_from_title import logging from bs4 import BeautifulSoup import base64 @@ -33,10 +34,9 @@ def fetch_news(): published_at = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S") if published_at >= three_days_ago: - sports_type = next( - (tag["name"] for tag in article["tags"] if tag["name"] not in ["Sports", "Top Stories"]), - "General" - ) + # Extract sport type from title + title = article["headline"] + sports_type = extract_sport_type_from_title(title) article_url = f"https://cornellsun.com/article/{article['slug']}" article_image = None diff --git a/src/utils/helpers.py b/src/utils/helpers.py index cb3d759..362bb2e 100644 --- a/src/utils/helpers.py +++ b/src/utils/helpers.py @@ -94,4 +94,48 @@ def is_cornell_loss(result: str): # Common loss indicators in result strings loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"] - return any(indicator in result for indicator in loss_indicators) \ No newline at end of file + return any(indicator in result for indicator in loss_indicators) + +def extract_sport_type_from_title(title: str): + """ + Extract the sport type from an article title by matching against known sports. 
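+    Matching is case-insensitive, and longer sport names are checked first.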
+ + Args: + title (str): The article title to analyze + + Returns: + str: The sport name if found, otherwise "sports" as default + """ + from .constants import SPORT_URLS + + if not title: + return "sports" + + # Get all unique sport names from SPORT_URLS + sport_names = set() + for sport_data in SPORT_URLS.values(): + sport_name = sport_data["sport"].strip() + if sport_name: + sport_names.add(sport_name) + + # Sort by length (longest first) to match "Swimming & Diving" before "Swimming" + sport_names_sorted = sorted(sport_names, key=len, reverse=True) + + title_lower = title.lower() + + for sport_name in sport_names_sorted: + if sport_name.lower() in title_lower: + return sport_name + + # Special mappings for common variations in titles + # Only checked if no exact match found above + # e.g., "Hockey" in title should match "Ice Hockey" in sport names + special_mappings = { + "hockey": "Ice Hockey", # "Men's Hockey" or "Women's Hockey" → "Ice Hockey" + } + + for keyword, sport_name in special_mappings.items(): + if keyword in title_lower and sport_name in sport_names: + return sport_name + + return "sports" From 0a892fad0125dcd5e059ac7b6ed8e56ea0d5345b Mon Sep 17 00:00:00 2001 From: Kevin Biliguun Date: Wed, 19 Nov 2025 02:00:35 -0500 Subject: [PATCH 26/30] Added sports type to youtube videos --- src/services/game_service.py | 8 ++++-- src/types.py | 9 +++++++ src/utils/helpers.py | 51 +++++++++++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/services/game_service.py b/src/services/game_service.py index 2351543..c0ae3db 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -2,6 +2,7 @@ from src.models.game import Game from src.services.team_service import TeamService from src.utils.helpers import is_tournament_placeholder_team +from pymongo.errors import DuplicateKeyError class GameService: @@ -36,8 +37,11 @@ def create_game(data): raise ValueError(f"Opponent team with id {opponent_id} does not exist.") game = Game(**data) - GameRepository.insert(game) - return game + try: + GameRepository.insert(game) + return game + except DuplicateKeyError: + return None @staticmethod def delete_game(game_id): diff --git a/src/types.py b/src/types.py index 871e57d..e27eace 100644 --- a/src/types.py +++ b/src/types.py @@ -156,6 +156,7 @@ class YoutubeVideoType(ObjectType): - url: The URL to the video. - published_at: The date and time the video was published. - duration: The duration of the video (optional). + - sportsType: The sport type extracted from the video title. """ id = String(required=False) title = String(required=True) @@ -165,11 +166,19 @@ class YoutubeVideoType(ObjectType): url = String(required=True) published_at = String(required=True) duration = String(required=False) + sportsType = String(required=False) def __init__(self, **kwargs): for key, value in kwargs.items(): setattr(self, key, value) + def resolve_sportsType(video, info): + """ + Resolver to extract sport type from the video title. + """ + from src.utils.helpers import extract_sport_from_title + return extract_sport_from_title(video.title) + class ArticleType(ObjectType): """ A GraphQL type representing a news article. 
diff --git a/src/utils/helpers.py b/src/utils/helpers.py
index cb3d759..339ee7e 100644
--- a/src/utils/helpers.py
+++ b/src/utils/helpers.py
@@ -3,6 +3,7 @@
 from PIL import Image
 from io import BytesIO
 from collections import Counter
+import re
 
 
 def get_dominant_color(image_url, white_threshold=200, black_threshold=50):
@@ -94,4 +95,52 @@ def is_cornell_loss(result: str):
 
     # Common loss indicators in result strings
     loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"]
-    return any(indicator in result for indicator in loss_indicators)
\ No newline at end of file
+    return any(indicator in result for indicator in loss_indicators)
+
+def extract_sport_from_title(title):
+    """
+    Extracts the sport type from a YouTube video title.
+
+    Args:
+        title (str): The title of the YouTube video
+
+    Returns:
+        str: The sport type if found, None otherwise
+    """
+    if not title:
+        return None
+
+    title_lower = title.lower()
+
+    sport_patterns = [
+        # Ice Hockey
+        (r"ice\s+hockey", "Ice Hockey"),
+        (r"women'?s\s+ice\s+hockey", "Ice Hockey"),
+        (r"men'?s\s+ice\s+hockey", "Ice Hockey"),
+        # Field Hockey
+        (r"field\s+hockey", "Field Hockey"),
+        # Hockey
+        (r"\bhockey\b", "Ice Hockey"),
+        # Basketball
+        (r"basketball", "Basketball"),
+        # Football
+        (r"\bfootball\b", "Football"),
+        # Soccer
+        (r"\bsoccer\b", "Soccer"),
+        # Volleyball
+        (r"volleyball", "Volleyball"),
+        # Wrestling
+        (r"wrestling", "Wrestling"),
+        # Sprint Football
+        (r"sprint\s+football", "Sprint Football"),
+    ]
+
+    for pattern, sport_name in sport_patterns:
+        if re.search(pattern, title_lower):
+            return sport_name
+
+    if "ice" in title_lower and ("hockey" in title_lower or "cornell" in title_lower):
+        return "Ice Hockey"
+
+    return None
+

From 076ecaf65b9d449960d0821a4c9b6d39239ce072 Mon Sep 17 00:00:00 2001
From: claiireyu
Date: Fri, 23 Jan 2026 13:18:08 -0500
Subject: [PATCH 27/30] Fixes #50

---
 src/scrapers/game_details_scrape.py | 14 +++++++++----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/scrapers/game_details_scrape.py b/src/scrapers/game_details_scrape.py
index 8fce04a..e1c6a9e 100644
--- a/src/scrapers/game_details_scrape.py
+++ b/src/scrapers/game_details_scrape.py
@@ -31,16 +31,20 @@ def extract_teams_and_scores(box_score_section, sport):
     period_scores = []
 
     for row in score_table.find(TAG_TBODY).find_all(TAG_TR):
-        team_name_cell = row.find(TAG_TH) if sport == 'ice hockey' else row.find(TAG_TD)
+        # Check if team name is in <th> (some sports) or first <td> (other sports)
+        team_name_cell = row.find(TAG_TH)
         if team_name_cell:
+            # Team name is in <th>, all <td> elements are period scores
             team_name = team_name_cell.text.strip().replace("Winner", "").strip()
-            team_name = ' '.join(team_name.split())
+            scores = [td.text.strip() for td in row.find_all(TAG_TD)]
         else:
-            team_name = "Unknown"
+            # Team name is in first <td>, remaining <td> elements are period scores
+            team_name_cell = row.find(TAG_TD)
+            team_name = team_name_cell.text.strip().replace("Winner", "").strip() if team_name_cell else "Unknown"
+            scores = [td.text.strip() for td in row.find_all(TAG_TD)[1:]]
 
+        team_name = ' '.join(team_name.split())
         team_names.append(team_name)
-        scores = [td.text.strip() for td in row.find_all(TAG_TD)[1:]]
-        scores = scores[:-1] if sport == 'basketball' else scores
         period_scores.append(scores)
 
     return team_names, period_scores

From e848140558abce4175f1787fab28cefaecf66ad1 Mon Sep 17 00:00:00 2001
From: claiireyu
Date: Thu, 29 Jan 2026 19:35:57 -0500
Subject: [PATCH 28/30] Fix #49 for basketball score breakdowns

---
 src/scrapers/game_details_scrape.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/src/scrapers/game_details_scrape.py b/src/scrapers/game_details_scrape.py
index 8fce04a..5b6fc6d 100644
--- a/src/scrapers/game_details_scrape.py
+++ b/src/scrapers/game_details_scrape.py
@@ -31,16 +31,24 @@ def extract_teams_and_scores(box_score_section, sport):
     period_scores = []
 
     for row in score_table.find(TAG_TBODY).find_all(TAG_TR):
-        team_name_cell = row.find(TAG_TH) if sport == 'ice hockey' else row.find(TAG_TD)
+        # Check if team name is in <th> (some sports) or first <td> (other sports)
+        team_name_cell = row.find(TAG_TH)
         if team_name_cell:
+            # Team name is in <th>, all <td> elements are period scores
             team_name = team_name_cell.text.strip().replace("Winner", "").strip()
-            team_name = ' '.join(team_name.split())
+            scores = [td.text.strip() for td in row.find_all(TAG_TD)]
         else:
-            team_name = "Unknown"
+            # Team name is in first <td>, remaining <td> elements are period scores
+            team_name_cell = row.find(TAG_TD)
+            team_name = team_name_cell.text.strip().replace("Winner", "").strip() if team_name_cell else "Unknown"
+            scores = [td.text.strip() for td in row.find_all(TAG_TD)[1:]]
+
+        # Basketball box score includes a "Records" column at the end - exclude it
+        if sport == 'basketball' and scores:
+            scores = scores[:-1]
+
+        team_name = ' '.join(team_name.split())
         team_names.append(team_name)
-        scores = [td.text.strip() for td in row.find_all(TAG_TD)[1:]]
-        scores = scores[:-1] if sport == 'basketball' else scores
         period_scores.append(scores)
 
     return team_names, period_scores
@@ -59,7 +67,7 @@ def soccer_summary(box_score_section):
             event = row.find_all(TAG_TD)[2]
             desc = event.find_all(TAG_SPAN)[-1].text.strip()
 
-            if team == "COR" or team == "CU":
+            if team == "COR" or team == "CU" or team == "CRNL":
                 cornell_score += 1
             else:
                 opp_score += 1
@@ -220,6 +228,36 @@ def baseball_summary(box_score_section):
         summary = [{"message": "No scoring events in this game."}]
     return summary
 
+# def basketball_summary(box_score_section):
+#     summary = []
+#     scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY})
+#     if scoring_section:
+#         scoring_rows = scoring_section.find(TAG_TBODY)
+#         if scoring_rows:
+#             cornell_score = 0
+#             opp_score = 0
+#             for row in scoring_rows.find_all(TAG_TR):
+#                 time = row.find_all(TAG_TD)[0].text.strip()
+#                 team = row.find_all(TAG_TD)[1].find(TAG_IMG)[ATTR_ALT]
+#                 event = row.find_all(TAG_TD)[2]
+#                 desc = event.find_all(TAG_SPAN)[-1].text.strip()
+
+#                 if team == "COR" or team == "CU" or team == "CRNL":
+#                     cornell_score += 1
+#                 else:
+#                     opp_score += 1
+
+#                 summary.append({
+#                     'time': time,
+#                     'team': team,
+#                     'description': desc,
+#                     'cor_score': cornell_score,
+#                     'opp_score': opp_score
+#                 })
+#     if not summary:
+#         summary = [{"message": "No scoring events in this game."}]
+#     return summary
+
 def scrape_game(url, sport):
     soup = fetch_page(url)
     box_score_section = soup.find(class_=CLASS_BOX_SCORE) if sport in ['baseball', 'softball'] else soup.find(id=ID_BOX_SCORE)
@@ -233,6 +271,7 @@ def scrape_game(url, sport):
         'field hockey': (lambda: extract_teams_and_scores(box_score_section, 'field hockey'), field_hockey_summary),
         'lacrosse': (lambda: extract_teams_and_scores(box_score_section, 'lacrosse'), lacrosse_summary),
         'baseball': (lambda: extract_teams_and_scores(box_score_section, 'baseball'), baseball_summary),
+        'basketball': (lambda: extract_teams_and_scores(box_score_section, 'basketball'), lambda _: []),
     }
 
     extract_teams_func, summary_func = sport_parsers.get(sport, (None, None))

From 85eac9db691c1fe0ff8f8896f7145bc2b5857b5f Mon Sep 17 00:00:00 2001
From: claiireyu
Date: Fri, 30 Jan 2026 14:51:13 -0500
Subject: [PATCH 29/30] Fix date comparison in news fetching logic to use datetime object instead of ISO 8601 string

---
 src/scrapers/daily_sun_scrape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py
index 42a2e63..4b5cd24 100644
--- a/src/scrapers/daily_sun_scrape.py
+++ b/src/scrapers/daily_sun_scrape.py
@@ -36,7 +36,7 @@ def fetch_news():
         published_at_dt = published_at_dt.replace(tzinfo=timezone.utc)
         published_at = published_at_dt.isoformat().replace('+00:00', 'Z')
 
-        if published_at >= three_days_ago:
+        if published_at_dt >= three_days_ago:
             # Extract sport type from title
             title = article["headline"]
             sports_type = extract_sport_type_from_title(title)

From 4b66034e58b321cdb59946a20b6e713a2ef27d29 Mon Sep 17 00:00:00 2001
From: Joshua Dirga
Date: Fri, 30 Jan 2026 15:30:42 -0500
Subject: [PATCH 30/30] fixed youtube b64 thumbnail null issue

---
 src/mutations/create_youtube_video.py | 4 ++--
 src/types.py                          | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mutations/create_youtube_video.py b/src/mutations/create_youtube_video.py
index 156df6d..ed2cb40 100644
--- a/src/mutations/create_youtube_video.py
+++ b/src/mutations/create_youtube_video.py
@@ -8,14 +8,14 @@ class Arguments:
         title = String(required=True)
         description = String(required=True)
         thumbnail = String(required=True)
-        b64_thumbnail = String(required=True)
+        b64_thumbnail = String(required=False)
         url = String(required=True)
         published_at = String(required=True)
         duration = String(required=True)
 
     youtube_video = Field(lambda: YoutubeVideoType)
 
-    def mutate(self, info, id, title, description, thumbnail, b64_thumbnail, url, published_at, duration):
+    def mutate(self, info, id, title, description, thumbnail, url, published_at, duration, b64_thumbnail=None):
         video_data = {
             "id": id,
             "title": title,
diff --git a/src/types.py b/src/types.py
index 85024f2..7eb8fbe 100644
--- a/src/types.py
+++ b/src/types.py
@@ -152,7 +152,8 @@ class YoutubeVideoType(ObjectType):
     - id: The YouTube video ID (optional).
     - title: The title of the video.
     - description: The description of the video.
     - thumbnail: The URL of the video's thumbnail.
+    - b64_thumbnail: A base64-encoded copy of the thumbnail (optional).
     - url: The URL to the video.
     - published_at: The date and time the video was published.
     - duration: The duration of the video (optional).
@@ -162,7 +163,7 @@ class YoutubeVideoType(ObjectType):
     title = String(required=True)
     description = String(required=True)
     thumbnail = String(required=True)
-    b64_thumbnail = String(required=True)
+    b64_thumbnail = String(required=False)
     url = String(required=True)
     published_at = String(required=True)
     duration = String(required=False)
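A quick way to exercise the end state of this series is the GraphQL endpoint itself. The sketch below is a minimal smoke test, not part of the patches: it assumes the server is running locally on port 8000, that schema.py (not shown here) exposes the mutation as createYoutubeVideo, and that graphene's default snake_case-to-camelCase conversion applies to argument and field names. After PATCH 30, the b64Thumbnail argument should be omittable.

    import requests

    # Hypothetical smoke test for the CreateYoutubeVideo mutation.
    # The endpoint, mutation name, and camelCased argument names are
    # assumptions based on the diffs above, not confirmed by schema.py.
    MUTATION = '''
    mutation {
      createYoutubeVideo(
        id: "dQw4w9WgXcQ",
        title: "Cornell Men's Ice Hockey Highlights",
        description: "Weekend highlights",
        thumbnail: "https://i.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg",
        url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        publishedAt: "2026-01-30T00:00:00Z",
        duration: "2:05"
      ) {
        youtubeVideo { title duration b64Thumbnail }
      }
    }
    '''

    resp = requests.post("http://localhost:8000/graphql", json={"query": MUTATION})
    print(resp.json())

If the mutation succeeds, b64Thumbnail should come back null rather than raising a missing-argument error, which is the behavior PATCH 30 is after.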