Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
3d88699
implement articles from daily sun
mateow99 Mar 25, 2025
cc9ebd1
update env template
mateow99 Mar 27, 2025
9d1792c
Added logic to prevent adding duplicate games when scraping
TushigBili Sep 13, 2025
d205803
Reworked service methods to check for duplicates and fixed game scrap…
TushigBili Sep 13, 2025
e1f080c
Added tournament handling and TBD/TBA updates for games
TushigBili Sep 20, 2025
e2ac05f
remove comments that trigger alerts
TushigBili Sep 20, 2025
756ec32
remove db game count logging
TushigBili Sep 20, 2025
721f0f8
Merge pull request #36 from cuappdev/fix-duplicate-games
TushigBili Sep 24, 2025
907fe84
Revert "Merge pull request #36 from cuappdev/fix-duplicate-games"
TushigBili Sep 24, 2025
1de4e00
revert pr
TushigBili Sep 24, 2025
bbe2408
revert pr
TushigBili Sep 24, 2025
a64620b
Merge pull request #37 from cuappdev/revert-duplicate-games-fix
angelinaa-chen Sep 29, 2025
1bcf70a
resolve conflicts
TushigBili Sep 30, 2025
74e0070
Merge pull request #19 from cuappdev/maw346/add-daily-sun
TushigBili Sep 30, 2025
a5f0e7e
Fix gunicorn argument parsing and MongoDB _id field error
TushigBili Sep 30, 2025
c2d8df2
Move signal handlers after argument parsing to fix initialization order
TushigBili Sep 30, 2025
8b373d9
Merge pull request #38 from cuappdev/maw346/add-daily-sun
TushigBili Sep 30, 2025
71e718a
resolve conflicts
TushigBili Sep 30, 2025
7d35699
Added logic to prevent adding duplicate games when scraping
TushigBili Sep 13, 2025
b66d59f
Reworked service methods to check for duplicates and fixed game scrap…
TushigBili Sep 13, 2025
42d1394
Added tournament handling and TBD/TBA updates for games
TushigBili Sep 20, 2025
00f8c7d
remove comments that trigger alerts
TushigBili Sep 20, 2025
310de06
remove db game count logging
TushigBili Sep 20, 2025
d94130b
Added ticket links to game objects
claiireyu Oct 1, 2025
1309c3f
Fixed port from 8001 to 8000
claiireyu Oct 1, 2025
5925182
Merge pull request #40 from cuappdev/fix-duplicate-games
TushigBili Oct 2, 2025
8573e2e
Resolving conflicts
claiireyu Oct 8, 2025
8ff59fd
Merge pull request #41 from cuappdev/claire/ticketlinks
claiireyu Oct 8, 2025
580a3f3
Added Daily Sun Images
claiireyu Oct 8, 2025
f15c721
Merge pull request #42 from cuappdev/claire/dailySunImages
claiireyu Oct 15, 2025
7cfaaf9
Added find by date query to support advanced filters for frontend
TushigBili Oct 22, 2025
0869246
Add duration field to YoutubeVideo model and related mutations
claiireyu Oct 23, 2025
f89c543
Merge pull request #43 from cuappdev/advanced-filters
TushigBili Oct 27, 2025
5348a28
Update youtube_stats.py
claiireyu Oct 27, 2025
d63d932
Merge pull request #44 from cuappdev/claire/youtubeVideoDuration
claiireyu Oct 27, 2025
c89babe
Refactor article date handling to use ISO 8601 format
claiireyu Nov 17, 2025
a366014
Implement sport type extraction from article titles
claiireyu Nov 17, 2025
0a892fa
Added sports type to youtube videos
TushigBili Nov 19, 2025
aeb28ff
Merge pull request #45 from cuappdev/claire/articleVideoTimeFix
claiireyu Nov 19, 2025
5a71190
Resolve conflicts
claiireyu Nov 19, 2025
8ab9357
Merge pull request #46 from cuappdev/claire/articleSportTypeFix
claiireyu Nov 19, 2025
e7a2ec9
resolve conflicts
TushigBili Nov 19, 2025
8124147
Merge pull request #47 from cuappdev/kevin/youtube-sport-filter
TushigBili Nov 19, 2025
076ecaf
Fixes #50
claiireyu Jan 23, 2026
e848140
Fix #49 for basketball score breakdowns
claiireyu Jan 30, 2026
59175e0
Merge pull request #51 from cuappdev/claire/boxScoreScraping
claiireyu Jan 30, 2026
485b426
Merge branch 'master' into claire/basketballScoreBreakdown
claiireyu Jan 30, 2026
720feed
Merge pull request #52 from cuappdev/claire/basketballScoreBreakdown
claiireyu Jan 30, 2026
85eac9d
Fix date comparison in news fetching logic to use datetime object ins…
claiireyu Jan 30, 2026
a883b11
Merge pull request #55 from cuappdev/claire/basketballScoreBreakdown
JoshD94 Jan 30, 2026
4b66034
fixed youtube b64 thumbnail null issue
JoshD94 Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env_template
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ YOUTUBE_API_KEY=
MONGO_URI=
MONGO_DB=
STAGE=
DAILY_SUN_URL=
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ To start the project, run the following command in the terminal

## Setting up the database

Add /graphql to the url to access the interactive GraphQL platform
Create a Mongo database named `score_db` and another named `daily_sun_db`. A partnership with the Daily Sun has given us access to their articles, which we copy into our database and paginate for the frontend.

Add /graphql to the url to access the interactive GraphQL platform
73 changes: 73 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
from flask_graphql import GraphQLView
from graphene import Schema
from src.schema import Query, Mutation
from src.scrapers.games_scraper import fetch_game_schedule
from src.scrapers.youtube_stats import fetch_videos
from src.scrapers.daily_sun_scrape import fetch_news
from src.services.article_service import ArticleService
from src.utils.team_loader import TeamLoader
import signal
import sys
Expand Down Expand Up @@ -83,6 +87,30 @@ def create_context():
),
)

# Setup command line arguments
def parse_args():
    """Parse command-line flags for the server process.

    Returns:
        argparse.Namespace with two boolean attributes:
        - no_scrape: skip all scheduled scraping tasks (frontend dev).
        - no_daily_sun: skip only the Daily Sun article scraping.
    """
    # Fix: the original description text described the --no-scrape flag,
    # not the program; argparse's description should summarize the parser.
    parser = argparse.ArgumentParser(
        description="Score backend server; optional flags skip scraping tasks for dev purposes."
    )
    parser.add_argument(
        "--no-scrape",
        action="store_true",
        help="Skips scraping tasks if set, useful for frontend development.",
    )
    parser.add_argument(
        "--no-daily-sun",
        action="store_true",
        help="Skips using the Daily Sun page for alerts",
    )
    return parser.parse_args()

# Only parse arguments when running directly (not when imported by gunicorn)
if __name__ == "__main__":
    args = parse_args()
else:
    # Default args when imported by gunicorn
    # NOTE(review): gunicorn imports this module, so gunicorn's own CLI flags
    # in sys.argv must never reach argparse; this stand-in object supplies the
    # two flags that the scheduler setup below reads.
    class DefaultArgs:
        # Mirror the argparse defaults: scraping and Daily Sun both enabled.
        no_scrape = False
        no_daily_sun = False
    args = DefaultArgs()

def signal_handler(sig, frame):
    """Terminate the process cleanly (exit status 0) on SIGINT/SIGTERM."""
    raise SystemExit(0)
Expand All @@ -91,5 +119,50 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

# Only parse arguments when running directly (not when imported by gunicorn)
# NOTE(review): this block is a byte-for-byte duplicate of the argument-parsing
# fallback earlier in the module (likely a merge artifact of the "move signal
# handlers" commits). Re-running it rebinds `args` to an equal value, so it is
# harmless but redundant — confirm and delete one copy.
if __name__ == "__main__":
    args = parse_args()
else:
    # Default args when imported by gunicorn
    class DefaultArgs:
        no_scrape = False
        no_daily_sun = False
    args = DefaultArgs()

# Only run scraping tasks if not disabled
if not args.no_scrape:
    # Imported lazily so `--no-scrape` runs never initialize APScheduler.
    from flask_apscheduler import APScheduler
    scheduler = APScheduler()
    scheduler.init_app(app)
    scheduler.start()

    @scheduler.task("interval", id="scrape_schedules", seconds=43200)  # 12 hours
    def scrape_schedules():
        logging.info("Scraping game schedules...")
        fetch_game_schedule()

    @scheduler.task("interval", id="scrape_videos", seconds=43200)  # 12 hours
    def scrape_videos():
        logging.info("Scraping YouTube videos...")
        fetch_videos()

    # Run both jobs once immediately at startup; the scheduler handles repeats.
    scrape_schedules()
    scrape_videos()

# Daily Sun jobs register on the same `scheduler`, which only exists when the
# branch above ran — hence the repeated `not args.no_scrape` guard.
if not args.no_daily_sun and not args.no_scrape:
    @scheduler.task("interval", id="scrape_daily_sun", seconds=3600)
    def scrape_daily_sun():
        logging.info("Getting Daily Sun Sports News...")
        fetch_news()

    @scheduler.task("interval", id="cleanse_daily_sun_db", seconds=604800)  # 1 week
    def cleanse_daily_sun_db():
        logging.info("Cleaning the Daily Sun database from old articles...")
        ArticleService.cleanse_old_articles()

    # Seed both Daily Sun jobs once at startup as well.
    scrape_daily_sun()
    cleanse_daily_sun_db()


if __name__ == "__main__":
    # Dev-only entry point; production serves the app via gunicorn instead.
    app.run(debug=True, host="0.0.0.0", port=8000)
26 changes: 26 additions & 0 deletions src/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def keep_connection_alive():

# Access the database
db = client[os.getenv("MONGO_DB", "score_db")]
daily_sun_db = client[os.getenv("DAILY_SUN_DB", "daily_sun_db")]


def setup_database_indexes():
Expand All @@ -65,6 +66,31 @@ def setup_database_indexes():

# Index for sorting operations
game_collection.create_index([("date", -1)], background=True)

# Index to have unique games so we won't add duplicates
game_collection.create_index(
[
("sport", 1),
("gender", 1),
("date", 1),
("opponent_id", 1),
("state", 1),
],
unique=True,
background=True
)

# Additional index for tournament games (without opponent_id)
game_collection.create_index(
[
("sport", 1),
("gender", 1),
("date", 1),
("city", 1),
("state", 1),
],
background=True
)

print("✅ MongoDB indexes created successfully")
except Exception as e:
Expand Down
3 changes: 2 additions & 1 deletion src/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .game import Game
from .team import Team
from .youtube_video import YoutubeVideo
from .youtube_video import YoutubeVideo
from .article import Article
56 changes: 56 additions & 0 deletions src/models/article.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from bson.objectid import ObjectId
from datetime import datetime

class Article:
    """
    A model representing a news article.

    Attributes:
    - title: The title of the article
    - image: The filename of the article's main image
    - sports_type: The specific sport category
    - published_at: The publication date
    - url: The URL to the full article
    - slug: Unique identifier from the source
    - created_at: When the article was added to our DB
    """

    # Attribute names serialized to Mongo, in document order after "_id".
    _FIELDS = ("title", "image", "sports_type", "published_at", "url", "slug", "created_at")

    def __init__(self, title, sports_type, published_at, url, slug, image=None, id=None, created_at=None):
        # Fall back to a fresh ObjectId string / current timestamp when omitted.
        self.id = id or str(ObjectId())
        self.title = title
        self.image = image
        self.sports_type = sports_type
        self.published_at = published_at
        self.url = url
        self.slug = slug
        self.created_at = created_at or datetime.now()

    def to_dict(self):
        """
        Converts the Article object to a dictionary format for MongoDB storage.
        """
        document = {"_id": self.id}
        for field in self._FIELDS:
            document[field] = getattr(self, field)
        return document

    @staticmethod
    def from_dict(data):
        """
        Converts a MongoDB document to an Article object.
        """
        return Article(
            data.get("title"),
            data.get("sports_type"),
            data.get("published_at"),
            data.get("url"),
            data.get("slug"),
            image=data.get("image"),
            id=data.get("_id"),
            created_at=data.get("created_at"),
        )
5 changes: 5 additions & 0 deletions src/models/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Game:
- `time` The time of the game. (optional)
- `box_score` The scoring summary of the game (optional)
- `score_breakdown` The scoring breakdown of the game (optional)
- 'ticket_link' The ticket link for the game (optional)
"""

def __init__(
Expand All @@ -35,6 +36,7 @@ def __init__(
score_breakdown=None,
team=None,
utc_date=None,
ticket_link=None,
):
self.id = id if id else str(ObjectId())
self.city = city
Expand All @@ -50,6 +52,7 @@ def __init__(
self.score_breakdown = score_breakdown
self.team = team
self.utc_date = utc_date
self.ticket_link = ticket_link

def to_dict(self):
"""
Expand All @@ -70,6 +73,7 @@ def to_dict(self):
"score_breakdown": self.score_breakdown,
"team": self.team,
"utc_date": self.utc_date,
"ticket_link": self.ticket_link,
}

@staticmethod
Expand All @@ -92,4 +96,5 @@ def from_dict(data) -> None:
score_breakdown=data.get("score_breakdown"),
team=data.get("team"),
utc_date=data.get("utc_date"),
ticket_link=data.get("ticket_link"),
)
6 changes: 5 additions & 1 deletion src/models/youtube_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ class YoutubeVideo:
- `thumbnail` The thumbnail of the video, as a URL string pointing to a `.jpg` file.
- `url` The URL of the video.
- `published_at` The date and time the video was published.
- `duration` The duration of the video.
"""

def __init__(
self, title, description, thumbnail, b64_thumbnail, url, published_at, id=None
self, title, description, thumbnail, b64_thumbnail, url, published_at, duration=None, id=None
):
self.id = id if id else str(ObjectId())
self.title = title
Expand All @@ -24,6 +25,7 @@ def __init__(
self.b64_thumbnail = b64_thumbnail
self.url = url
self.published_at = published_at
self.duration = duration

def to_dict(self):
"""
Expand All @@ -37,6 +39,7 @@ def to_dict(self):
"b64_thumbnail": self.b64_thumbnail,
"url": self.url,
"published_at": self.published_at,
"duration": self.duration,
}

@staticmethod
Expand All @@ -52,4 +55,5 @@ def from_dict(data):
b64_thumbnail=data.get("b64_thumbnail"),
url=data.get("url"),
published_at=data.get("published_at"),
duration=data.get("duration"),
)
3 changes: 2 additions & 1 deletion src/mutations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .create_game import CreateGame
from .create_team import CreateTeam
from .create_youtube_video import CreateYoutubeVideo
from .create_youtube_video import CreateYoutubeVideo
from .create_article import CreateArticle
26 changes: 26 additions & 0 deletions src/mutations/create_article.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from graphene import Mutation, String, Field
from src.types import ArticleType
from src.services.article_service import ArticleService

class CreateArticle(Mutation):
    """GraphQL mutation that stores a single news article."""

    class Arguments:
        title = String(required=True)
        sports_type = String(required=True)
        published_at = String(required=True)
        url = String(required=True)
        slug = String(required=True)
        image = String(required=False)

    article = Field(lambda: ArticleType)

    def mutate(self, info, title, sports_type, published_at, url, slug, image=None):
        """Persist the article via ArticleService and return it to the client."""
        payload = dict(
            title=title,
            sports_type=sports_type,
            published_at=published_at,  # Already in ISO 8601 format
            url=url,
            slug=slug,
            image=image,
        )
        created = ArticleService.create_article(payload)
        return CreateArticle(article=created)
7 changes: 5 additions & 2 deletions src/mutations/create_game.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Arguments:
box_score = String(required=False)
score_breakdown = String(required=False)
utc_date = String(required=False)
ticket_link = String(required=False)

game = Field(lambda: GameType)

Expand All @@ -34,7 +35,8 @@ def mutate(
time=None,
box_score=None,
score_breakdown=None,
utc_date=None
utc_date=None,
ticket_link=None
):
game_data = {
"city": city,
Expand All @@ -48,7 +50,8 @@ def mutate(
"time": time,
"box_score": box_score,
"score_breakdown": score_breakdown,
"utc_date": utc_date
"utc_date": utc_date,
"ticket_link": ticket_link
}
new_game = GameService.create_game(game_data)
return CreateGame(game=new_game)
6 changes: 4 additions & 2 deletions src/mutations/create_youtube_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ class Arguments:
title = String(required=True)
description = String(required=True)
thumbnail = String(required=True)
b64_thumbnail = String(required=True)
b64_thumbnail = String(required=False)
url = String(required=True)
published_at = String(required=True)
duration = String(required=True)

youtube_video = Field(lambda: YoutubeVideoType)

def mutate(self, info, id, title, description, thumbnail, url, published_at):
def mutate(self, info, id, title, description, thumbnail, url, published_at, duration, b64_thumbnail=None):
video_data = {
"id": id,
"title": title,
Expand All @@ -23,6 +24,7 @@ def mutate(self, info, id, title, description, thumbnail, url, published_at):
"b64_thumbnail": b64_thumbnail,
"url": url,
"published_at": published_at,
"duration": duration,
}
new_video = YoutubeVideoService.create_video(video_data)
return CreateYoutubeVideo(youtube_video=new_video)
1 change: 1 addition & 0 deletions src/queries/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .game_query import GameQuery
from .team_query import TeamQuery
from .youtube_video_query import YoutubeVideoQuery
from .article_query import ArticleQuery
12 changes: 12 additions & 0 deletions src/queries/article_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from graphene import ObjectType, List, String
from src.services.article_service import ArticleService
from src.types import ArticleType

class ArticleQuery(ObjectType):
    # Root field: the list of articles, optionally narrowed to one sport.
    articles = List(ArticleType, sports_type=String())

    def resolve_articles(self, info, sports_type=None):
        """
        Resolver returning stored news articles; when *sports_type* is given,
        only articles for that sport are included.
        """
        requested_sport = sports_type
        return ArticleService.get_articles(requested_sport)
Loading
Loading