From c3ccf9e8f8fb896b1a1bb85880f553484d72066d Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Fri, 7 Mar 2025 07:47:33 -0500
Subject: [PATCH 1/7] feat(app): update to accommodate changes in data sources
 app

---
 AccessManager.py            |  50 +++++
 CacheManager.py             |  40 ++++
 Dockerfile                  |   2 +
 InternetArchiveInterface.py |  47 +++++
 Jenkinsfile                 |  14 ++
 PDAPInterface.py            |  49 +++++
 README.md                   |   3 +
 cache_url.py                | 374 +++++++++++++++++++++---------------
 enums.py                    |  15 ++
 exceptions.py               |   2 +
 requirements.txt            |   4 +-
 11 files changed, 442 insertions(+), 158 deletions(-)
 create mode 100644 AccessManager.py
 create mode 100644 CacheManager.py
 create mode 100644 InternetArchiveInterface.py
 create mode 100644 PDAPInterface.py
 create mode 100644 enums.py
 create mode 100644 exceptions.py

diff --git a/AccessManager.py b/AccessManager.py
new file mode 100644
index 0000000..d7bd03f
--- /dev/null
+++ b/AccessManager.py
@@ -0,0 +1,50 @@
+import os
+
+import requests
+
+
+
+class AccessManager:
+    """
+    Manages access to the API, handling logins and access token storage
+    """
+
+    def __init__(
+            self,
+            email: str,
+            password: str
+    ):
+        self.email = email
+        self.password = password
+        self.access_token = None
+        self.refresh_token = None
+        self.login()
+
+    def get_bearer_authorization_header(self):
+        return {"Authorization": f"Bearer {self.access_token}"}
+
+    def login(self):
+        response = requests.post(
+            f"{os.getenv('VITE_VUE_APP_BASE_URL')}/auth/login",
+            json={
+                "email": self.email,
+                "password": self.password
+            },
+            timeout=10
+        )
+        response.raise_for_status()
+        data = response.json()
+        self.access_token = data["access_token"]
+        self.refresh_token = data["refresh_token"]
+
+    def refresh_access_token(self):
+        response = requests.post(
+            f"{os.getenv('VITE_VUE_APP_BASE_URL')}/auth/refresh-session",
+            headers={
+                "Authorization": f"Bearer {self.refresh_token}"
+            },
+            timeout=10
+        )
+        response.raise_for_status()
+        data = response.json()
+        self.access_token = data["access_token"]
\ No newline at end of file
diff --git a/CacheManager.py b/CacheManager.py
new file mode 100644
index 0000000..80766d7
--- /dev/null
+++ b/CacheManager.py
@@ -0,0 +1,40 @@
+import datetime
+import json
+import os
+
+
+class CacheManager:
+    """
+    Manages an internal cache for tracking when source URLs were last cached
+    in the internet archive
+    This is to minimize the number of requests to the archive
+    """
+
+    def __init__(self):
+        # If the file does not exist, create it
+        if not os.path.exists("cache.json"):
+            with open("cache.json", "w") as f:
+                f.write("{}")
+
+        # Load the cache
+        with open("cache.json", "r") as f:
+            self.cache = json.load(f)
+
+    def save_cache(self):
+        with open("cache.json", "w") as f:
+            json.dump(self.cache, f)
+
+    def datetime_to_str(self, last_cached: datetime.datetime) -> str:
+        return last_cached.strftime("%Y-%m-%d")
+
+    def str_to_datetime(self, last_cached: str) -> datetime.datetime:
+        return datetime.datetime.strptime(last_cached, "%Y-%m-%d")
+
+    def get_last_cached(self, source_url: str) -> datetime.datetime:
+        return self.str_to_datetime(self.cache.get(source_url))
+
+    def set_last_cached(self, source_url: str, last_cached: datetime.datetime):
+        self.cache[source_url] = self.datetime_to_str(last_cached)
+
+    def has_source_url(self, source_url: str) -> bool:
+        return source_url in self.cache
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 6b0d429..e4f4598 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,6 +2,8 @@ FROM python:3.11
 
 WORKDIR /usr/src/app
 
+RUN apt-get update && apt-get install -y curl
+
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt
 
diff --git a/InternetArchiveInterface.py b/InternetArchiveInterface.py
new file mode 100644
index 0000000..0c50836
--- /dev/null
+++ b/InternetArchiveInterface.py
@@ -0,0 +1,47 @@
+import time
+from datetime import datetime
+
+import requests
+
+from exceptions import ArchiveLastCacheNotFoundError
+
+
+class InternetArchiveInterface:
+
+    def __init__(self, s3_keys: str):
+        self.s3_keys = s3_keys
+
+    def get_website_info_data(self, source_url):
+        website_info = requests.get(
+            f"https://archive.org/wayback/available?url={source_url}",
+            timeout=10
+        )
+        website_info_data = website_info.json()
+        return website_info_data
+
+    def get_website_info_data_last_cached(self, source_url):
+        website_info_data = self.get_website_info_data(source_url)
+        if not website_info_data["archived_snapshots"]:
+            raise ArchiveLastCacheNotFoundError
+        return datetime.strptime(
+            website_info_data["archived_snapshots"]["closest"]["timestamp"],
+            "%Y%m%d%H%M%S",
+        )
+
+    def save_to_internet_archive(self, entry: dict, source_url: str, wait_time: int):
+        """
+        Wait then post to Internet Archive
+        :param entry:
+        :param source_url:
+        :param wait_time: The amount of time to wait
+        :return:
+        """
+        api_url = f"http://web.archive.org/save/{source_url}"
+        time.sleep(wait_time)
+        requests.post(
+            api_url,
+            headers={"Authorization": f"LOW {self.s3_keys}"},
+            timeout=10
+        )
+        # Update the last_cached date if cache is successful
+        entry["last_cached"] = datetime.now().strftime("%Y-%m-%d")
diff --git a/Jenkinsfile b/Jenkinsfile
index 1114241..88e3fa4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -14,12 +14,26 @@ pipeline {
     }
 
     stages {
+        stage('Retrieve JSON from Previous Build') {
+            steps {
+                copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
+                script {
+                    def jsonContent = readFile 'cache.json'
+                    def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
+                    echo "Loaded JSON from previous build: ${jsonData}"
+                }
+            }
+        }
         stage('Run Automatic Archive') {
             steps {
                 echo 'Running Automatic Archive...'
                 sh 'python cache_url.py'
             }
         }
+        stage('Save cache') {
+            steps {
+                archiveArtifacts artifacts: 'cache.json', fingerprint: true
+            }
     }
     post {
         failure {
diff --git a/PDAPInterface.py b/PDAPInterface.py
new file mode 100644
index 0000000..6feabec
--- /dev/null
+++ b/PDAPInterface.py
@@ -0,0 +1,49 @@
+import json
+from datetime import datetime
+
+import requests
+
+from enums import UpdateFrequency
+
+
+class PDAPInterface:
+
+    def __init__(self, base_url: str):
+        self.base_url = base_url
+
+
+    def update_pdap_archives(
+            self,
+            entry: dict,
+            authorization_header: dict
+    ):
+        """
+        Update data in PDAP archives
+        :param entry:
+        :return:
+        """
+        response = requests.put(
+            f"{self.base_url}/archives",
+            json=entry,
+            headers=authorization_header,
+            timeout=10
+        )
+        response.raise_for_status()
+
+    def get_from_pdap_archives(
+            self,
+            authorization_header: dict,
+            update_frequency: UpdateFrequency,
+            last_archived_before: datetime
+    ):
+        response = requests.get(
+            f"{self.base_url}/archives",
+            params={
+                "update_frequency": update_frequency.value,
+                "last_archived_before": last_archived_before.isoformat()
+            },
+            headers=authorization_header,
+            timeout=10
+        )
+        response.raise_for_status()
+        return response.json()
\ No newline at end of file
diff --git a/README.md b/README.md
index d97f1a5..ac3519a 100644
--- a/README.md
+++ b/README.md
@@ -10,4 +10,7 @@ Requires the following environment variables to be set:
 ```text
 VUE_APP_PDAP_API_KEY=<YOUR_PDAP_API_KEY>
 VITE_VUE_APP_BASE_URL=<YOUR_PDAP_API_URL>
+PDAP_EMAIL=<YOUR_PDAP_EMAIL>
+PDAP_PASSWORD=<YOUR_PDAP_PASSWORD>
+INTERNET_ARCHIVE_S3_KEYS=<YOUR_INTERNET_ARCHIVE_S3_KEYS>
 ```
\ No newline at end of file
diff --git a/cache_url.py b/cache_url.py
index 5d82c2f..1f216ef 100644
--- a/cache_url.py
+++ b/cache_url.py
@@ -1,18 +1,24 @@
 import json
 from datetime import datetime, timedelta
 from dataclasses import dataclass
+
+from dotenv import load_dotenv
 from tqdm import tqdm
 import requests
 import os
 import time
 
+from AccessManager import AccessManager
+from CacheManager import CacheManager
+from InternetArchiveInterface import InternetArchiveInterface
+from PDAPInterface import PDAPInterface
+from enums import UpdateFrequency
+from exceptions import ArchiveLastCacheNotFoundError
+
 # How long to wait in between archive requests, in seconds
 # Too many requests will result in the IP being temporarily blocked: https://archive.org/details/toomanyrequests_20191110
 ARCHIVE_WAIT_TIME = 7
 
-class ArchiveLastCacheNotFoundError(Exception):
-    pass
-
 
 @dataclass
 class ArchiveEntry:
@@ -27,7 +33,7 @@ def from_dict(cls, dict_entry: dict):
         return cls(**dict_entry)
 
 
-API_KEY = "Bearer " + os.getenv("VUE_APP_PDAP_API_KEY")
+API_KEY = "Basic " + os.getenv("VUE_APP_PDAP_API_KEY")
 UPDATE_FREQUENCY_MAPPING = {
     "Incident-based": 7,
     "< Hourly": 1 / 24,
@@ -44,69 +50,6 @@ def from_dict(cls, dict_entry: dict):
     "Other": None,
 }
 
-
-def archive_url(entry: dict):
-    """
-
-    :param entry:
-    :return:
-    """
-    entry["broken_source_url_as_of"] = None
-    source_url = entry.get("source_url")
-
-    try:
-        wait_then_post(entry, source_url, ARCHIVE_WAIT_TIME)
-    except Exception as error:
-        try:
-            wait_then_post(entry, source_url, 10)
-        except Exception as error:
-            print(str(error))
-    # Send updated data to Data Sources
-    update_pdap_archives(entry)
-
-def wait_then_post(entry: dict, source_url: str, wait_time: int):
-    """
-    Wait then post to Internet Archive
-    :param entry:
-    :param source_url:
-    :param wait_time: The amount of time to wait
-    :return:
-    """
-    api_url = f"http://web.archive.org/save/{source_url}"
-    time.sleep(wait_time)
-    requests.post(api_url)
-    # Update the last_cached date if cache is successful
-    entry["last_cached"] = datetime.now().strftime("%Y-%m-%d")
-
-
-def handle_missing_source_url(entry: dict):
-    """
-    Record when url was found to be missing,
-    update PDAP archives, and throw exception
-    :param entry:
-    :return:
-    """
-    entry["broken_source_url_as_of"] = datetime.now().strftime("%Y-%m-%d")
-    update_pdap_archives(entry)
-    raise Exception("No source_url")
-
-
-def update_pdap_archives(entry: dict):
-    """
-    Update data in PDAP archives
-    :param entry:
-    :return:
-    """
-    entry_json = json.dumps(entry)
-    response = requests.put(
-        f"{os.getenv('VITE_VUE_APP_BASE_URL')}/archives",
-        json=entry_json,
-        headers={"Authorization": API_KEY},
-    )
-    response.raise_for_status()
-
-
-
 def get_update_delta(update_frequency: str | None) -> timedelta:
     """
     Calculate update delt based on entry's update frequency
@@ -122,107 +65,224 @@ def get_update_delta(update_frequency: str | None) -> timedelta:
     return timedelta(days=int(update_delta))
 
 
-def get_website_info_data_last_cached(source_url) -> datetime:
-    website_info_data = get_website_info_data(source_url)
-    if not website_info_data["archived_snapshots"]:
-        raise ArchiveLastCacheNotFoundError
-    return datetime.strptime(
-        website_info_data["archived_snapshots"]["closest"]["timestamp"],
-        "%Y%m%d%H%M%S",
-    )
-
-
-def get_last_archived(last_archived: str | None, source_url: str) -> datetime:
-    """
-    Get last archived date of website from Internet Archive.
-    :param entry:
-    :param source_url:
-    :return:
-    """
-    if last_archived is not None:
-        try:
-            return datetime.strptime(last_archived, "%Y-%m-%d")
-        except ValueError:
-            return datetime.min
-    # Check if website exists in archive and compare archived website to current site
-    last_archived = datetime.min
+def last_archived_or_datetime_min(last_archived):
     try:
-        website_info_data_last_cached = get_website_info_data_last_cached(source_url)
-    except ArchiveLastCacheNotFoundError:
-        return last_archived
-    if website_info_data_last_cached > last_archived:
-        return website_info_data_last_cached
-    return last_archived
+        return datetime.strptime(last_archived, "%Y-%m-%d")
+    except ValueError:
+        return datetime.min
 
 
-def get_website_info_data(source_url):
-    website_info = requests.get(
-        f"https://archive.org/wayback/available?url={source_url}"
-    )
-    website_info_data = website_info.json()
-    return website_info_data
-
+def missing_source_url(entry: dict):
+    return entry['source_url'] is None
 
-def main():
-    data = get_from_archives()
-    extract_url_info_and_archived_if_needed(data)
+def is_overdue_for_update(last_archived, update_delta):
+    return last_archived + update_delta < datetime.now()
 
 
-def extract_url_info_and_archived_if_needed(data: list[dict]):
-    """
+def get_past_date(reference_date: datetime, frequency: UpdateFrequency) -> datetime:
+    year_delta = timedelta(days=365)
+
+    frequency_map = {
+        UpdateFrequency.WEEKLY: timedelta(weeks=1),
+        UpdateFrequency.DAILY: timedelta(days=1),
+        UpdateFrequency.RARELY_UPDATED: year_delta,  # Arbitrary long period
+        UpdateFrequency.MORE_THAN_ANNUALLY: year_delta,
+        UpdateFrequency.ON_REQUEST: year_delta,  # Arbitrary long period
+        UpdateFrequency.MONTHLY: timedelta(days=30),
+        UpdateFrequency.INCIDENT_BASED: year_delta,  # Arbitrary long period
+        UpdateFrequency.ANNUALLY: year_delta,
+        UpdateFrequency.LESS_THAN_HOURLY: timedelta(minutes=30),  # Arbitrary small period
+        UpdateFrequency.BI_WEEKLY: timedelta(weeks=2),
+        UpdateFrequency.HOURLY: timedelta(hours=1),
+        UpdateFrequency.QUARTERLY: timedelta(days=90),
+    }
+
+    return reference_date - frequency_map.get(frequency, timedelta(days=0))
+
+
+class Archiver:
+
+    def __init__(
+            self,
+            access_manager: AccessManager,
+            cache_manager: CacheManager,
+            internet_archive_interface: InternetArchiveInterface,
+            pdap_interface: PDAPInterface
+    ):
+        self.access_manager = access_manager
+        self.cache_manager = cache_manager
+        self.internet_archive_interface = internet_archive_interface
+        self.pdap_interface = pdap_interface
+
+    def get_from_pdap_archives(
+            self,
+            update_frequency: UpdateFrequency,
+            last_archived_before: datetime
+    ):
+        return self.pdap_interface.get_from_pdap_archives(
+            authorization_header=self.access_manager.get_bearer_authorization_header(),
+            update_frequency=update_frequency,
+            last_archived_before=last_archived_before
+        )
+
+    def handle_missing_source_url(self, entry: dict):
+        entry["broken_source_url_as_of"] = datetime.now().strftime("%Y-%m-%d")
+        self.update_pdap_archives(entry)
+        raise Exception("No source_url")
+
+    def get_last_archived(
+            self,
+            last_archived: str | None,
+            source_url: str,
+    ) -> datetime:
+        """
+        Get last archived date of website from Internet Archive.
+        :param entry:
+        :param source_url:
+        :return:
+        """
+        if last_archived is not None:
+            return last_archived_or_datetime_min(last_archived)
+        # Check if website exists in archive and compare archived website to current site
+        last_archived = datetime.min
+        try:
+            if self.cache_manager.has_source_url(source_url):
+                website_info_data_last_cached = self.cache_manager.get_last_cached(source_url)
+            else:
+                website_info_data_last_cached = self.internet_archive_interface.get_website_info_data_last_cached(
+                    source_url
+                )
+                self.cache_manager.set_last_cached(source_url, website_info_data_last_cached)
+        except ArchiveLastCacheNotFoundError:
+            self.cache_manager.set_last_cached(source_url, last_archived)
+            return last_archived
+        if website_info_data_last_cached > last_archived:
+            return website_info_data_last_cached
+        return last_archived
 
-    :param data:
-    :return:
-    """
-    # Create a tuple of entries with missing source URLs
-    missing_source_url_entries = tuple(filter(missing_source_url, data))
-
-    # Handle entries with missing source URLs
-    print("Handling missing source urls")
-    for entry in tqdm(missing_source_url_entries):
-        handle_missing_source_url(entry)
-
-    print("\nFinding entries that need updates")
-    non_missing_source_url_entries = tuple(filter(lambda e: not missing_source_url(e), data))
-    entries_needing_updates = []
-    for entry in tqdm(non_missing_source_url_entries):
-        if needs_updated(entry):
-            entries_needing_updates.append(entry)
-
-    print(f"Updating {len(entries_needing_updates)} entries that need updates")
-    # Handle entries that need to be updated
-    for entry in tqdm(entries_needing_updates):
+    def needs_updated(self, entry: dict) -> bool:
+        """
+        Check if entry needs to be updated
+        :param entry:
+        :return:
+        """
+        last_archived = self.get_last_archived(
+            last_archived=entry["last_cached"],
+            source_url=entry["source_url"],
+        )
+        update_delta = get_update_delta(entry["update_frequency"])
+        return is_overdue_for_update(last_archived, update_delta)
+
+    def get_entries_needing_updates(self, data):
+        non_missing_source_url_entries = tuple(filter(lambda e: not missing_source_url(e), data))
+        entries_needing_updates = []
+        for entry in tqdm(non_missing_source_url_entries):
+            if self.needs_updated(entry):
+                entries_needing_updates.append(entry)
+        self.cache_manager.save_cache()
+        return entries_needing_updates
+
+    def update_pdap_archives(
+            self,
+            entry: dict,
+    ):
+        """
+        Update data in PDAP archives
+        :param entry:
+        :return:
+        """
         try:
-            archive_url(entry)
+            self.pdap_interface.update_pdap_archives(
+                entry=entry,
+                authorization_header=self.access_manager.get_bearer_authorization_header(),
+            )
         except Exception as error:
-            print(str(error))
+            # Try again after refreshing access token
+            self.access_manager.refresh_access_token()
+            self.pdap_interface.update_pdap_archives(
+                entry=entry,
+                authorization_header=self.access_manager.get_bearer_authorization_header(),
+            )
+
+
+    def archive_url(
+            self,
+            entry: dict,
+    ):
+        """
+
+        :param entry:
+        :return:
+        """
+        entry["broken_source_url_as_of"] = None
+        source_url = entry.get("source_url")
 
-def missing_source_url(entry: dict):
-    return entry['source_url'] is None
+        try:
+            self.internet_archive_interface.save_to_internet_archive(entry, source_url, ARCHIVE_WAIT_TIME)
+            self.cache_manager.set_last_cached(source_url, datetime.now())
+        except Exception as error:
+            try:
+                self.internet_archive_interface.save_to_internet_archive(entry, source_url, 10)
+                self.cache_manager.set_last_cached(source_url, datetime.now())
+            except Exception as error:
+                print(str(error))
+        # Send updated data to Data Sources
+        self.update_pdap_archives(
+            entry,
+        )
+
+    def update_entries(self, entries_needing_updates):
+        for entry in tqdm(entries_needing_updates):
+            self.archive_url(entry)
+
+    def extract_url_info_and_archive_if_needed(self, data):
+        """
+        :param data:
+        :return:
+        """
+        # Create a tuple of entries with missing source URLs
+        missing_source_url_entries = tuple(filter(missing_source_url, data))
+
+        # Handle entries with missing source URLs
+        print("Handling missing source urls")
+        for entry in tqdm(missing_source_url_entries):
+            self.handle_missing_source_url(entry)
+
+        print("\nFinding entries that need updates")
+        entries_needing_updates = self.get_entries_needing_updates(data)
+
+        print(f"Updating {len(entries_needing_updates)} entries that need updates")
+        # Handle entries that need to be updated
+        self.update_entries(entries_needing_updates)
+
+    def main(self):
+        today = datetime.today()
+        for update_frequency in UpdateFrequency:
+            print(f"Getting {update_frequency.value} entries...")
+            last_date = get_past_date(reference_date=today, frequency=update_frequency)
+            data = self.get_from_pdap_archives(
+                last_archived_before=last_date,
+                update_frequency=update_frequency
+            )
+            self.extract_url_info_and_archive_if_needed(
+                data,
+            )
 
-def needs_updated(entry: dict) -> bool:
-    """
-    Check if entry needs to be updated
-    :param entry:
-    :return:
-    """
-    last_archived = get_last_archived(entry["last_cached"], entry["source_url"])
-    update_delta = get_update_delta(entry["update_frequency"])
-    return last_archived + update_delta < datetime.now()
+if __name__ == "__main__":
 
-def get_from_archives() -> list[dict]:
-    """
-    Get data from PDAP Archive.
-    :param url:
-    :return:
-    """
-    response = requests.get(
-        f"{os.getenv('VITE_VUE_APP_BASE_URL')}/archives",
-        headers={"Authorization": API_KEY},
+    load_dotenv()
+    archiver = Archiver(
+        access_manager=AccessManager(
+            email=os.getenv("PDAP_EMAIL"),
+            password=os.getenv("PDAP_PASSWORD"),
+        ),
+        cache_manager=CacheManager(),
+        internet_archive_interface=InternetArchiveInterface(
+            s3_keys=os.getenv("INTERNET_ARCHIVE_S3_KEYS")
+        ),
+        pdap_interface=PDAPInterface(
+            base_url=os.getenv("VITE_VUE_APP_BASE_URL")
+        )
     )
-    response.raise_for_status()
-    return response.json()
+    archiver.main()
 
-
-if __name__ == "__main__":
-    main()
diff --git a/enums.py b/enums.py
new file mode 100644
index 0000000..4195a24
--- /dev/null
+++ b/enums.py
@@ -0,0 +1,15 @@
+from enum import Enum
+
+class UpdateFrequency(Enum):
+    WEEKLY = "Weekly"
+    DAILY = "Daily"
+    RARELY_UPDATED = "No updates / rarely updated"
+    MORE_THAN_ANNUALLY = "> Annually"
+    ON_REQUEST = "On request"
+    MONTHLY = "Monthly"
+    INCIDENT_BASED = "Incident-based"
+    ANNUALLY = "Annually"
+    LESS_THAN_HOURLY = "< Hourly"
+    BI_WEEKLY = "Bi-weekly"
+    HOURLY = "Hourly"
+    QUARTERLY = "Quarterly"
\ No newline at end of file
diff --git a/exceptions.py b/exceptions.py
new file mode 100644
index 0000000..3d88c95
--- /dev/null
+++ b/exceptions.py
@@ -0,0 +1,2 @@
+class ArchiveLastCacheNotFoundError(Exception):
+    pass
diff --git a/requirements.txt b/requirements.txt
index b483a61..8805968 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,6 @@ requests==2.28.2
 savepagenow==1.2.3
 urllib3==1.26.15
 pytest==8.2.1
-tqdm==4.66.4
\ No newline at end of file
+tqdm==4.66.4
+python-dotenv~=1.0.1
+internetarchive~=5.2.1
\ No newline at end of file

From c02e841b355c31ee4098e2fb7843fc17c2456e5e Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Fri, 7 Mar 2025 07:54:08 -0500
Subject: [PATCH 2/7] feat(app): update to accommodate changes in data sources
 app

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 88e3fa4..6ede734 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -34,6 +34,7 @@ pipeline {
             steps {
                 archiveArtifacts artifacts: 'cache.json', fingerprint: true
             }
+        }
     }
     post {
         failure {

From 16e7001de1dccea6e8197031b3f9dcc23da62a77 Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Fri, 7 Mar 2025 07:59:36 -0500
Subject: [PATCH 3/7] feat(app): update to accommodate changes in data sources
 app

---
 Jenkinsfile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 6ede734..0bfe4f5 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,13 +15,13 @@ pipeline {
 
     stages {
         stage('Retrieve JSON from Previous Build') {
-            steps {
+            try {
                 copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
-                script {
-                    def jsonContent = readFile 'cache.json'
-                    def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
-                    echo "Loaded JSON from previous build: ${jsonData}"
-                }
+                def jsonContent = readFile 'cache.json'
+                def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
+                echo "Loaded JSON from previous build: ${jsonData}"
+            } catch (Exception e) {
+                echo "No previous successful build found, proceeding without cache."
             }
         }
         stage('Run Automatic Archive') {

From ca5457e627b76396159472222a7294c967eceb75 Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Fri, 7 Mar 2025 08:01:16 -0500
Subject: [PATCH 4/7] feat(app): update to accommodate changes in data sources
 app

---
 Jenkinsfile | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 0bfe4f5..f9dddfb 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,13 +15,15 @@ pipeline {
 
     stages {
         stage('Retrieve JSON from Previous Build') {
-            try {
-                copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
-                def jsonContent = readFile 'cache.json'
-                def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
-                echo "Loaded JSON from previous build: ${jsonData}"
-            } catch (Exception e) {
-                echo "No previous successful build found, proceeding without cache."
+            steps {
+                try {
+                    copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
+                    def jsonContent = readFile 'cache.json'
+                    def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
+                    echo "Loaded JSON from previous build: ${jsonData}"
+                } catch (Exception e) {
+                    echo "No previous successful build found, proceeding without cache."
+                }
             }
         }
         stage('Run Automatic Archive') {

From 326df936d74e58c91b3d8bbf04fc79ad69f173ae Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Fri, 7 Mar 2025 08:02:16 -0500
Subject: [PATCH 5/7] feat(app): update to accommodate changes in data sources
 app

---
 Jenkinsfile | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index f9dddfb..a7822ce 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -16,13 +16,15 @@ pipeline {
     stages {
         stage('Retrieve JSON from Previous Build') {
             steps {
-                try {
-                    copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
-                    def jsonContent = readFile 'cache.json'
-                    def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
-                    echo "Loaded JSON from previous build: ${jsonData}"
-                } catch (Exception e) {
-                    echo "No previous successful build found, proceeding without cache."
+                script{
+                    try {
+                            copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
+                            def jsonContent = readFile 'cache.json'
+                            def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
+                            echo "Loaded JSON from previous build: ${jsonData}"
+                        } catch (Exception e) {
+                            echo "No previous successful build found, proceeding without cache."
+                    }
                 }
             }
         }

From 240bebe07ea6914e07ddeb365e2155f794222843 Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Fri, 7 Mar 2025 08:03:26 -0500
Subject: [PATCH 6/7] feat(app): update to accommodate changes in data sources
 app

---
 Jenkinsfile | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index a7822ce..1114241 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -14,31 +14,12 @@ pipeline {
     }
 
     stages {
-        stage('Retrieve JSON from Previous Build') {
-            steps {
-                script{
-                    try {
-                            copyArtifacts projectName: "${env.JOB_NAME}", filter: 'cache.json', selector: lastSuccessful()
-                            def jsonContent = readFile 'cache.json'
-                            def jsonData = new groovy.json.JsonSlurper().parseText(jsonContent)
-                            echo "Loaded JSON from previous build: ${jsonData}"
-                        } catch (Exception e) {
-                            echo "No previous successful build found, proceeding without cache."
-                    }
-                }
-            }
-        }
         stage('Run Automatic Archive') {
             steps {
                 echo 'Running Automatic Archive...'
                 sh 'python cache_url.py'
             }
         }
-        stage('Save cache') {
-            steps {
-                archiveArtifacts artifacts: 'cache.json', fingerprint: true
-            }
-        }
     }
     post {
         failure {

From fb72b00520978de19e01aa744a35742a1f0106be Mon Sep 17 00:00:00 2001
From: Max Chis <maxachis@gmail.com>
Date: Tue, 11 Mar 2025 14:58:35 -0400
Subject: [PATCH 7/7] fix(app): fix archives GET call to use `YYYY-MM-DD`
 formatting

---
 PDAPInterface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PDAPInterface.py b/PDAPInterface.py
index 6feabec..e456d5d 100644
--- a/PDAPInterface.py
+++ b/PDAPInterface.py
@@ -40,7 +40,7 @@ def get_from_pdap_archives(
             f"{self.base_url}/archives",
             params={
                 "update_frequency": update_frequency.value,
-                "last_archived_before": last_archived_before.isoformat()
+                "last_archived_before": last_archived_before.strftime("%Y-%m-%d")
             },
             headers=authorization_header,
             timeout=10