Align Italy Toscany fetcher with design docs

thiagovmdon · thiagovmdon · commit a5d9a40da3a4 · 2026-03-24T09:34:01.000+01:00
diff --git a/rivretrieve/italy_toscany.py b/rivretrieve/italy_toscany.py
@@ -20,30 +20,18 @@ class ItalyToscanyFetcher(base.RiverDataFetcher):
 
     Data source:
         - monitoring website: https://www.sir.toscana.it/monitoraggio/stazioni.php?type=idro
-        - metadata WFS: https://geo.sir.toscana.it/geoserver/geo/ows
-        - archive download endpoint: https://www.sir.toscana.it/archivio/download.php
+        - historical archive portal: https://www.sir.toscana.it/consistenza-rete
 
     Supported variables:
         - ``constants.DISCHARGE_DAILY_MEAN`` (m³/s)
         - ``constants.STAGE_DAILY_MEAN`` (m)
 
     Data description and API:
-        - public idrometer metadata layer: ``geo:cf_idrometri``
-        - monitoring station table: ``monitoraggio/stazioni.php?type=idro``
-        - historical download endpoint parameters:
-          ``IDST=idro_p`` for discharge and ``IDST=idro_l`` for stage
+        - archive data description: https://www.sir.toscana.it/consistenza-rete
+        - GIS layers overview for idrometers: https://www.sir.toscana.it/strati-gis
 
     Terms of use:
-        - see https://www.sir.toscana.it/
-
-    Notes:
-        - metadata merges the static WFS idrometer layer with the public monitoring table
-          so river names and basin labels are retained alongside stable coordinates
-        - coordinates are transformed from EPSG:3003 to WGS84
-        - the archive endpoint returns provider CSV files that use semicolons,
-          decimal commas, Latin-1 text, and a separate quality-flag column
-        - some stations do not expose discharge data in the archive;
-          in those cases ``get_data()`` returns an empty DataFrame
+        - usage notes for archived data: https://www.sir.toscana.it/consistenza-rete
     """
 
     METADATA_URL = (
@@ -217,7 +205,11 @@ def _parse_station_table(cls, text: str) -> pd.DataFrame:
         return df.set_index(constants.GAUGE_ID)
 
     def get_metadata(self) -> pd.DataFrame:
-        """Fetches live metadata for Italy-Toscany stations."""
+        """Fetches live metadata for Italy-Toscany stations.
+
+        Merges the live GIS layer with the public monitoring table and returns
+        a DataFrame indexed by ``constants.GAUGE_ID``.
+        """
         session = utils.requests_retry_session(
             retries=6,
             backoff_factor=1,
@@ -342,7 +334,30 @@ def get_data(
         start_date: Optional[str] = None,
         end_date: Optional[str] = None,
     ) -> pd.DataFrame:
-        """Fetches and parses time series data for a specific gauge and variable."""
+        """Fetches and parses time series data for a specific gauge and variable.
+
+        This method retrieves the requested data from the provider's archive service,
+        parses it, and returns it in a standardized pandas DataFrame format.
+
+        Args:
+            gauge_id: The site-specific identifier for the gauge.
+            variable: The variable to fetch. Must be one of the strings listed
+                in the fetcher's ``get_available_variables()`` output.
+                These are typically defined in ``rivretrieve.constants``.
+            start_date: Optional start date for the data retrieval in 'YYYY-MM-DD' format.
+                If None, data is fetched from the earliest available date.
+            end_date: Optional end date for the data retrieval in 'YYYY-MM-DD' format.
+                If None, data is fetched up to the latest available date.
+
+        Returns:
+            pd.DataFrame: A pandas DataFrame indexed by datetime objects (``constants.TIME_INDEX``)
+            with a single column named after the requested ``variable``. The DataFrame
+            will be empty if no data is found for the given parameters.
+
+        Raises:
+            ValueError: If the requested ``variable`` is not supported by this fetcher.
+            Exception: For unexpected download or parsing errors.
+        """
         start_date = utils.format_start_date(start_date)
         end_date = utils.format_end_date(end_date)
 
diff --git a/tests/test_italy_toscany.py b/tests/test_italy_toscany.py
@@ -1,5 +1,4 @@
 import json
-import os
 import unittest
 from pathlib import Path
 from unittest.mock import MagicMock, patch
@@ -13,7 +12,7 @@
 class TestItalyToscanyFetcher(unittest.TestCase):
     def setUp(self):
         self.fetcher = ItalyToscanyFetcher()
-        self.test_data_dir = Path(os.path.dirname(__file__)) / "test_data"
+        self.test_data_dir = Path(__file__).parent / "test_data"
 
     def _load_json(self, filename):
         with open(self.test_data_dir / filename, "r", encoding="utf-8") as f:
@@ -48,6 +47,7 @@ def test_get_metadata_merges_wfs_and_station_table(self, mock_requests_session):
 
         result_df = self.fetcher.get_metadata()
 
+        self.assertEqual(result_df.index.name, constants.GAUGE_ID)
         self.assertEqual(list(result_df.index), ["TOS01004005", "TOS01004007", "TOS01004379"])
         self.assertEqual(result_df.loc["TOS01004005", constants.STATION_NAME], "Carrara")
         self.assertEqual(result_df.loc["TOS01004005", constants.RIVER], "Carrione")
@@ -59,6 +59,11 @@ def test_get_metadata_merges_wfs_and_station_table(self, mock_requests_session):
         self.assertAlmostEqual(result_df.loc["TOS01004005", "zero_idrometrico"], 95.69, places=2)
         self.assertEqual(result_df.loc["TOS01004005", constants.COUNTRY], "Italy")
         self.assertEqual(result_df.loc["TOS01004005", constants.SOURCE], self.fetcher.SOURCE)
+        self.assertEqual(mock_session.get.call_count, 2)
+        self.assertEqual(mock_session.get.call_args_list[0].args[0], self.fetcher.METADATA_URL)
+        self.assertEqual(mock_session.get.call_args_list[1].args[0], self.fetcher.STATION_TABLE_URL)
+        self.assertEqual(mock_session.get.call_args_list[0].kwargs["timeout"], 60)
+        self.assertEqual(mock_session.get.call_args_list[1].kwargs["timeout"], 60)
 
     @patch("rivretrieve.utils.requests_retry_session")
     def test_get_data_daily_stage(self, mock_requests_session):
@@ -83,8 +88,12 @@ def test_get_data_daily_stage(self, mock_requests_session):
         ).set_index(constants.TIME_INDEX)
 
         assert_frame_equal(result_df, expected_df)
+        self.assertEqual(result_df.index.name, constants.TIME_INDEX)
         params = mock_session.get.call_args.kwargs["params"]
         self.assertEqual(params["IDST"], "idro_l")
+        self.assertEqual(params["IDS"], "TOS02004365")
+        self.assertEqual(mock_session.get.call_args.args[0], self.fetcher.ARCHIVE_URL)
+        self.assertEqual(mock_session.get.call_args.kwargs["timeout"], 60)
 
     @patch("rivretrieve.utils.requests_retry_session")
     def test_get_data_daily_discharge(self, mock_requests_session):
@@ -109,8 +118,10 @@ def test_get_data_daily_discharge(self, mock_requests_session):
         ).set_index(constants.TIME_INDEX)
 
         assert_frame_equal(result_df, expected_df)
+        self.assertEqual(result_df.index.name, constants.TIME_INDEX)
         params = mock_session.get.call_args.kwargs["params"]
         self.assertEqual(params["IDST"], "idro_p")
+        self.assertEqual(params["IDS"], "TOS02004365")
 
     @patch("rivretrieve.utils.requests_retry_session")
     def test_get_data_returns_empty_when_archive_has_no_table(self, mock_requests_session):
@@ -126,6 +137,7 @@ def test_get_data_returns_empty_when_archive_has_no_table(self, mock_requests_se
         )
 
         self.assertTrue(result_df.empty)
+        self.assertEqual(result_df.index.name, constants.TIME_INDEX)
 
     def test_unsupported_variable_raises(self):
         with self.assertRaises(ValueError):