Skip to content

Commit a5d9a40

Browse files
committed
Align Italy Toscany fetcher with design docs
1 parent 9541a27 commit a5d9a40

2 files changed

Lines changed: 47 additions & 20 deletions

File tree

rivretrieve/italy_toscany.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,18 @@ class ItalyToscanyFetcher(base.RiverDataFetcher):
2020
2121
Data source:
2222
- monitoring website: https://www.sir.toscana.it/monitoraggio/stazioni.php?type=idro
23-
- metadata WFS: https://geo.sir.toscana.it/geoserver/geo/ows
24-
- archive download endpoint: https://www.sir.toscana.it/archivio/download.php
23+
- historical archive portal: https://www.sir.toscana.it/consistenza-rete
2524
2625
Supported variables:
2726
- ``constants.DISCHARGE_DAILY_MEAN`` (m³/s)
2827
- ``constants.STAGE_DAILY_MEAN`` (m)
2928
3029
Data description and API:
31-
- public idrometer metadata layer: ``geo:cf_idrometri``
32-
- monitoring station table: ``monitoraggio/stazioni.php?type=idro``
33-
- historical download endpoint parameters:
34-
``IDST=idro_p`` for discharge and ``IDST=idro_l`` for stage
30+
- archive data description: https://www.sir.toscana.it/consistenza-rete
31+
- GIS layers overview for idrometers (river stage gauges): https://www.sir.toscana.it/strati-gis
3532
3633
Terms of use:
37-
- see https://www.sir.toscana.it/
38-
39-
Notes:
40-
- metadata merges the static WFS idrometer layer with the public monitoring table
41-
so river names and basin labels are retained alongside stable coordinates
42-
- coordinates are transformed from EPSG:3003 to WGS84
43-
- the archive endpoint returns provider CSV files that use semicolons,
44-
decimal commas, Latin-1 text, and a separate quality-flag column
45-
- some stations do not expose discharge data in the archive;
46-
in those cases ``get_data()`` returns an empty DataFrame
34+
- data usage notes for archived data: https://www.sir.toscana.it/consistenza-rete
4735
"""
4836

4937
METADATA_URL = (
@@ -217,7 +205,11 @@ def _parse_station_table(cls, text: str) -> pd.DataFrame:
217205
return df.set_index(constants.GAUGE_ID)
218206

219207
def get_metadata(self) -> pd.DataFrame:
220-
"""Fetches live metadata for Italy-Toscany stations."""
208+
"""Fetches live metadata for Italy-Toscany stations.
209+
210+
Merges the live GIS layer with the public monitoring table and returns
211+
a DataFrame indexed by ``constants.GAUGE_ID``.
212+
"""
221213
session = utils.requests_retry_session(
222214
retries=6,
223215
backoff_factor=1,
@@ -342,7 +334,30 @@ def get_data(
342334
start_date: Optional[str] = None,
343335
end_date: Optional[str] = None,
344336
) -> pd.DataFrame:
345-
"""Fetches and parses time series data for a specific gauge and variable."""
337+
"""Fetches and parses time series data for a specific gauge and variable.
338+
339+
This method retrieves the requested data from the provider's archive service,
340+
parses it, and returns it in a standardized pandas DataFrame format.
341+
342+
Args:
343+
gauge_id: The site-specific identifier for the gauge.
344+
variable: The variable to fetch. Must be one of the strings listed
345+
in the fetcher's ``get_available_variables()`` output.
346+
These are typically defined in ``rivretrieve.constants``.
347+
start_date: Optional start date for the data retrieval in 'YYYY-MM-DD' format.
348+
If None, data is fetched from the earliest available date.
349+
end_date: Optional end date for the data retrieval in 'YYYY-MM-DD' format.
350+
If None, data is fetched up to the latest available date.
351+
352+
Returns:
353+
pd.DataFrame: A pandas DataFrame indexed by datetime objects (``constants.TIME_INDEX``)
354+
with a single column named after the requested ``variable``. The DataFrame
355+
will be empty if no data is found for the given parameters.
356+
357+
Raises:
358+
ValueError: If the requested ``variable`` is not supported by this fetcher.
359+
Exception: For unexpected download or parsing errors.
360+
"""
346361
start_date = utils.format_start_date(start_date)
347362
end_date = utils.format_end_date(end_date)
348363

tests/test_italy_toscany.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import json
2-
import os
32
import unittest
43
from pathlib import Path
54
from unittest.mock import MagicMock, patch
@@ -13,7 +12,7 @@
1312
class TestItalyToscanyFetcher(unittest.TestCase):
1413
def setUp(self):
1514
self.fetcher = ItalyToscanyFetcher()
16-
self.test_data_dir = Path(os.path.dirname(__file__)) / "test_data"
15+
self.test_data_dir = Path(__file__).parent / "test_data"
1716

1817
def _load_json(self, filename):
1918
with open(self.test_data_dir / filename, "r", encoding="utf-8") as f:
@@ -48,6 +47,7 @@ def test_get_metadata_merges_wfs_and_station_table(self, mock_requests_session):
4847

4948
result_df = self.fetcher.get_metadata()
5049

50+
self.assertEqual(result_df.index.name, constants.GAUGE_ID)
5151
self.assertEqual(list(result_df.index), ["TOS01004005", "TOS01004007", "TOS01004379"])
5252
self.assertEqual(result_df.loc["TOS01004005", constants.STATION_NAME], "Carrara")
5353
self.assertEqual(result_df.loc["TOS01004005", constants.RIVER], "Carrione")
@@ -59,6 +59,11 @@ def test_get_metadata_merges_wfs_and_station_table(self, mock_requests_session):
5959
self.assertAlmostEqual(result_df.loc["TOS01004005", "zero_idrometrico"], 95.69, places=2)
6060
self.assertEqual(result_df.loc["TOS01004005", constants.COUNTRY], "Italy")
6161
self.assertEqual(result_df.loc["TOS01004005", constants.SOURCE], self.fetcher.SOURCE)
62+
self.assertEqual(mock_session.get.call_count, 2)
63+
self.assertEqual(mock_session.get.call_args_list[0].args[0], self.fetcher.METADATA_URL)
64+
self.assertEqual(mock_session.get.call_args_list[1].args[0], self.fetcher.STATION_TABLE_URL)
65+
self.assertEqual(mock_session.get.call_args_list[0].kwargs["timeout"], 60)
66+
self.assertEqual(mock_session.get.call_args_list[1].kwargs["timeout"], 60)
6267

6368
@patch("rivretrieve.utils.requests_retry_session")
6469
def test_get_data_daily_stage(self, mock_requests_session):
@@ -83,8 +88,12 @@ def test_get_data_daily_stage(self, mock_requests_session):
8388
).set_index(constants.TIME_INDEX)
8489

8590
assert_frame_equal(result_df, expected_df)
91+
self.assertEqual(result_df.index.name, constants.TIME_INDEX)
8692
params = mock_session.get.call_args.kwargs["params"]
8793
self.assertEqual(params["IDST"], "idro_l")
94+
self.assertEqual(params["IDS"], "TOS02004365")
95+
self.assertEqual(mock_session.get.call_args.args[0], self.fetcher.ARCHIVE_URL)
96+
self.assertEqual(mock_session.get.call_args.kwargs["timeout"], 60)
8897

8998
@patch("rivretrieve.utils.requests_retry_session")
9099
def test_get_data_daily_discharge(self, mock_requests_session):
@@ -109,8 +118,10 @@ def test_get_data_daily_discharge(self, mock_requests_session):
109118
).set_index(constants.TIME_INDEX)
110119

111120
assert_frame_equal(result_df, expected_df)
121+
self.assertEqual(result_df.index.name, constants.TIME_INDEX)
112122
params = mock_session.get.call_args.kwargs["params"]
113123
self.assertEqual(params["IDST"], "idro_p")
124+
self.assertEqual(params["IDS"], "TOS02004365")
114125

115126
@patch("rivretrieve.utils.requests_retry_session")
116127
def test_get_data_returns_empty_when_archive_has_no_table(self, mock_requests_session):
@@ -126,6 +137,7 @@ def test_get_data_returns_empty_when_archive_has_no_table(self, mock_requests_se
126137
)
127138

128139
self.assertTrue(result_df.empty)
140+
self.assertEqual(result_df.index.name, constants.TIME_INDEX)
129141

130142
def test_unsupported_variable_raises(self):
131143
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)