|
1 | 1 | import json |
2 | | -import os |
3 | 2 | import unittest |
4 | 3 | from pathlib import Path |
5 | 4 | from unittest.mock import MagicMock, patch |
|
14 | 13 | class TestBosniaHerzegovinaFetcher(unittest.TestCase): |
15 | 14 | def setUp(self): |
16 | 15 | self.fetcher = BosniaHerzegovinaFetcher() |
17 | | - self.test_data_dir = Path(os.path.dirname(__file__)) / "test_data" |
| 16 | + self.test_data_dir = Path(__file__).parent / "test_data" |
18 | 17 |
|
19 | 18 | def _load_json(self, filename): |
20 | | - with open(self.test_data_dir / filename, "r", encoding="utf-8") as f: |
21 | | - return json.load(f) |
| 19 | + with (self.test_data_dir / filename).open("r", encoding="utf-8") as file_handle: |
| 20 | + return json.load(file_handle) |
| 21 | + |
| 22 | + def _load_bytes(self, filename): |
| 23 | + return (self.test_data_dir / filename).read_bytes() |
| 24 | + |
| 25 | + @staticmethod |
| 26 | + def _build_response(status_code=200, content=b"", json_data=None): |
| 27 | + response = MagicMock() |
| 28 | + response.status_code = status_code |
| 29 | + response.content = content |
| 30 | + response.json.return_value = json_data |
| 31 | + response.raise_for_status = MagicMock() |
| 32 | + return response |
22 | 33 |
|
23 | 34 | @patch("rivretrieve.utils.requests_retry_session") |
24 | 35 | def test_get_metadata(self, mock_requests_session): |
25 | 36 | mock_session = MagicMock() |
26 | 37 | mock_requests_session.return_value = mock_session |
27 | 38 |
|
28 | | - mock_response = MagicMock() |
29 | | - mock_response.json.return_value = self._load_json("bosnia_herzegovina_metadata_sample.json") |
30 | | - mock_response.raise_for_status = MagicMock() |
| 39 | + mock_response = self._build_response( |
| 40 | + json_data=self._load_json("bosnia_herzegovina_metadata_sample.json") |
| 41 | + ) |
31 | 42 | mock_session.get.return_value = mock_response |
32 | 43 |
|
33 | 44 | result_df = self.fetcher.get_metadata() |
34 | 45 |
|
| 46 | + self.assertEqual(result_df.index.name, constants.GAUGE_ID) |
35 | 47 | self.assertEqual(list(result_df.index), ["4510", "4121"]) |
36 | 48 | self.assertEqual(result_df.loc["4510", constants.STATION_NAME], "HS Kaloševići") |
37 | 49 | self.assertEqual(result_df.loc["4510", constants.RIVER], "Usora") |
38 | 50 | self.assertAlmostEqual(result_df.loc["4510", constants.LATITUDE], 44.64680728070949) |
39 | 51 | self.assertAlmostEqual(result_df.loc["4510", constants.LONGITUDE], 17.90406242892678) |
| 52 | + self.assertIn("metadata_station_carteasting", result_df.columns) |
| 53 | + self.assertIn("catchment", result_df.columns) |
40 | 54 | self.assertEqual(result_df.loc["4510", constants.COUNTRY], "Bosnia and Herzegovina") |
41 | 55 | self.assertEqual(result_df.loc["4510", constants.SOURCE], "vodostaji.voda.ba") |
42 | 56 | self.assertAlmostEqual(result_df.loc["4121", constants.AREA], 123.4) |
| 57 | + mock_session.get.assert_called_once_with(self.fetcher.METADATA_URL, timeout=30) |
43 | 58 |
|
44 | | - @patch("pandas.read_excel") |
45 | 59 | @patch("rivretrieve.utils.requests_retry_session") |
46 | | - def test_get_data_instant_discharge_detects_station_group(self, mock_requests_session, mock_read_excel): |
| 60 | + def test_get_data_instant_discharge_detects_station_group(self, mock_requests_session): |
47 | 61 | mock_session = MagicMock() |
48 | 62 | mock_requests_session.return_value = mock_session |
49 | | - mock_read_excel.return_value = pd.DataFrame( |
50 | | - { |
51 | | - constants.TIME_INDEX: [ |
52 | | - "01.01.2025 00:00", |
53 | | - "01.01.2025 01:00", |
54 | | - "01.01.2025 02:00", |
55 | | - "02.01.2025 00:00", |
56 | | - ], |
57 | | - constants.DISCHARGE_INSTANT: [1.0, 2.0, 3.0, 4.0], |
58 | | - } |
59 | | - ) |
60 | 63 |
|
61 | | - missing_response = MagicMock(status_code=404, content=b"") |
62 | | - success_response = MagicMock(status_code=200, content=b"fake-xlsx-content") |
63 | | - mock_session.get.side_effect = [missing_response, missing_response, success_response] |
| 64 | + missing_response = self._build_response(status_code=404) |
| 65 | + success_response = self._build_response( |
| 66 | + status_code=200, |
| 67 | + content=self._load_bytes("bosnia_herzegovina_4510_discharge_20250323.xlsx"), |
| 68 | + ) |
| 69 | + mock_session.get.side_effect = [missing_response, missing_response, missing_response, success_response] |
64 | 70 |
|
65 | 71 | result_df = self.fetcher.get_data( |
66 | 72 | gauge_id="4510", |
67 | 73 | variable=constants.DISCHARGE_INSTANT, |
68 | | - start_date="2025-01-01", |
69 | | - end_date="2025-01-01", |
| 74 | + start_date="2025-03-23", |
| 75 | + end_date="2025-03-23", |
70 | 76 | ) |
71 | 77 |
|
72 | 78 | expected_df = pd.DataFrame( |
73 | 79 | { |
74 | | - constants.TIME_INDEX: pd.to_datetime( |
75 | | - ["2025-01-01 00:00:00", "2025-01-01 01:00:00", "2025-01-01 02:00:00"] |
76 | | - ), |
77 | | - constants.DISCHARGE_INSTANT: [1.0, 2.0, 3.0], |
| 80 | + constants.TIME_INDEX: pd.date_range("2025-03-23 00:00:00", periods=24, freq="h"), |
| 81 | + constants.DISCHARGE_INSTANT: [ |
| 82 | + 8.304, |
| 83 | + 7.958, |
| 84 | + 8.105, |
| 85 | + 8.007, |
| 86 | + 7.909, |
| 87 | + 7.762, |
| 88 | + 7.958, |
| 89 | + 7.665, |
| 90 | + 7.713, |
| 91 | + 8.205, |
| 92 | + 8.007, |
| 93 | + 7.328, |
| 94 | + 7.860, |
| 95 | + 8.105, |
| 96 | + 7.568, |
| 97 | + 7.811, |
| 98 | + 7.958, |
| 99 | + 7.762, |
| 100 | + 7.665, |
| 101 | + 7.280, |
| 102 | + 7.568, |
| 103 | + 7.472, |
| 104 | + 7.472, |
| 105 | + 7.280, |
| 106 | + ], |
78 | 107 | } |
79 | 108 | ).set_index(constants.TIME_INDEX) |
80 | 109 |
|
81 | | - assert_frame_equal(result_df, expected_df) |
82 | | - self.assertEqual(result_df.attrs["station_group"], 3) |
83 | | - self.assertEqual(mock_session.get.call_count, 3) |
| 110 | + assert_frame_equal(result_df, expected_df, check_dtype=False) |
| 111 | + self.assertEqual(result_df.index.name, constants.TIME_INDEX) |
| 112 | + self.assertEqual(result_df.attrs["station_group"], 4) |
| 113 | + self.assertEqual(mock_session.get.call_count, 4) |
84 | 114 | self.assertIn("/1/4510/Q/Q_1Y.xlsx", mock_session.get.call_args_list[0].args[0]) |
85 | | - self.assertIn("/3/4510/Q/Q_1Y.xlsx", mock_session.get.call_args_list[2].args[0]) |
| 115 | + self.assertIn("/4/4510/Q/Q_1Y.xlsx", mock_session.get.call_args_list[3].args[0]) |
| 116 | + self.assertTrue(all(call.kwargs["timeout"] == 20 for call in mock_session.get.call_args_list)) |
86 | 117 |
|
87 | | - @patch("pandas.read_excel") |
88 | 118 | @patch("rivretrieve.utils.requests_retry_session") |
89 | | - def test_get_data_daily_temperature(self, mock_requests_session, mock_read_excel): |
| 119 | + def test_get_data_daily_stage_converts_centimeters_to_meters(self, mock_requests_session): |
90 | 120 | mock_session = MagicMock() |
91 | 121 | mock_requests_session.return_value = mock_session |
92 | | - mock_read_excel.return_value = pd.DataFrame( |
93 | | - { |
94 | | - constants.TIME_INDEX: [ |
95 | | - "01.01.2025 00:00", |
96 | | - "01.01.2025 12:00", |
97 | | - "02.01.2025 00:00", |
98 | | - "02.01.2025 12:00", |
99 | | - ], |
100 | | - constants.WATER_TEMPERATURE_DAILY_MEAN: [10.0, 11.0, 12.0, 12.0], |
101 | | - } |
| 122 | + missing_response = self._build_response(status_code=404) |
| 123 | + success_response = self._build_response( |
| 124 | + status_code=200, |
| 125 | + content=self._load_bytes("bosnia_herzegovina_4510_stage_20250323.xlsx"), |
102 | 126 | ) |
103 | | - |
104 | | - success_response = MagicMock(status_code=200, content=b"fake-xlsx-content") |
105 | | - mock_session.get.return_value = success_response |
| 127 | + mock_session.get.side_effect = [missing_response, missing_response, missing_response, success_response] |
106 | 128 |
|
107 | 129 | result_df = self.fetcher.get_data( |
108 | 130 | gauge_id="4510", |
109 | | - variable=constants.WATER_TEMPERATURE_DAILY_MEAN, |
110 | | - start_date="2025-01-01", |
111 | | - end_date="2025-01-02", |
| 131 | + variable=constants.STAGE_DAILY_MEAN, |
| 132 | + start_date="2025-03-23", |
| 133 | + end_date="2025-03-24", |
112 | 134 | ) |
113 | 135 |
|
114 | 136 | expected_df = pd.DataFrame( |
115 | 137 | { |
116 | | - constants.TIME_INDEX: pd.to_datetime(["2025-01-01", "2025-01-02"]), |
117 | | - constants.WATER_TEMPERATURE_DAILY_MEAN: [10.5, 12.0], |
| 138 | + constants.TIME_INDEX: pd.to_datetime(["2025-03-23", "2025-03-24"]), |
| 139 | + constants.STAGE_DAILY_MEAN: [0.8113333333333334, 0.9504166666666667], |
118 | 140 | } |
119 | 141 | ).set_index(constants.TIME_INDEX) |
120 | 142 |
|
121 | | - assert_frame_equal(result_df, expected_df) |
| 143 | + assert_frame_equal(result_df, expected_df, check_dtype=False) |
| 144 | + self.assertIn("/4/4510/H/H_1Y.xlsx", mock_session.get.call_args_list[3].args[0]) |
| 145 | + |
| 146 | + @patch("rivretrieve.utils.requests_retry_session") |
| 147 | + def test_get_data_returns_standardized_empty_frame_for_empty_temperature_workbook(self, mock_requests_session): |
| 148 | + mock_session = MagicMock() |
| 149 | + mock_requests_session.return_value = mock_session |
| 150 | + missing_response = self._build_response(status_code=404) |
| 151 | + success_response = self._build_response( |
| 152 | + status_code=200, |
| 153 | + content=self._load_bytes("bosnia_herzegovina_4510_water_temperature_20250323.xlsx"), |
| 154 | + ) |
| 155 | + mock_session.get.side_effect = [missing_response, missing_response, missing_response, success_response] |
| 156 | + |
| 157 | + result_df = self.fetcher.get_data( |
| 158 | + gauge_id="4510", |
| 159 | + variable=constants.WATER_TEMPERATURE_INSTANT, |
| 160 | + start_date="2025-03-23", |
| 161 | + end_date="2025-03-23", |
| 162 | + ) |
| 163 | + |
| 164 | + expected_df = pd.DataFrame( |
| 165 | + columns=[constants.TIME_INDEX, constants.WATER_TEMPERATURE_INSTANT] |
| 166 | + ).set_index(constants.TIME_INDEX) |
| 167 | + |
| 168 | + assert_frame_equal(result_df, expected_df, check_dtype=False) |
| 169 | + self.assertEqual(result_df.index.name, constants.TIME_INDEX) |
| 170 | + self.assertIn("/4/4510/WT/Tvode_1Y.xlsx", mock_session.get.call_args_list[3].args[0]) |
122 | 171 |
|
123 | 172 |
|
124 | 173 | if __name__ == "__main__": |
|
0 commit comments