From 71f6becfffc1b9646e10935f5418d340636d4828 Mon Sep 17 00:00:00 2001 From: yemeen Date: Thu, 2 Oct 2025 20:47:02 -0400 Subject: [PATCH 1/4] add new compression function and remove redundant calculation --- src/ect/ect.py | 9 ++-- src/ect/results.py | 119 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 6 deletions(-) diff --git a/src/ect/ect.py b/src/ect/ect.py index 04b1824..9f9f4e7 100644 --- a/src/ect/ect.py +++ b/src/ect/ect.py @@ -205,13 +205,10 @@ def _ect_all_dirs( vertex_rank_1based[vertex_index] = rnk + 1 # euler char can only jump at each vertex value + # we know vertices add +1 so wait until end to add + # jump_amount = np.zeros(num_vertices + 1, dtype=np.int64) - # 0-cells add +1 at their entrance ranks - for v in range(num_vertices): - rank_v = vertex_rank_1based[v] - jump_amount[rank_v] += 1 - # each pair of pointers defines a cell, so we iterate over them num_cells = cell_vertex_pointers.shape[0] - 1 for cell_idx in range(num_cells): @@ -232,7 +229,7 @@ def _ect_all_dirs( running_sum = 0 for r in range(num_vertices + 1): running_sum += jump_amount[r] - euler_prefix[r] = running_sum + euler_prefix[r] = running_sum + r # +r because vertices add +1 # now find euler char at each threshold wrt the sorted heights sorted_heights = heights[sort_order] diff --git a/src/ect/results.py b/src/ect/results.py index f7166dc..877e06a 100644 --- a/src/ect/results.py +++ b/src/ect/results.py @@ -5,6 +5,63 @@ from typing import Union, List, Callable +# ---------- CSR <-> Dense helpers (prefix-difference over thresholds) ---------- +def _csr_prefix_to_dense(row_ptr, col_idx, data, num_dirs, num_thresh): + """Reconstruct dense matrix from CSR of per-row prefix jumps. + + Each row j accumulates jumps at threshold bins given by + col_idx[row_ptr[j]:row_ptr[j+1]] with magnitudes data[...]. The output is + the cumulative sum across thresholds [0..num_thresh-1]. Any entries at bin + num_thresh are interpreted as past the last output index and ignored. + """ + T = int(num_thresh) + D = int(num_dirs) + out = np.zeros((D, T), dtype=np.int64) + for j in range(D): + start = int(row_ptr[j]) + end = int(row_ptr[j + 1]) + s = 0 + ptr = start + for t in range(T): + while ptr < end and int(col_idx[ptr]) == t: + s += int(data[ptr]) + ptr += 1 + out[j, t] = s + # any jumps at bin T are past the last output index by convention + return out + + +def _dense_to_csr_prefix(dense64: np.ndarray): + """Build CSR of per-row prefix jumps from a dense ECT matrix. + + For each row, store non-zero differences of consecutive thresholds: + jump(0) = dense[0] + jump(t) = dense[t] - dense[t-1] for t >= 1 + """ + if dense64.ndim != 2: + raise ValueError("dense matrix must be 2D") + D, T = dense64.shape + # collect per-row jumps then trim to actual size + row_ptr = np.zeros(D + 1, dtype=np.int64) + jumps_col = [] + jumps_val = [] + nnz = 0 + for j in range(D): + prev = 0 + for t in range(T): + cur = int(dense64[j, t]) + delta = cur - prev + if delta != 0: + jumps_col.append(t) + jumps_val.append(delta) + nnz += 1 + prev = cur + row_ptr[j + 1] = nnz + col_idx = np.asarray(jumps_col, dtype=np.int32) + data = np.asarray(jumps_val, dtype=np.int64) + return row_ptr, col_idx, data + + class ECTResult(np.ndarray): """ A numpy ndarray subclass that carries ECT metadata and plotting capabilities @@ -19,6 +76,9 @@ def __new__(cls, matrix, directions, thresholds): obj = np.asarray(matrix, dtype=np.int32).view(cls) obj.directions = directions obj.thresholds = thresholds + obj.csr_row_ptr = None + obj.csr_col_idx = None + obj.csr_data = None return obj def __array_finalize__(self, obj): @@ -26,6 +86,65 @@ def __array_finalize__(self, obj): return self.directions = getattr(obj, "directions", None) self.thresholds = getattr(obj, "thresholds", None) + self.csr_row_ptr = getattr(obj, "csr_row_ptr", None) + self.csr_col_idx = getattr(obj, "csr_col_idx", None) + self.csr_data = getattr(obj, "csr_data", None) + + @property + def has_csr(self): + return ( + getattr(self, "csr_row_ptr", None) is not None + and getattr(self, "csr_col_idx", None) is not None + and getattr(self, "csr_data", None) is not None + ) + + @classmethod + def from_csr(cls, row_ptr, col_idx, data, directions, thresholds, dtype=np.int32): + num_dirs = len(directions) + num_thresh = len(thresholds) + dense64 = _csr_prefix_to_dense(row_ptr, col_idx, data, num_dirs, num_thresh) + dense = dense64.astype(dtype, copy=False) if dtype == np.int32 else dense64 + obj = cls(dense, directions, thresholds) + obj.csr_row_ptr = row_ptr + obj.csr_col_idx = col_idx + obj.csr_data = data + return obj + + def to_dense(self): + if not self.has_csr: + return self + num_dirs = self.shape[0] + num_thresh = self.shape[1] + dense64 = _csr_prefix_to_dense( + self.csr_row_ptr, self.csr_col_idx, self.csr_data, num_dirs, num_thresh + ) + return dense64.astype(self.dtype, copy=False) + + def save_npz(self, path): + if not self.has_csr: + row_ptr, col_idx, data = _dense_to_csr_prefix( + self.astype(np.int64, copy=False) + ) + else: + row_ptr, col_idx, data = self.csr_row_ptr, self.csr_col_idx, self.csr_data + np.savez_compressed( + path, + row_ptr=row_ptr, + col_idx=col_idx, + data=data, + thresholds=np.asarray(self.thresholds, dtype=np.float64), + dtype=str(self.dtype), + ) + + @classmethod + def load_npz(cls, path, directions): + z = np.load(path, allow_pickle=False) + row_ptr = z["row_ptr"] + col_idx = z["col_idx"] + data = z["data"] + thresholds = z["thresholds"] + dtype = np.dtype(str(z["dtype"])) + return cls.from_csr(row_ptr, col_idx, data, directions, thresholds, dtype=dtype) def plot(self, ax=None, *, radial=False, **kwargs): """Plot ECT matrix with proper handling for both 2D and 3D. From 2c467e56cabe5af9e74aa3ddc30e5fbc259f4d1a Mon Sep 17 00:00:00 2001 From: yemeen Date: Thu, 2 Oct 2025 20:52:58 -0400 Subject: [PATCH 2/4] Bump version to 1.2.2 in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c3cc840..d63867f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ect" -version = "1.2.1" +version = "1.2.2" authors = [ { name="Liz Munch", email="muncheli@msu.edu" }, ] From 55b28fc114e79da199e744b7550e5e5cb7321cba Mon Sep 17 00:00:00 2001 From: yemeen Date: Thu, 2 Oct 2025 20:54:58 -0400 Subject: [PATCH 3/4] Refactor imports and add ECTResult functionalities --- tests/test_ect_result.py | 121 +++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 38 deletions(-) diff --git a/tests/test_ect_result.py b/tests/test_ect_result.py index eb90b0f..bea4584 100644 --- a/tests/test_ect_result.py +++ b/tests/test_ect_result.py @@ -1,8 +1,11 @@ import unittest import numpy as np import matplotlib.pyplot as plt -from ect import ECT, Directions +import tempfile +import os +from ect import ECT from ect.utils.examples import create_example_graph +from ect.results import ECTResult class TestECTResult(unittest.TestCase): @@ -28,23 +31,23 @@ def test_metadata_preservation(self): def test_smooth_transform(self): smooth = self.result.smooth() - + # test shape preservation self.assertEqual(smooth.shape, self.result.shape) - + # test metadata preservation self.assertEqual(smooth.directions, self.result.directions) self.assertTrue(np.array_equal(smooth.thresholds, self.result.thresholds)) - + # test each step of SECT calculation data = self.result.astype(np.float64) - + # 1. test that row averages are subtracted correctly row_avgs = np.average(data, axis=1) for i in range(len(row_avgs)): row = data[i] - row_avgs[i] self.assertTrue(np.allclose(np.average(row), 0)) - + # 2. test that result is cumulative sum of centered data centered = data - row_avgs[:, np.newaxis] expected_smooth = np.cumsum(centered, axis=1) @@ -55,7 +58,7 @@ def test_plotting(self): ax = self.result.plot() self.assertTrue(isinstance(ax, plt.Axes)) plt.close() - + # test plotting with custom axes fig, ax = plt.subplots() self.result.plot(ax=ax) @@ -63,10 +66,10 @@ def test_plotting(self): def test_single_direction_result(self): result = self.ect.calculate(self.graph, theta=0) - + # test shape self.assertEqual(result.shape, (1, self.ect.num_thresh)) - + # test plotting single direction ax = result.plot() self.assertTrue(isinstance(ax, plt.Axes)) @@ -86,20 +89,20 @@ def test_dist_single_ectresult(self): result2_modified = result2 + 1 result2_modified.directions = result2.directions result2_modified.thresholds = result2.thresholds - + # Test L1 distance (default) dist_l1 = self.result.dist(result2_modified) expected_l1 = np.abs(self.result - result2_modified).sum() self.assertAlmostEqual(dist_l1, expected_l1) self.assertIsInstance(dist_l1, (float, np.floating)) - + # Test L2 distance - dist_l2 = self.result.dist(result2_modified, metric='euclidean') + dist_l2 = self.result.dist(result2_modified, metric="euclidean") expected_l2 = np.sqrt(((self.result - result2_modified) ** 2).sum()) self.assertAlmostEqual(dist_l2, expected_l2) - + # Test L-inf distance - dist_linf = self.result.dist(result2_modified, metric='chebyshev') + dist_linf = self.result.dist(result2_modified, metric="chebyshev") expected_linf = np.abs(self.result - result2_modified).max() self.assertAlmostEqual(dist_linf, expected_linf) @@ -109,24 +112,24 @@ def test_dist_list_of_ectresults(self): result2 = self.result + 1 result3 = self.result + 2 result4 = self.result + 3 - + # Preserve metadata for r, val in [(result2, 1), (result3, 2), (result4, 3)]: r.directions = self.result.directions r.thresholds = self.result.thresholds - + # Test batch distances distances = self.result.dist([result2, result3, result4]) - + # Check return type is array self.assertIsInstance(distances, np.ndarray) self.assertEqual(len(distances), 3) - + # Check individual distances are correct expected_dists = [ np.abs(self.result - result2).sum(), np.abs(self.result - result3).sum(), - np.abs(self.result - result4).sum() + np.abs(self.result - result4).sum(), ] np.testing.assert_array_almost_equal(distances, expected_dists) @@ -135,25 +138,25 @@ def test_dist_custom_metric(self): result2 = self.result + 1 result2.directions = self.result.directions result2.thresholds = self.result.thresholds - + # Define custom metric - L0.5 norm def custom_metric(u, v): return np.sum(np.abs(u - v) ** 0.5) - + # Test with custom metric dist_custom = self.result.dist(result2, metric=custom_metric) expected = custom_metric(self.result.ravel(), result2.ravel()) self.assertAlmostEqual(dist_custom, expected) - + # Test custom metric with batch result3 = self.result + 2 result3.directions = self.result.directions result3.thresholds = self.result.thresholds - + distances = self.result.dist([result2, result3], metric=custom_metric) expected_batch = [ custom_metric(self.result.ravel(), result2.ravel()), - custom_metric(self.result.ravel(), result3.ravel()) + custom_metric(self.result.ravel(), result3.ravel()), ] np.testing.assert_array_almost_equal(distances, expected_batch) @@ -162,14 +165,14 @@ def test_dist_additional_kwargs(self): result2 = self.result + 1 result2.directions = self.result.directions result2.thresholds = self.result.thresholds - + # Test minkowski with different p values - dist_p3 = self.result.dist(result2, metric='minkowski', p=3) - expected_p3 = np.sum(np.abs(self.result - result2) ** 3) ** (1/3) + dist_p3 = self.result.dist(result2, metric="minkowski", p=3) + expected_p3 = np.sum(np.abs(self.result - result2) ** 3) ** (1 / 3) self.assertAlmostEqual(dist_p3, expected_p3, places=5) - - dist_p5 = self.result.dist(result2, metric='minkowski', p=5) - expected_p5 = np.sum(np.abs(self.result - result2) ** 5) ** (1/5) + + dist_p5 = self.result.dist(result2, metric="minkowski", p=5) + expected_p5 = np.sum(np.abs(self.result - result2) ** 5) ** (1 / 5) self.assertAlmostEqual(dist_p5, expected_p5, places=5) def test_dist_empty_list(self): @@ -183,17 +186,17 @@ def test_dist_shape_mismatch(self): # Create ECTResult with different shape ect_different = ECT(num_dirs=5, num_thresh=7) result_different = ect_different.calculate(self.graph) - + # Single ECTResult with wrong shape with self.assertRaises(ValueError) as cm: self.result.dist(result_different) self.assertIn("Shape mismatch", str(cm.exception)) - + # List with one wrong shape result2 = self.result + 1 result2.directions = self.result.directions result2.thresholds = self.result.thresholds - + with self.assertRaises(ValueError) as cm: self.result.dist([result2, result_different]) self.assertIn("Shape mismatch at index 1", str(cm.exception)) @@ -202,11 +205,53 @@ def test_dist_self(self): """Test distance to self is zero""" dist_self = self.result.dist(self.result) self.assertEqual(dist_self, 0.0) - + # Also test with different metrics - self.assertEqual(self.result.dist(self.result, metric='euclidean'), 0.0) - self.assertEqual(self.result.dist(self.result, metric='chebyshev'), 0.0) + self.assertEqual(self.result.dist(self.result, metric="euclidean"), 0.0) + self.assertEqual(self.result.dist(self.result, metric="chebyshev"), 0.0) + + def test_has_csr_and_to_dense_semantics(self): + self.assertFalse(self.result.has_csr) + dense_before = np.asarray(self.result) + dense_after = self.result.to_dense() + np.testing.assert_array_equal(dense_before, dense_after) + + def test_from_csr_to_dense_roundtrip(self): + ect = ECT(num_dirs=8, num_thresh=32) + res = ect.calculate(self.graph) + with tempfile.TemporaryDirectory() as d: + p = os.path.join(d, "ect_sparse.npz") + res.save_npz(p) + z = np.load(p, allow_pickle=False) + row_ptr = z["row_ptr"] + col_idx = z["col_idx"] + data = z["data"] + thresholds = z["thresholds"] + + res2 = ECTResult.from_csr( + row_ptr, col_idx, data, res.directions, thresholds, dtype=res.dtype + ) + self.assertTrue(res2.has_csr) + np.testing.assert_array_equal( + res2.to_dense(), np.asarray(res, dtype=res2.dtype) + ) + self.assertEqual(res2.dtype, res.dtype) + np.testing.assert_array_equal(res2.thresholds, res.thresholds) + + def test_save_load_npz_roundtrip(self): + ect = ECT(num_dirs=16, num_thresh=64) + res = ect.calculate(self.graph) + with tempfile.TemporaryDirectory() as d: + p = os.path.join(d, "ect_sparse.npz") + res.save_npz(p) + loaded = ECTResult.load_npz(p, directions=res.directions) + + # Loaded object should match numerically and carry metadata + np.testing.assert_array_equal(np.asarray(loaded), np.asarray(res)) + self.assertEqual(loaded.dtype, res.dtype) + np.testing.assert_array_equal(loaded.thresholds, res.thresholds) + self.assertEqual(loaded.directions, res.directions) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() From 00cda42157ba64f3c91d04688d18f1edda456ace Mon Sep 17 00:00:00 2001 From: yemeen Date: Thu, 2 Oct 2025 20:56:56 -0400 Subject: [PATCH 4/4] Revert "add radial plotting" This reverts commit b862957988d575c8d726a6a027dec94db9dd2c9b. --- src/ect/results.py | 165 +-------------------------------------------- 1 file changed, 2 insertions(+), 163 deletions(-) diff --git a/src/ect/results.py b/src/ect/results.py index 877e06a..5106839 100644 --- a/src/ect/results.py +++ b/src/ect/results.py @@ -146,15 +146,8 @@ def load_npz(cls, path, directions): dtype = np.dtype(str(z["dtype"])) return cls.from_csr(row_ptr, col_idx, data, directions, thresholds, dtype=dtype) - def plot(self, ax=None, *, radial=False, **kwargs): - """Plot ECT matrix with proper handling for both 2D and 3D. - - Set radial=True to render a polar visualization (2D only). Any extra - keyword arguments are forwarded to the radial renderer. - """ - if radial: - return self._plot_radial(ax=ax, **kwargs) - + def plot(self, ax=None): + """Plot ECT matrix with proper handling for both 2D and 3D""" ax = ax or plt.gca() if self.thresholds is None: @@ -233,46 +226,6 @@ def smooth(self): # create new ECTResult with float type return ECTResult(sect.astype(np.float64), self.directions, self.thresholds) - # Internal plotting utilities - def _ensure_2d(self): - if self.directions is None or self.directions.dim != 2: - raise ValueError("This visualization is only supported for 2D ECT results") - - def _theta_threshold_mesh(self): - thetas = self.directions.thetas - thresholds = self.thresholds - THETA, R = np.meshgrid(thetas, thresholds) - return THETA, R - - def _configure_polar_axes( - self, ax, rmin=0.0, rmax=None, theta_zero="N", theta_dir=-1 - ): - ax.set_theta_zero_location(theta_zero) - ax.set_theta_direction(theta_dir) - if rmax is None: - rmax = float(np.max(self.thresholds)) - ax.set_ylim(float(rmin), float(rmax)) - return ax - - def _scale_overlay_radii(self, points, rmin=0.0, rmax=None, fit_to_thresholds=True): - x = points[:, 0] - y = points[:, 1] - r = np.sqrt(x**2 + y**2) - theta = np.arctan2(y, x) - - if rmax is None: - rmax = float(np.max(self.thresholds)) - - if not fit_to_thresholds: - return theta, r - - max_r_points = float(np.max(r)) if r.size else 0.0 - if max_r_points > 0.0: - scaled_r = (r / max_r_points) * (rmax - float(rmin)) + float(rmin) - else: - scaled_r = r - return theta, scaled_r - def _plot_ecc(self, theta): """Plot the Euler Characteristic Curve for a specific direction""" plt.step(self.thresholds, self.T, label="ECC") @@ -281,120 +234,6 @@ def _plot_ecc(self, theta): plt.xlabel("$a$") plt.ylabel(r"$\chi(K_a)$") - def _plot_radial( - self, - ax=None, - title=None, - cmap="viridis", - *, - rmin=0.0, - rmax=None, - colorbar=True, - overlay=None, - overlay_kwargs=None, - **kwargs, - ): - """ - Plot ECT matrix in polar coordinates (radial plot). - - Args: - ax: matplotlib axes object. If None, creates a new polar subplot - title: optional string for plot title - cmap: colormap for the plot (default: 'viridis') - rmin: minimum radius for the plot (default: 0.0) - rmax: maximum radius for the plot (default: None) - colorbar: whether to show the colorbar (default: True) - overlay: points to overlay on the plot (default: None) - - **kwargs: additional keyword arguments passed to pcolormesh - - Returns: - matplotlib.axes.Axes: The axes object used for plotting - """ - self._ensure_2d() - - if ax is None: - fig, ax = plt.subplots( - subplot_kw=dict(projection="polar"), figsize=(10, 10) - ) - - THETA, R = self._theta_threshold_mesh() - - im = ax.pcolormesh(THETA, R, self.T, cmap=cmap, **kwargs) - - self._configure_polar_axes(ax, rmin=rmin, rmax=rmax) - - if title: - ax.set_title(title) - - if colorbar: - plt.colorbar(im, ax=ax, label="ECT Value") - - if overlay is not None: - overlay_kwargs = overlay_kwargs or {} - theta, scaled_r = self._scale_overlay_radii( - overlay, rmin=rmin, rmax=rmax, fit_to_thresholds=True - ) - ax.plot( - theta, - scaled_r, - "-", - color=overlay_kwargs.get("color", "black"), - linewidth=overlay_kwargs.get("linewidth", 2), - alpha=overlay_kwargs.get("alpha", 0.5), - ) - - return ax - - def _overlay_points( - self, - points, - ax=None, - color="black", - linewidth=2, - alpha=0.5, - *, - rmin=0.0, - rmax=None, - fit_to_thresholds=True, - **kwargs, - ): - """ - Overlay original points on a radial ECT plot. - - Args: - points: numpy array of shape (N, 2) containing the original points - ax: matplotlib polar axes object. If None, uses current axes - color: color for the overlay line (default: 'white') - linewidth: line width for the overlay (default: 2) - alpha: transparency for the overlay (default: 0.5) - **kwargs: additional keyword arguments passed to plot - - Returns: - matplotlib.axes.Axes: The axes object used for plotting - """ - if ax is None: - ax = plt.gca() - - if not hasattr(ax, "name") or ax.name != "polar": - raise ValueError("overlay_points requires a polar axes object") - - theta, scaled_r = self._scale_overlay_radii( - points, rmin=rmin, rmax=rmax, fit_to_thresholds=fit_to_thresholds - ) - - ax.plot( - theta, - scaled_r, - "-", - color=color, - linewidth=linewidth, - alpha=alpha, - **kwargs, - ) - - return ax - def dist( self, other: Union["ECTResult", List["ECTResult"]],