diff --git a/README.rst b/README.rst index e7c59ef3a..218da5b6f 100644 --- a/README.rst +++ b/README.rst @@ -48,6 +48,20 @@ Conda users can install from conda-forge: conda install -c conda-forge python-blosc2 +Command line tools +================== + +Two CLI tools are installed along with the package: + +- ``b2view``: an interactive terminal browser (TUI) for TreeStore bundles + (``.b2d`` directories or ``.b2z`` files), with paged views of NDArray and + CTable data of any size + (`walkthrough <https://www.blosc.org/python-blosc2/getting_started/b2view.html>`_). +- ``parquet-to-blosc2``: converts Parquet files to Blosc2 columnar table + stores, and back + (`walkthrough <https://www.blosc.org/python-blosc2/getting_started/parquet_to_blosc2.html>`_; + requires ``pip install "blosc2[parquet]"``). + Documentation ============= diff --git a/bench/tree-store.py b/bench/tree-store.py index 01ddf3493..eb68f7e04 100644 --- a/bench/tree-store.py +++ b/bench/tree-store.py @@ -9,7 +9,8 @@ Benchmark for TreeStore hierarchical creation, opening, and listing. Creates a hierarchy of N1 levels, each with N2 NDArray leaves and one -CTable (4 cols: bool, int, float, string) with N5 rows. Leaf ``N`` +CTable (20 cols: bool, int, float, string plus 16 numeric columns) with +N5 rows. Leaf ``N`` receives an *N*-dimensional array (leaf0 is 0‑d, leaf1 is 1‑d, …) with each side ``int(MAX_ELEMS ** (1/N))`` so that no array exceeds MAX_ELEMS elements. Everything is written to ``tree-store.b2z`` and the script @@ -33,6 +34,10 @@ # ── Row schema for the CTable ──────────────────────────────────────────── +# 4 base columns plus 16 extra numeric ones (v04..v19), wide enough to +# exceed the data panel viewport of b2view. 
+NCOLS = 20 + @dataclasses.dataclass class _Row: @@ -40,6 +45,47 @@ class _Row: b: int = blosc2.field(blosc2.int64(), default=0) c: float = blosc2.field(blosc2.float64(), default=0.0) d: str = "" + v04: int = blosc2.field(blosc2.int64(), default=0) + v05: float = blosc2.field(blosc2.float64(), default=0.0) + v06: int = blosc2.field(blosc2.int64(), default=0) + v07: float = blosc2.field(blosc2.float64(), default=0.0) + v08: int = blosc2.field(blosc2.int64(), default=0) + v09: float = blosc2.field(blosc2.float64(), default=0.0) + v10: int = blosc2.field(blosc2.int64(), default=0) + v11: float = blosc2.field(blosc2.float64(), default=0.0) + v12: int = blosc2.field(blosc2.int64(), default=0) + v13: float = blosc2.field(blosc2.float64(), default=0.0) + v14: int = blosc2.field(blosc2.int64(), default=0) + v15: float = blosc2.field(blosc2.float64(), default=0.0) + v16: int = blosc2.field(blosc2.int64(), default=0) + v17: float = blosc2.field(blosc2.float64(), default=0.0) + v18: int = blosc2.field(blosc2.int64(), default=0) + v19: float = blosc2.field(blosc2.float64(), default=0.0) + + +def ctable_values(nrows: int) -> dict[str, np.ndarray]: + """Deterministic column values for the CTable; row *i* is predictable. + + Tests (e.g. 
tests/b2view/test_basics.py) rely on these formulas to check + that a given viewport shows the expected values: + + - a: i % 2 == 0 + - b: i + - c: i * 1.5 + - d: "str_%06d" % i + - v{k}, even k: i * k + - v{k}, odd k: linspace(0, k, nrows)[i] == i * k / (nrows - 1) + """ + i = np.arange(nrows) + values: dict[str, np.ndarray] = { + "a": i % 2 == 0, + "b": i, + "c": i * 1.5, + "d": np.char.add("str_", np.char.zfill(i.astype("U6"), 6)), + } + for k in range(4, NCOLS): + values[f"v{k:02d}"] = i * k if k % 2 == 0 else np.linspace(0, k, num=nrows) + return values # ── Helpers ────────────────────────────────────────────────────────────── @@ -87,9 +133,16 @@ def create_store( max_elems: int, nrows: int, no_vlmeta: bool = False, + output: str = OUTPUT_FILE, + verbose: bool = True, ) -> tuple[float, int]: """Create the TreeStore; return (wall_clock, total_elements_written).""" - _clean(OUTPUT_FILE) + + def log(*args, **kwargs): + if verbose: + print(*args, **kwargs) + + _clean(output) # Pre-build one array per unique dimensionality (leaf ``i`` → *i*‑d). leaf_arrays_np: dict[int, np.ndarray] = {} @@ -109,25 +162,30 @@ def create_store( total_elements = sum(leaf_arrays_np[ndim].size for ndim in range(nleaves)) * nlevels # Pre-populate a single CTable that we will copy for every level. + # Columns are filled from vectorized, predictable sequences (arange / + # linspace flavored) so they are fast to build and compress very well. tmpl_table = blosc2.CTable(_Row, expected_size=nrows, validate=False) - rows = [(i % 2 == 0, i, float(i) * 1.5, f"str_{i:06d}") for i in range(nrows)] - tmpl_table.extend(rows, validate=False) + cols = ctable_values(nrows) + struct = np.empty(nrows, dtype=[(name, vals.dtype) for name, vals in cols.items()]) + for name, vals in cols.items(): + struct[name] = vals + tmpl_table.extend(struct, validate=False) - print( + log( f"\nCreating TreeStore with {nlevels} level(s), " f"{nleaves} leave(s) each, {nrows} CTable row(s) per level..." 
) - print(f" Max elements per leaf: {max_elems:,}") + log(f" Max elements per leaf: {max_elems:,}") for ndim in range(min(nleaves, 10)): shape = _leaf_shape(ndim, max_elems) nelem = int(np.prod(shape)) if shape else 1 - print(f" leaf{ndim}: shape={shape}, elements={nelem:,}, uncompressed={_fmt_bytes(nelem * 8)}") + log(f" leaf{ndim}: shape={shape}, elements={nelem:,}, uncompressed={_fmt_bytes(nelem * 8)}") if nleaves > 10: - print(f" ... ({nleaves - 10} more)") - print(f" CTable rows: {nrows} | uncompressed table size: {_fmt_bytes(tmpl_table.nbytes)}") + log(f" ... ({nleaves - 10} more)") + log(f" CTable rows: {nrows} | uncompressed table size: {_fmt_bytes(tmpl_table.nbytes)}") t0 = time.perf_counter() - tstore = blosc2.TreeStore(OUTPUT_FILE, mode="w") + tstore = blosc2.TreeStore(output, mode="w") try: if not no_vlmeta: @@ -160,12 +218,12 @@ def create_store( ct = tstore[table_key] ct.vlmeta["description"] = f"Level {level} CTable" ct.vlmeta["author"] = "blosc2" - ct.vlmeta["ncols"] = 4 + ct.vlmeta["ncols"] = tmpl_table.ncols ct.vlmeta["has_index"] = True ct.vlmeta["tags_list"] = ["benchmark", "testing", f"level_{level}"] if (level + 1) % max(1, nlevels // 10) == 0 or level == nlevels - 1: - print(f" Level {level + 1}/{nlevels} done ({time.perf_counter() - t0:.2f}s so far)") + log(f" Level {level + 1}/{nlevels} done ({time.perf_counter() - t0:.2f}s so far)") finally: tstore.close() @@ -308,7 +366,8 @@ def main() -> None: for d in range(args.nleaves) ) total_data_bytes = ( - total_elements * 8 + args.nlevels * args.nrows * (1 + 8 + 8 + 16) # rough for table + # rough per-row table size: bool + int64 + float64 + str + 16 numeric cols + total_elements * 8 + args.nlevels * args.nrows * (1 + 8 + 8 + 16 + 16 * 8) ) file_size = os.path.getsize(OUTPUT_FILE) diff --git a/doc/getting_started/b2view.rst b/doc/getting_started/b2view.rst new file mode 100644 index 000000000..75e98c830 --- /dev/null +++ b/doc/getting_started/b2view.rst @@ -0,0 +1,106 @@ +b2view: Browse TreeStore 
Bundles in the Terminal +================================================ + +The ``b2view`` CLI opens an interactive terminal browser (TUI) for Blosc2 +TreeStore bundles, either sparse directories (``.b2d``) or compact +zip-backed files (``.b2z``). It shows the tree of groups and nodes, the +metadata and vlmeta of the selected node, and a paged view of the data +itself — NDArrays of any dimensionality as well as CTables. + +``b2view`` is installed with python-blosc2; no extra dependencies are +needed. + +Step 1 — Create a sample store +------------------------------ + +Run the snippet below once to produce ``sample.b2z`` with a couple of +arrays and some metadata: + +.. code-block:: python + + import blosc2 + + with blosc2.TreeStore("sample.b2z", mode="w") as tstore: + tstore.vlmeta["author"] = "me" + a = blosc2.linspace(0, 1, num=1_000_000, shape=(1000, 1000)) + a.vlmeta["description"] = "a 2-D linspace" + tstore["/dense/a"] = a + tstore["/dense/b"] = blosc2.arange(10_000, shape=(10, 100, 10)) + +Any existing TreeStore bundle works too — for instance the output of the +``parquet-to-blosc2`` converter (see :doc:`parquet_to_blosc2`). + +Step 2 — Open it +---------------- + +.. code-block:: console + + b2view sample.b2z + +The screen is split into four panels: the **tree** of the bundle on the +left, and **meta**, **vlmeta** and **data** panels for the node selected +in the tree. Move between panels with ``tab`` / ``shift+tab``, maximize +the focused one with ``m`` (``r`` restores it), and quit with ``q``. + +By default the mouse is left to the terminal, so selecting and copying text +works as in any other command line program. Pass ``--mouse`` to let b2view +capture it instead: panels become clickable and the wheel scrolls the data +grid (paging at the boundaries), at the cost of native text selection. + +You can also jump straight to a node and panel: + +.. 
code-block:: console + + b2view sample.b2z /dense/a --panel data + +Step 3 — Navigate the data panel +-------------------------------- + +The data panel pages through objects far larger than the screen. Press +``?`` at any time for the full key reference; the essentials are: + +================================ ============================================= +Key Action +================================ ============================================= +``up`` / ``down`` move the cursor; pages at the edges +``pageup`` / ``pagedown`` previous / next page of rows +``t`` / ``b`` first / last row +``g`` go to a row number +``left`` / ``right`` move across columns; pages at the edges +``s`` / ``e`` (``home``/``end``) first / last column window +``c`` go to a column index or name +================================ ============================================= + +For N-D arrays, press ``d`` to enter *dim mode*: ``left`` / ``right`` +select the active dimension, ``up`` / ``down`` change its fixed index (or +scroll the viewport), ``enter`` toggles a dimension between fixed and +navigable, and ``escape`` leaves dim mode. + +Step 4 — Filter CTable rows +--------------------------- + +On a CTable node, press ``f`` and type a filter expression to page through +only the matching rows — the same expressions ``CTable.where()`` accepts, +including dotted nested column names and ``and`` / ``or``: + +.. code-block:: text + + payment.tips > 100 and trip.km > 0 and trip.sec > 0 + +The data header shows the active filter and the matching row count; all +navigation (paging, ``g``, ``t`` / ``b``) then operates on the filtered +rows. Press ``escape`` (or submit an empty expression) to go back to the +unfiltered table; each node remembers its filter for the session. + +Columns can be filtered too: press ``/`` and type a case-insensitive +substring (e.g. ``payment``) to show only the matching columns — column +paging and the ``c`` goto-column modal then operate on that subset. 
Row +and column filters combine freely; ``escape`` clears them one layer at a +time (row filter first, then columns). + +CLI options +----------- + +``--preview-rows N`` and ``--preview-cols N`` bound the size of each data +page (20 rows by 10 columns by default), and ``--panel`` chooses the panel +focused on startup (``tree``, ``meta``, ``vlmeta`` or ``data``). diff --git a/doc/getting_started/index.rst b/doc/getting_started/index.rst index 8d0e00d26..a01f29b92 100644 --- a/doc/getting_started/index.rst +++ b/doc/getting_started/index.rst @@ -9,3 +9,4 @@ Getting Started tutorials dsl_syntax parquet_to_blosc2 + b2view diff --git a/doc/getting_started/installation.rst b/doc/getting_started/installation.rst index dc0a8a991..a6eb56725 100644 --- a/doc/getting_started/installation.rst +++ b/doc/getting_started/installation.rst @@ -23,7 +23,7 @@ Source code git clone https://github.com/Blosc/python-blosc2/ cd python-blosc2 - pip install .[test] # install with test dependencies + pip install . --group test # install with test dependencies That's all. You can proceed with testing section now. 
diff --git a/pyproject.toml b/pyproject.toml index 09accd8e8..737668bb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ dependencies = [ "numexpr>=2.14.1; platform_machine != 'wasm32'", "pydantic", "requests", + "rich", + "textual", "threadpoolctl; platform_machine != 'wasm32'", ] version = "4.4.4.dev0" @@ -50,7 +52,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [project.optional-dependencies] parquet = ["pyarrow"] -tui = ["textual", "rich"] [project.scripts] parquet-to-blosc2 = "blosc2.cli.parquet_to_blosc2:main" @@ -74,6 +75,8 @@ dev = [ ] test = [ "pytest", + # for the b2view Pilot tests + "pytest-asyncio", "psutil; platform_machine != 'wasm32'", # torch is optional because it is quite large (but will still be used if found) # "torch; platform_machine != 'wasm32'", diff --git a/pytest.ini b/pytest.ini index fc546e68b..60fe3a81e 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,6 +9,7 @@ testpaths = markers = heavy: tests that take long time to complete. network: tests that require network access. + tui: b2view Textual UI tests; each one boots a headless app session. 
filterwarnings = error diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index defe7fa08..7e5557f06 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -2,8 +2,9 @@ from __future__ import annotations -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar +from rich.markup import escape as markup_escape from textual.app import App, ComposeResult from textual.binding import Binding from textual.containers import Horizontal, Vertical, VerticalScroll @@ -11,7 +12,15 @@ from textual.widgets import DataTable, Footer, Header, Input, Static, Tree from blosc2.b2view.model import DataSliceLayout, StoreBrowser -from blosc2.b2view.render import format_cell, make_metadata_renderable, make_preview_renderables +from blosc2.b2view.render import ( + column_float_decimals, + format_cell, + make_metadata_renderable, + make_preview_renderables, +) + +if TYPE_CHECKING: + from textual import events _KIND_ICONS = { "group": "📁", @@ -22,6 +31,9 @@ "unknown": "?", } +# Source kinds whose data grid supports horizontal (column) paging. +_COL_PAGED_KINDS = frozenset({"ndarray2d", "ndarray_slice", "ctable"}) + class B2ViewPanel(Vertical): """Pane container that can be maximized.""" @@ -97,6 +109,31 @@ def action_select_cursor(self) -> None: return super().action_select_cursor() + def _wheel_step(self) -> int: + # Half the visible rows per tick; arrow keys remain the + # single-step path (also for dim-mode index changes). + return max(1, self.row_count // 2) + + def on_mouse_scroll_down(self, event: events.MouseScrollDown) -> None: + # The grid holds exactly one viewport-sized page, so the default + # scroll handler has nothing to scroll; move the cursor instead, + # which pages at the edges just like the arrow keys. 
+ event.stop() + event.prevent_default() + for _ in range(self._wheel_step()): + self.action_cursor_down() + + def on_mouse_scroll_up(self, event: events.MouseScrollUp) -> None: + event.stop() + event.prevent_default() + for _ in range(self._wheel_step()): + self.action_cursor_up() + + def on_resize(self, event) -> None: + # The column/row windows are fitted to this table's size; re-check + # whenever it changes (terminal resize, panel maximize, ...). + getattr(self.app, "_on_data_table_resized", lambda: None)() + def action_scroll_home(self) -> None: if getattr(self.app, "_grid_col_home", lambda: False)(): pass @@ -110,6 +147,106 @@ def action_scroll_end(self) -> None: super().action_scroll_end() +class HelpScreen(ModalScreen[None]): + """Modal listing all key bindings, grouped by area.""" + + CSS = """ + HelpScreen { + align: center middle; + } + #help-dialog { + width: 62; + height: auto; + max-height: 90%; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #help-title { + text-style: bold; + margin-bottom: 1; + } + #help-body { + height: auto; + } + """ + + BINDINGS: ClassVar = [ + ("escape", "close", "Close"), + ("question_mark", "close", "Close"), + ("q", "close", "Close"), + ] + + _SECTIONS: ClassVar = [ + ( + "Panels", + [ + ("tab / shift+tab", "next / previous panel"), + ("m", "maximize the focused panel"), + ("r", "restore panel (or refresh the tree)"), + ("q", "quit"), + ], + ), + ( + "Tree", + [ + ("up / down", "move between nodes"), + ("enter", "select node (and expand groups)"), + ], + ), + ( + "Data grid — rows", + [ + ("up / down", "move cursor; pages at the edges"), + ("pageup / pagedown", "previous / next page"), + ("t / b", "first / last row"), + ("g", "go to row..."), + ("f", "filter rows (CTable)"), + ("escape", "clear the active filter"), + ], + ), + ( + "Data grid — columns", + [ + ("left / right", "move cursor; pages at the edges"), + ("s / e (home / end)", "first / last column window"), + ("c", "go to column index or 
name..."), + ("/", "filter visible columns by substring (CTable)"), + ], + ), + ( + "Dim mode (N-D arrays)", + [ + ("d", "toggle dim mode"), + ("left / right", "select the active dimension"), + ("up / down", "change fixed index / scroll viewport"), + ("enter", "toggle fixed <-> navigable"), + ("escape", "exit dim mode"), + ], + ), + ] + + def compose(self) -> ComposeResult: + from rich.table import Table + + body = Table(show_header=False, box=None, padding=(0, 1)) + body.add_column("key", style="bold cyan", no_wrap=True) + body.add_column("action") + for i, (section, entries) in enumerate(self._SECTIONS): + if i: + body.add_row("", "") + body.add_row(f"[bold]{section}[/bold]", "") + for key, action in entries: + body.add_row(key, action) + with Vertical(id="help-dialog"): + yield Static("b2view keys (esc to close)", id="help-title") + with VerticalScroll(id="help-body"): + yield Static(body) + + def action_close(self) -> None: + self.dismiss(None) + + class GoToRowScreen(ModalScreen[int | None]): """Small modal asking for a global row number.""" @@ -163,6 +300,131 @@ def action_cancel(self) -> None: self.dismiss(None) +class GoToColumnScreen(ModalScreen[int | None]): + """Small modal asking for a column index or (for CTables) a column name.""" + + CSS = """ + GoToColumnScreen { + align: center middle; + } + #gotocol-dialog { + width: 50; + height: auto; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #gotocol-title { + text-style: bold; + margin-bottom: 1; + } + """ + + BINDINGS: ClassVar = [("escape", "cancel", "Cancel")] + + def __init__(self, *, ncols: int, current: int, names: list[str] | None = None): + super().__init__() + self.ncols = ncols + self.current = current + self.names = names + + def compose(self) -> ComposeResult: + what = f"column 0..{self.ncols - 1}" + if self.names: + what += " or name" + with Vertical(id="gotocol-dialog"): + yield Static(f"Go to {what} (current: {self.current})", id="gotocol-title") + yield 
Input(placeholder="column index or name", id="gotocol-input") + + def on_mount(self) -> None: + input_widget = self.query_one("#gotocol-input", Input) + input_widget.value = str(self.current) + input_widget.focus() + + def _fail(self, message: str) -> None: + self.query_one("#gotocol-title", Static).update(message) + + def on_input_submitted(self, event: Input.Submitted) -> None: + value = event.value.strip().replace("_", "") + try: + col = int(value) + except ValueError: + col = self._match_name(event.value.strip()) + if col is None: + return + if not 0 <= col < self.ncols: + self._fail(f"Column must be in range 0..{self.ncols - 1}") + return + self.dismiss(col) + + def _match_name(self, value: str) -> int | None: + """Resolve a column name (exact, or unique prefix) to its index.""" + if not self.names: + self._fail("Please enter an integer column index") + return None + if value in self.names: + return self.names.index(value) + matches = [i for i, name in enumerate(self.names) if name.startswith(value)] if value else [] + if len(matches) == 1: + return matches[0] + self._fail(f"{'Ambiguous' if matches else 'Unknown'} column name {value!r}") + return None + + def action_cancel(self) -> None: + self.dismiss(None) + + +class FilterScreen(ModalScreen[str | None]): + """Small modal asking for a CTable filter (row expression or column pattern).""" + + CSS = """ + FilterScreen { + align: center middle; + } + #filter-dialog { + width: 70; + height: auto; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #filter-title { + text-style: bold; + margin-bottom: 1; + } + """ + + BINDINGS: ClassVar = [("escape", "cancel", "Cancel")] + + def __init__( + self, + *, + current: str | None = None, + title: str = "Filter rows (empty clears)", + placeholder: str = "e.g. 
payment.tips > 100 and trip.km > 0", + ): + super().__init__() + self.current = current or "" + self.title_text = title + self.placeholder = placeholder + + def compose(self) -> ComposeResult: + with Vertical(id="filter-dialog"): + yield Static(self.title_text, id="filter-title") + yield Input(placeholder=self.placeholder, id="filter-input") + + def on_mount(self) -> None: + input_widget = self.query_one("#filter-input", Input) + input_widget.value = self.current + input_widget.focus() + + def on_input_submitted(self, event: Input.Submitted) -> None: + self.dismiss(event.value.strip()) + + def action_cancel(self) -> None: + self.dismiss(None) + + class B2ViewApp(App): """Browse TreeStore hierarchy and preview objects.""" @@ -189,6 +451,7 @@ class B2ViewApp(App): BINDINGS: ClassVar = [ ("q", "quit", "Quit"), + ("question_mark", "show_help", "Help"), ("tab", "focus_next_panel", "Next panel"), ("shift+tab", "focus_previous_panel", "Previous panel"), Binding("g", "go_to_row", "Go to row", show=False), @@ -196,6 +459,11 @@ class B2ViewApp(App): ("r", "restore_or_refresh", "Restore/Refresh"), Binding("t", "grid_row_top", "Top", show=False), Binding("b", "grid_row_bottom", "Bottom", show=False), + Binding("s", "grid_col_start", "Row start", show=False), + Binding("e", "grid_col_end", "Row end", show=False), + Binding("c", "go_to_column", "Go to column", show=False), + Binding("f", "filter_rows", "Filter rows", show=False), + Binding("slash", "filter_columns", "Filter columns", show=False), Binding("d", "dim_cycle", "Dim mode", show=False), Binding("enter", "dim_toggle_nav", "Toggle nav", show=False), Binding("escape", "dim_exit", "Exit dim mode", show=False), @@ -245,7 +513,7 @@ def compose(self) -> ComposeResult: yield Static("", id="vlmetadata") with B2ViewPanel(id="data-pane") as data_pane: data_pane.border_title = "data" - data_pane.border_subtitle = "d(im mode) | t(op) - b(ottom) - g(oto)" + data_pane.border_subtitle = "?(help) | d(im mode) | filter: f(rows) /(cols) 
| rows: t/b/g(oto) | cols: s/e/c(goto)" yield Static("", id="data-header") with Horizontal(id="data-table-row"): yield BufferedDataTable(id="data-table", show_row_labels=True, zebra_stripes=True) @@ -385,8 +653,10 @@ def update_panels(self, path: str) -> None: else: data = self.browser.preview(path, max_rows=self.preview_rows, max_cols=self.preview_cols) if self._is_table_preview(data): - self._update_data_table(data) + # A freshly selected node starts at the first column + self._update_data_table(data, cursor_col=0) self._update_data_header(data) + self.call_after_refresh(self._ensure_viewport_consistent) else: header, body = make_preview_renderables(data) data_header.display = header is not None @@ -469,6 +739,106 @@ def _col_page_size(self) -> int: usable = max(1, width - 6) return max(1, usable // col_width) + # DataTable pads each cell with one space on both sides (cell_padding=1) + _CELL_PAD = 2 + + def _data_table_width(self) -> int: + return self.query_one("#data-table", DataTable).size.width + + def _col_avail_width(self, nrows: int) -> int: + """Width available for data columns, or 0 before layout has settled.""" + width = self._data_table_width() + if width <= 1: + return 0 + label_width = len(str(max(0, int(nrows) - 1))) + self._CELL_PAD + return max(1, width - label_width) + + def _candidate_max_cols(self) -> int: + """Upper bound of columns worth fetching before the width-based trim.""" + width = self._data_table_width() + if width <= 1: + return self.preview_cols + # The narrowest possible column is one character plus padding. 
+ return max(1, width // (1 + self._CELL_PAD)) + + @classmethod + def _measure_column_widths(cls, data: dict) -> list[int]: + """Rendered width (content + padding) of every column in *data*.""" + widths = [] + for name in data["columns"]: + cells = data["data"][name] + decimals = column_float_decimals(cells) + content = max( + len(str(name)), + max((len(format_cell(value, float_decimals=decimals)) for value in cells), default=1), + ) + widths.append(content + cls._CELL_PAD) + return widths + + def _trim_columns_to_fit(self, data: dict) -> dict: + """Drop trailing columns of *data* that do not fit the table width. + + The preview fetches a generous candidate window of columns; this + second pass measures the actual rendered cell widths and keeps only + as many whole columns as truly fit the data table. + """ + if data.get("source_kind") not in _COL_PAGED_KINDS: + return data + avail = self._col_avail_width(data["nrows"]) + if avail <= 0: + return data # layout not settled; keep the estimate-based window + widths = self._measure_column_widths(data) + keep = 0 + total = 0 + for width in widths: + if keep >= 1 and total + width > avail: + break + total += width + keep += 1 + if keep >= len(data["columns"]): + return data + kept = data["columns"][:keep] + data = dict(data) + data["data"] = {name: data["data"][name] for name in kept} + data["columns"] = kept + data["col_stop"] = data["col_start"] + keep + data["hidden_columns"] = max(0, data["ncols"] - keep) + return data + + def _fetch_columns_for_measure(self, col_start: int, count: int) -> dict: + """Fetch the current page rows for columns [col_start, col_start+count).""" + page = self.table_page + max_rows = max(1, page["stop"] - page["start"]) + layout = self._data_layout + if layout is not None and len(layout.shape) >= 1: + probe = layout.copy_with(row_start=page["start"], col_start=col_start) + return self.browser.preview(self.selected_path, max_rows=max_rows, max_cols=count, layout=probe) + return 
self.browser.preview( + self.selected_path, + start=page["start"], + stop=page["stop"], + max_cols=count, + col_start=col_start, + ) + + def _fit_col_start_backward(self, end: int) -> int: + """Start of the widest whole-column window ending just before *end*.""" + page = self.table_page + avail = self._col_avail_width(page["nrows"]) + if avail <= 0: + return max(0, end - max(1, self._col_page_size())) + candidate = min(end, max(1, avail // (1 + self._CELL_PAD))) + cand_start = end - candidate + widths = self._measure_column_widths(self._fetch_columns_for_measure(cand_start, candidate)) + start = end - 1 # always keep at least one column + total = widths[-1] + for i in range(len(widths) - 2, -1, -1): + if total + widths[i] > avail: + break + total += widths[i] + start = cand_start + i + return max(0, start) + def _table_page_size(self) -> int: table = self.query_one("#data-table", DataTable) # Keep only rows likely to be visible. The DataTable header consumes one @@ -488,10 +858,11 @@ def _load_table_page(self, path: str, start: int) -> dict: if self.table_buffer is not None: buffer_start = self.table_buffer["start"] buffer_stop = self.table_buffer["stop"] - same_columns = self.table_buffer.get("source_kind") not in {"ndarray2d", "ndarray_slice"} or ( - self.table_buffer.get("col_start") == self.grid_col_start - and self.table_buffer.get("slice_indices") - == ( + buffer_kind = self.table_buffer.get("source_kind") + if buffer_kind in {"ndarray2d", "ndarray_slice"}: + same_columns = self.table_buffer.get( + "col_start" + ) == self.grid_col_start and self.table_buffer.get("slice_indices") == ( [ layout.fixed_values.get(i, 0) for i in range(len(layout.shape)) @@ -500,7 +871,10 @@ def _load_table_page(self, path: str, start: int) -> dict: if layout is not None else [] ) - ) + elif buffer_kind == "ctable": + same_columns = self.table_buffer.get("col_start") == self.grid_col_start + else: + same_columns = True if same_columns and buffer_start <= start and start + page_size 
<= buffer_stop: data = self._slice_table_buffer(start, page_size) self.table_page = data @@ -518,7 +892,7 @@ def _load_table_page(self, path: str, start: int) -> dict: data = self.browser.preview( path, max_rows=buffer_size, - max_cols=self._col_page_size(), + max_cols=self._candidate_max_cols(), layout=layout, ) else: @@ -528,9 +902,11 @@ def _load_table_page(self, path: str, start: int) -> dict: start=buffer_start, stop=buffer_start + buffer_size, max_rows=buffer_size, - max_cols=self._col_page_size(), + max_cols=self._candidate_max_cols(), col_start=self.grid_col_start, ) + data = self._trim_columns_to_fit(data) + data["viewport_width"] = self._data_table_width() self.table_buffer = data data = self._slice_table_buffer(start, page_size) self.table_page = data @@ -577,22 +953,34 @@ def _slice_table_buffer(self, start: int, page_size: int) -> dict: "ncols", "slice_indices", "n_slices_per_dim", + "viewport_width", ) if key in buffer }, } - def _update_data_table(self, data: dict, *, cursor_row: int = 0, cursor_col: int = 0) -> None: + def _update_data_table(self, data: dict, *, cursor_row: int = 0, cursor_col: int | None = None) -> None: + """Refresh the data grid; *cursor_col* None keeps the current column.""" table = self.query_one("#data-table", DataTable) + if cursor_col is None: + cursor_col = table.cursor_column self.loading_table_page = True try: table.clear(columns=True) for name in data["columns"]: table.add_column(name, key=name) + # Uniform decimals per float column, taken from the whole buffer + # when available so the format is stable while paging rows. 
+ buffer = self.table_buffer + source = buffer if buffer is not None and buffer["columns"] == data["columns"] else data + decimals = {name: column_float_decimals(source["data"][name]) for name in data["columns"]} nrows = data["stop"] - data["start"] for i in range(nrows): table.add_row( - *[format_cell(data["data"][name][i]) for name in data["columns"]], + *[ + format_cell(data["data"][name][i], float_decimals=decimals[name]) + for name in data["columns"] + ], label=str(data["start"] + i), ) nrows = data["stop"] - data["start"] @@ -632,33 +1020,29 @@ def page_grid_columns(self, direction: int) -> bool: if self.loading_table_page or self.table_page is None: return False page = self.table_page - if page.get("source_kind") not in ("ndarray2d", "ndarray_slice"): + if page.get("source_kind") not in _COL_PAGED_KINDS: return False - page_cols = max(1, len(page["columns"])) - ncols = page["ncols"] - col_start = page["col_start"] + # Whole-column windows of data-dependent size: paging right starts at + # the first hidden column; paging left fits as many whole columns as + # possible ending just before the current first one (no skips, no gaps). 
if direction > 0: - if page["col_stop"] >= ncols: + if page["col_stop"] >= page["ncols"]: return False - self.grid_col_start = min(ncols - 1, col_start + page_cols) - cursor_col = 0 + self.grid_col_start = page["col_stop"] else: - if col_start <= 0: + if page["col_start"] <= 0: return False - self.grid_col_start = max(0, col_start - page_cols) - cursor_col = page_cols - 1 + self.grid_col_start = self._fit_col_start_backward(page["col_start"]) self.table_buffer = None data = self._load_table_page(self.selected_path, page["start"]) cursor_row = self.query_one("#data-table", DataTable).cursor_row + cursor_col = 0 if direction > 0 else len(data["columns"]) - 1 self._update_data_table(data, cursor_row=cursor_row, cursor_col=cursor_col) self._update_data_header(data) return True def _grid_col_home(self) -> bool: - if self.table_page is None or self.table_page.get("source_kind") not in ( - "ndarray2d", - "ndarray_slice", - ): + if self.table_page is None or self.table_page.get("source_kind") not in _COL_PAGED_KINDS: return False self.grid_col_start = 0 self.table_buffer = None @@ -669,18 +1053,15 @@ def _grid_col_home(self) -> bool: return True def _grid_col_end(self) -> bool: - if self.table_page is None or self.table_page.get("source_kind") not in ( - "ndarray2d", - "ndarray_slice", - ): + if self.table_page is None or self.table_page.get("source_kind") not in _COL_PAGED_KINDS: return False page = self.table_page - page_cols = max(1, len(page["columns"])) - self.grid_col_start = max(0, page["ncols"] - page_cols) + # Jump to the widest whole-column window ending at the last column + self.grid_col_start = self._fit_col_start_backward(page["ncols"]) self.table_buffer = None data = self._load_table_page(self.selected_path, page["start"]) cursor_row = self.query_one("#data-table", DataTable).cursor_row - self._update_data_table(data, cursor_row=cursor_row, cursor_col=page_cols - 1) + self._update_data_table(data, cursor_row=cursor_row, cursor_col=len(data["columns"]) - 1) 
self._update_data_header(data) return True @@ -717,6 +1098,17 @@ def _update_data_header(self, data: dict) -> None: header_parts.append(f"rows {data['start']}:{data['stop']} of {data['nrows']}") if "col_start" in data: header_parts.append(f"cols {data['col_start']}:{data['col_stop']} of {data['ncols']}") + if data.get("source_kind") == "ctable" and self.browser is not None: + flt = self.browser.get_filter(self.selected_path) + col_flt = self.browser.get_column_filter(self.selected_path) + if flt: + total = self.browser.base_nrows(self.selected_path) + header_parts.append(f"filter: [bold]{markup_escape(flt)}[/bold] ({total} total)") + if col_flt: + total_cols = self.browser.base_ncols(self.selected_path) + header_parts.append(f"cols: [bold]{markup_escape(col_flt)}[/bold] ({total_cols} total)") + if flt or col_flt: + header_parts.append("clear") line = ", ".join(header_parts) if self._dim_mode and layout is not None: @@ -752,7 +1144,7 @@ def _update_global_row_scrollbar(self, data: dict) -> None: def _update_global_col_scrollbar(self, data: dict) -> None: scrollbar = self.query_one("#col-scrollbar", Static) - if data.get("source_kind") not in ("ndarray2d", "ndarray_slice"): + if data.get("source_kind") not in _COL_PAGED_KINDS: scrollbar.display = False scrollbar.update("") return @@ -826,6 +1218,84 @@ def action_go_to_row(self) -> None: screen = GoToRowScreen(nrows=self.table_page["nrows"], current=current) self.push_screen(screen, self._go_to_row) + def action_go_to_column(self) -> None: + if not self._in_data_grid(): + return + page = self.table_page + if page.get("source_kind") not in _COL_PAGED_KINDS: + return + current = page["col_start"] + self.query_one("#data-table", DataTable).cursor_column + names = self.browser.column_names(self.selected_path) if page["source_kind"] == "ctable" else None + screen = GoToColumnScreen(ncols=page["ncols"], current=current, names=names) + self.push_screen(screen, self._go_to_column) + + def action_filter_rows(self) -> None: + 
if not self._in_data_grid(): + return + if self.table_page.get("source_kind") != "ctable": + self.notify("Filtering is only supported for CTable nodes", severity="warning") + return + screen = FilterScreen(current=self.browser.get_filter(self.selected_path)) + self.push_screen(screen, self._apply_filter) + + def _apply_filter(self, expr: str | None) -> None: + if expr is None or self.browser is None or self.table_page is None: + return + if expr == (self.browser.get_filter(self.selected_path) or ""): + return + try: + self.browser.set_filter(self.selected_path, expr) + except Exception as exc: + self.notify(f"Invalid filter: {exc}", severity="error") + return + self.table_buffer = None + data = self._load_table_page(self.selected_path, 0) + self._update_data_table(data, cursor_row=0, cursor_col=0) + self._update_data_header(data) + self.query_one("#data-table", DataTable).focus() + + def action_filter_columns(self) -> None: + if not self._in_data_grid(): + return + if self.table_page.get("source_kind") != "ctable": + self.notify("Column filtering is only supported for CTable nodes", severity="warning") + return + screen = FilterScreen( + current=self.browser.get_column_filter(self.selected_path), + title="Filter columns by substring (empty clears)", + placeholder="e.g. 
payment", + ) + self.push_screen(screen, self._apply_column_filter) + + def _apply_column_filter(self, pattern: str | None) -> None: + if pattern is None or self.browser is None or self.table_page is None: + return + if pattern == (self.browser.get_column_filter(self.selected_path) or ""): + return + try: + self.browser.set_column_filter(self.selected_path, pattern) + except Exception as exc: + self.notify(f"Invalid column filter: {exc}", severity="error") + return + self.grid_col_start = 0 + self.table_buffer = None + data = self._load_table_page(self.selected_path, self.table_page["start"]) + cursor_row = self.query_one("#data-table", DataTable).cursor_row + self._update_data_table(data, cursor_row=cursor_row, cursor_col=0) + self._update_data_header(data) + self.query_one("#data-table", DataTable).focus() + + def _go_to_column(self, col: int | None) -> None: + if col is None or self.table_page is None: + return + self.grid_col_start = col + self.table_buffer = None + data = self._load_table_page(self.selected_path, self.table_page["start"]) + cursor_row = self.query_one("#data-table", DataTable).cursor_row + self._update_data_table(data, cursor_row=cursor_row, cursor_col=0) + self._update_data_header(data) + self.query_one("#data-table", DataTable).focus() + def _focused_pane(self): focused = self.focused if focused is None: @@ -851,6 +1321,30 @@ def action_restore_or_refresh(self) -> None: return self.action_refresh() + def _ensure_viewport_consistent(self) -> None: + """Reload the page if it was sized before the layout had settled. + + The first page of a node may be loaded while the DataTable still has + no size, in which case the CLI fallbacks (preview_rows/preview_cols) + determine the window. Later paging then uses the settled viewport + sizes, so the windows would drift unless we reload once here. 
+ """ + page = self.table_page + if page is None or not self.query_one("#data-table-row", Horizontal).display: + return + rows_loaded = page["stop"] - page["start"] + rows_want = min(self._table_page_size(), page["nrows"] - page["start"]) + cols_ok = True + if page.get("source_kind") in _COL_PAGED_KINDS: + # The column window is fitted to the width current at load time + cols_ok = page.get("viewport_width") == self._data_table_width() + if rows_loaded == rows_want and cols_ok: + return + self._reload_table_for_current_viewport() + + def _on_data_table_resized(self) -> None: + self.call_after_refresh(self._ensure_viewport_consistent) + def _reload_table_for_current_viewport(self) -> None: """Reload the table page after layout changes such as maximize/restore.""" if self.table_page is None or not self.query_one("#data-table-row", Horizontal).display: @@ -884,7 +1378,7 @@ def action_refresh(self) -> None: def _adjust_fixed_value(self, direction: int) -> None: """Adjust the fixed value of the active dimension (if it is fixed). - In DIM mode the value wraps around at boundaries (0 ↔ max). + The value clamps at the boundaries (no wrap-around). 
""" layout = self._data_layout if layout is None or self.table_page is None: @@ -896,19 +1390,9 @@ def _adjust_fixed_value(self, direction: int) -> None: if total <= 0: return current = layout.fixed_values[dim] - if self._dim_mode and total > 1: - # Cycle: up at max → 0, down at 0 → max-1 - new_val = (current + direction) % total - else: - # Clamp at boundaries (normal mode) - if direction > 0: - if current >= total - 1: - return - new_val = current + 1 - else: - if current <= 0: - return - new_val = current - 1 + new_val = min(max(current + direction, 0), total - 1) + if new_val == current: + return new_fixed = dict(layout.fixed_values) new_fixed[dim] = new_val self._data_layout = layout.copy_with(fixed_values=new_fixed) @@ -988,7 +1472,7 @@ def _dim_adjust(self, direction: int) -> None: self._scroll_navigable_viewport(direction) def _scroll_navigable_viewport(self, direction: int) -> None: - """Shift the viewport of a navigable dimension by one step (wraps).""" + """Shift the viewport of a navigable dimension by one step (clamps).""" layout = self._data_layout if layout is None or self.table_page is None: return @@ -1001,13 +1485,19 @@ def _scroll_navigable_viewport(self, direction: int) -> None: total = layout.shape[dim] if pos == 0: - # Row navigable dim — shift start by one row (wraps) - new_start = (page["start"] + direction) % total + # Row navigable dim — shift start by one row, keeping a full page + max_start = max(0, total - self._table_page_size()) + new_start = min(max(page["start"] + direction, 0), max_start) + if new_start == page["start"]: + return self.table_buffer = None data = self._load_table_page(self.selected_path, new_start) else: - # Column navigable dim — shift col_start by one column (wraps) - new_col = (page["col_start"] + direction) % total + # Column navigable dim — shift col_start by one whole column + max_col = self._fit_col_start_backward(total) + new_col = min(max(page["col_start"] + direction, 0), max_col) + if new_col == 
page["col_start"]: + return self.grid_col_start = new_col self.table_buffer = None data = self._load_table_page(self.selected_path, page["start"]) @@ -1035,12 +1525,26 @@ def action_dim_toggle_nav(self) -> None: self._dim_toggle() def action_dim_exit(self) -> None: - """Escape: exit dim mode.""" - if not self._dim_mode: + """Escape: exit dim mode, or clear an active CTable filter. + + One layer per press: dim mode, then the row filter, then the + column filter. + """ + if self._dim_mode: + self._dim_mode = False + if self.table_page is not None: + self._update_data_header(self.table_page) return - self._dim_mode = False - if self.table_page is not None: - self._update_data_header(self.table_page) + if ( + not self._in_data_grid() + or self.table_page.get("source_kind") != "ctable" + or self.browser is None + ): + return + if self.browser.get_filter(self.selected_path): + self._apply_filter("") + elif self.browser.get_column_filter(self.selected_path): + self._apply_column_filter("") def action_grid_row_top(self) -> None: """Jump to the first row of the table.""" @@ -1053,3 +1557,16 @@ def action_grid_row_bottom(self) -> None: if not self._in_data_grid(): return self._go_to_row(self.table_page["nrows"] - 1) + + def action_show_help(self) -> None: + self.push_screen(HelpScreen()) + + def action_grid_col_start(self) -> None: + """Jump to the first column window (alias of Home).""" + if self._in_data_grid(): + self._grid_col_home() + + def action_grid_col_end(self) -> None: + """Jump to the last column window (alias of End).""" + if self._in_data_grid(): + self._grid_col_end() diff --git a/src/blosc2/b2view/cli.py b/src/blosc2/b2view/cli.py index d45f4599e..614030112 100644 --- a/src/blosc2/b2view/cli.py +++ b/src/blosc2/b2view/cli.py @@ -18,6 +18,11 @@ def build_parser() -> argparse.ArgumentParser: default="tree", help="Panel to focus on startup", ) + parser.add_argument( + "--mouse", + action="store_true", + help="Capture the mouse for clicking and scrolling 
(disables the terminal's native text selection)", + ) return parser @@ -27,9 +32,7 @@ def main(argv: list[str] | None = None) -> int: from blosc2.b2view.app import B2ViewApp except ImportError as exc: print( - "b2view requires the optional TUI dependencies. Install them with:\n" - "\n" - ' pip install "blosc2[tui]"\n', + "b2view could not import its TUI dependencies. Install them with:\n\n pip install textual\n", file=sys.stderr, ) print(f"Original import error: {exc}", file=sys.stderr) @@ -42,7 +45,7 @@ def main(argv: list[str] | None = None) -> int: preview_rows=args.preview_rows, preview_cols=args.preview_cols, ) - app.run() + app.run(mouse=args.mouse) return 0 diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 8fc5f6308..1bde09e07 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -136,6 +136,12 @@ def __init__(self, urlpath: str): self.urlpath = urlpath self.store = blosc2.open(urlpath, mode="r") self.is_tree = isinstance(self.store, blosc2.TreeStore) + # Per-path row filters for CTable nodes (path -> expr / where() view) + self._filters: dict[str, str] = {} + self._filter_views: dict[str, Any] = {} + # Per-path column filters (path -> substring pattern / matched names) + self._column_filters: dict[str, str] = {} + self._column_selections: dict[str, list[str]] = {} def close(self) -> None: close = getattr(self.store, "close", None) @@ -260,12 +266,86 @@ def preview( return preview_array_1d(obj, start=start, stop=stop) return preview_array(obj, slices=slices, max_rows=max_rows, max_cols=max_cols) if kind == "ctable": + obj = self._filter_views.get(path, obj) + if columns is None: + columns = self._column_selections.get(path) stop = min(start + max_rows, len(obj)) if stop is None else stop - return preview_ctable(obj, start=start, stop=stop, columns=columns, max_cols=max_cols) + return preview_ctable( + obj, start=start, stop=stop, columns=columns, max_cols=max_cols, col_start=col_start + ) if kind == "schunk": 
return {"message": "SChunk byte preview is not implemented yet."} return {"message": f"Preview is not supported for {kind!r} objects."} + def column_names(self, path: str) -> list[str] | None: + """Return the column names for a CTable path, or None for other kinds. + + When a column filter is active, only the matching names are returned + (navigation operates on the filtered universe). + """ + path = self.normalize_path(path) + selection = self._column_selections.get(path) + if selection is not None: + return list(selection) + names = list(getattr(self._get_object(path), "col_names", []) or []) + return names or None + + def set_filter(self, path: str, expr: str | None) -> int: + """Set or clear the row filter of a CTable path; return its row count. + + An empty (or None) *expr* clears the filter. Errors from ``where()`` + propagate to the caller and leave any previous filter untouched. + """ + path = self.normalize_path(path) + expr = (expr or "").strip() + if not expr: + self._filters.pop(path, None) + self._filter_views.pop(path, None) + return len(self._get_object(path)) + view = self._get_object(path).where(expr) + self._filters[path] = expr + self._filter_views[path] = view + return len(view) + + def get_filter(self, path: str) -> str | None: + """Return the active filter expression for *path*, if any.""" + return self._filters.get(self.normalize_path(path)) + + def base_nrows(self, path: str) -> int: + """Return the unfiltered row count of the CTable at *path*.""" + return len(self._get_object(path)) + + def set_column_filter(self, path: str, pattern: str | None) -> int: + """Set or clear the column filter of a CTable path; return the match count. + + Columns are matched by case-insensitive substring, keeping the table + order. An empty (or None) *pattern* clears the filter. A pattern + matching no column raises ValueError and leaves any previous filter + untouched. 
+ """ + path = self.normalize_path(path) + pattern = (pattern or "").strip() + all_names = list(getattr(self._get_object(path), "col_names", []) or []) + if not pattern: + self._column_filters.pop(path, None) + self._column_selections.pop(path, None) + return len(all_names) + needle = pattern.lower() + selection = [name for name in all_names if needle in name.lower()] + if not selection: + raise ValueError(f"no column matches {pattern!r}") + self._column_filters[path] = pattern + self._column_selections[path] = selection + return len(selection) + + def get_column_filter(self, path: str) -> str | None: + """Return the active column filter pattern for *path*, if any.""" + return self._column_filters.get(self.normalize_path(path)) + + def base_ncols(self, path: str) -> int: + """Return the unfiltered column count of the CTable at *path*.""" + return len(list(getattr(self._get_object(path), "col_names", []) or [])) + def _get_object(self, path: str) -> Any: """Return the object represented by *path*.""" path = self.normalize_path(path) @@ -596,18 +676,25 @@ def preview_ctable( stop: int = 20, columns: list[str] | None = None, max_cols: int = 10, + col_start: int = 0, include_expensive: bool = False, ) -> dict[str, Any]: """Return a bounded column-oriented preview from a CTable. + *col_start* selects the first visible column, so wide tables can be + paged horizontally just like 2-D arrays. + Complex nested/list/object columns may require one variable-length block read per row. By default, keep table navigation responsive by showing a placeholder for those columns instead of decoding them eagerly. 
""" all_columns = list(getattr(obj, "col_names", [])) - visible_columns = all_columns if columns is None else [name for name in columns if name in all_columns] - hidden_columns = max(0, len(visible_columns) - max_cols) - visible_columns = visible_columns[:max_cols] + selectable = all_columns if columns is None else [name for name in columns if name in all_columns] + ncols = len(selectable) + col_start = max(0, min(col_start, max(0, ncols - 1))) + col_stop = min(col_start + max_cols, ncols) + visible_columns = selectable[col_start:col_stop] + hidden_columns = max(0, ncols - len(visible_columns)) start = max(0, start) stop = min(max(start, stop), len(obj)) data = {} @@ -629,6 +716,10 @@ def preview_ctable( "hidden_columns": hidden_columns, "skipped_columns": skipped_columns, "data": data, + "source_kind": "ctable", + "col_start": col_start, + "col_stop": col_stop, + "ncols": ncols, } diff --git a/src/blosc2/b2view/render.py b/src/blosc2/b2view/render.py index 207b9c110..82df2de37 100644 --- a/src/blosc2/b2view/render.py +++ b/src/blosc2/b2view/render.py @@ -105,7 +105,34 @@ def _format_metadata_value(value: Any) -> str: return str(value) -def format_cell(value: Any) -> str: +def column_float_decimals(values: Any) -> int | None: + """Return a uniform decimal count for a float column, or None. + + The count derives from the column's maximum magnitude so every cell fits + the same ~9 character budget that _fmt_float uses per value: digits move + from the fraction to the integer part as magnitudes grow, but uniformly + for the whole column, keeping the decimal points aligned. + + Returns None when *values* is not a float column or when its magnitude + calls for scientific notation (handled per value by _fmt_float). 
+ """ + arr = np.asarray(values) + if arr.dtype.kind != "f" or arr.size == 0: + return None + finite = arr[np.isfinite(arr)] + if finite.size == 0: + return None + largest = float(np.max(np.abs(finite))) + if largest == 0: + return 1 # an all-zero column reads best as plain 0.0 + if largest >= 1e9 or largest < 1e-6: + return None + int_digits = max(1, int(np.floor(np.log10(largest))) + 1) + # 9-char budget: sign/pad + int digits + decimal point + decimals + return max(0, 7 - int_digits) + + +def format_cell(value: Any, *, float_decimals: int | None = None) -> str: if isinstance(value, np.generic): value = value.item() if isinstance(value, np.ndarray): @@ -113,7 +140,7 @@ def format_cell(value: Any) -> str: elif isinstance(value, (list, tuple, dict)): text = pformat(value, compact=True, width=80) elif isinstance(value, float): - text = _fmt_float(value) + text = _fmt_float(value) if float_decimals is None else f"{value:9.{float_decimals}f}" else: text = str(value) text = " ".join(text.splitlines()) diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py new file mode 100644 index 000000000..e8773cfcd --- /dev/null +++ b/tests/b2view/test_basics.py @@ -0,0 +1,605 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Basic keyboard-navigation tests for the b2view TUI. + +The store is generated with ``tree_store_gen.py`` (small parameters, a few +hundred KB) so every cell value is predictable: NDArray leaves come from +``blosc2.linspace(0, 1, ...)`` and the per-level CTable columns follow the +formulas documented in ``ctable_values()`` of that module. 
+ +All tests drive the real Textual app through a ``Pilot`` (headless terminal +of a fixed size), pressing the same keys a user would, and then assert on +the loaded page (``app.table_page``) and the underlying values. The focus +is navigation of objects *larger than the data panel viewport*: row paging, +column paging, jump-to-row, and dim-mode for N-D arrays. + +NOTE for test authors (humans and LLMs alike): booting an app session +(``app.run_test()``) costs ~0.3 s and every key press ~0.1 s, dwarfing the +assertions themselves. When adding tests, do NOT create a new session per +scenario: extend an existing test that already starts at the right node, or +group related scenarios that share a start state into one self-contained +keyboard journey (see ``test_ctable_column_paging`` for the pattern). Only +start a new session when the scenarios genuinely need independent app state. +Deselect the whole TUI suite with ``pytest -m "not tui"``. +""" + +from __future__ import annotations + +import numpy as np +import pytest + +pytest.importorskip("textual") +pytest.importorskip("pytest_asyncio") + +import blosc2 + +if blosc2.IS_WASM: + # Instantiating a Textual app selects a terminal driver, and the Linux + # driver needs termios, which Emscripten does not provide. 
+ pytest.skip("Textual apps need a terminal driver (termios)", allow_module_level=True) + +import tree_store_gen as gen +from textual.widgets import DataTable, Input, Tree + +from blosc2.b2view.app import B2ViewApp, FilterScreen, GoToColumnScreen, GoToRowScreen, HelpScreen + +pytestmark = [pytest.mark.asyncio, pytest.mark.tui] + +# ── Store generation (via tree_store_gen.py, next to this module) ──────── + +NLEVELS = 2 +NLEAVES = 4 # leaf0: scalar, leaf1: 1-D, leaf2: 2-D, leaf3: 3-D +MAX_ELEMS = 10_000 +NROWS = 300 # CTable rows; well beyond one viewport page + +# Shapes produced by leaf_shape() for the parameters above +LEAF1_LEN = 10_000 +LEAF2_SHAPE = (100, 100) +LEAF3_SHAPE = (21, 21, 21) + +# Fixed terminal size for deterministic viewports +TERM_SIZE = (120, 40) + + +@pytest.fixture(scope="session") +def store_path(tmp_path_factory) -> str: + path = tmp_path_factory.mktemp("b2view") / "tree-store.b2z" + gen.create_store(NLEVELS, NLEAVES, MAX_ELEMS, NROWS, output=str(path)) + return str(path) + + +# ── Helpers ────────────────────────────────────────────────────────────── + + +async def wait_for_table(pilot) -> None: + """Wait until the data grid has a loaded, settled page.""" + for _ in range(100): + await pilot.pause() + app = pilot.app + if app.table_page is not None and not app.loading_table_page: + return + raise AssertionError("data table never finished loading") + + +async def focus_data_table(pilot) -> DataTable: + table = pilot.app.query_one("#data-table", DataTable) + table.focus() + await pilot.pause() + return table + + +def leaf1_values() -> np.ndarray: + return np.linspace(0, 1, num=LEAF1_LEN) + + +def leaf2_values() -> np.ndarray: + return np.linspace(0, 1, num=int(np.prod(LEAF2_SHAPE))).reshape(LEAF2_SHAPE) + + +def leaf3_values() -> np.ndarray: + return np.linspace(0, 1, num=int(np.prod(LEAF3_SHAPE))).reshape(LEAF3_SHAPE) + + +def _assert_ctable_window_values(page, expected): + """Check every visible cell of *page* against the generator 
columns.""" + for name in page["columns"]: + got = page["data"][name] + want = expected[name][page["start"] : page["stop"]] + if np.issubdtype(np.asarray(want).dtype, np.number): + np.testing.assert_allclose(np.asarray(got, dtype=float), want) + else: + np.testing.assert_array_equal(got, want) + + +# ── Tree and panel focus navigation ────────────────────────────────────── + + +async def test_tree_and_panel_focus(store_path): + """Tab cycles the panels; Down/Enter in the tree selects nodes.""" + app = B2ViewApp(store_path) + async with app.run_test(size=TERM_SIZE) as pilot: + await pilot.pause() + assert isinstance(app.focused, Tree) + + # Tab: tree -> meta -> vlmeta -> data and wraps back to the tree + for expected in ["meta-scroll", "vlmeta-scroll", "data-scroll", "tree"]: + await pilot.press("tab") + await pilot.pause() + assert app.focused is not None + assert app.focused.id == expected + + await pilot.press("down", "enter") # root -> level0, select + expand + await pilot.pause() + assert app.selected_path == "/level0" + + first_child = app.browser.list_children("/level0")[0] + await pilot.press("down", "enter") # -> first child of level0 + await wait_for_table(pilot) + assert app.selected_path == first_child.path + + # '?' 
opens the help screen; escape closes it + await pilot.press("question_mark") + await pilot.pause() + assert isinstance(app.screen, HelpScreen) + await pilot.press("escape") + await pilot.pause() + assert not isinstance(app.screen, HelpScreen) + + +# ── 1-D array: row paging beyond the viewport ──────────────────────────── + + +async def test_1d_row_paging_and_jumps(store_path): + """Cursor-down at the last row pages forward; 'b'/'t' jump to bottom/top.""" + app = B2ViewApp(store_path, start_path="/level0/leaf1", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + + page = app.table_page + assert page["nrows"] == LEAF1_LEN + assert page["start"] == 0 + first_stop = page["stop"] + assert first_stop < LEAF1_LEN # viewport smaller than the array + + expected = leaf1_values() + np.testing.assert_allclose(page["data"]["value"], expected[: page["stop"]]) + + # Move the cursor to the last row of the page and step once more + table.move_cursor(row=page["stop"] - page["start"] - 1) + await pilot.press("down") + await wait_for_table(pilot) + + page = app.table_page + assert page["start"] == first_stop # new page starts where the old ended + assert table.cursor_row == 0 + np.testing.assert_allclose(page["data"]["value"], expected[page["start"] : page["stop"]]) + + # 'b' jumps to the very last row of the array + await pilot.press("b") + await wait_for_table(pilot) + page = app.table_page + assert page["stop"] == LEAF1_LEN + assert page["start"] + table.cursor_row == LEAF1_LEN - 1 + np.testing.assert_allclose(page["data"]["value"], expected[page["start"] : page["stop"]]) + + # ...and 't' back to the first + await pilot.press("t") + await wait_for_table(pilot) + page = app.table_page + assert page["start"] == 0 + assert page["start"] + table.cursor_row == 0 + + +# ── 2-D array: row *and* column paging beyond the viewport ─────────────── + + +async def test_2d_paging(store_path): + 
"""Column paging shows the right values; row paging stops at the bottom.""" + app = B2ViewApp(store_path, start_path="/level0/leaf2", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + + page = app.table_page + assert page["ncols"] == LEAF2_SHAPE[1] + assert page["col_start"] == 0 + first_col_stop = page["col_stop"] + assert first_col_stop < LEAF2_SHAPE[1] # more columns than the viewport + + expected = leaf2_values() + # Column labels are the global column indices + assert page["columns"] == [str(c) for c in range(page["col_start"], page["col_stop"])] + for c in range(page["col_start"], page["col_stop"]): + np.testing.assert_allclose(page["data"][str(c)], expected[page["start"] : page["stop"], c]) + + # Move the cursor to the last visible column and step right once more + table.move_cursor(column=len(page["columns"]) - 1) + await pilot.press("right") + await wait_for_table(pilot) + + page = app.table_page + assert page["col_start"] == first_col_stop + assert table.cursor_column == 0 + for c in range(page["col_start"], page["col_stop"]): + np.testing.assert_allclose(page["data"][str(c)], expected[page["start"] : page["stop"], c]) + + # Row paging stops at the bottom: 'b', then one more down is a no-op + await pilot.press("b") + await wait_for_table(pilot) + page = app.table_page + assert page["stop"] == LEAF2_SHAPE[0] + last_cursor = table.cursor_row + + await pilot.press("down") # already at the last row: must not page/move + await wait_for_table(pilot) + assert app.table_page["stop"] == LEAF2_SHAPE[0] + assert table.cursor_row == last_cursor + + # 'end' jumps to the widest whole-column window ending at the last + # column; paging left from there must not skip any column. 
+ await pilot.press("end") + await wait_for_table(pilot) + page = app.table_page + assert page["col_stop"] == LEAF2_SHAPE[1] + end_col_start = page["col_start"] + assert end_col_start > 0 + + table.move_cursor(column=0) + await pilot.press("left") + await wait_for_table(pilot) + page = app.table_page + assert page["col_start"] < end_col_start + assert page["col_stop"] >= end_col_start # no column skipped + for c in range(page["col_start"], page["col_stop"]): + np.testing.assert_allclose(page["data"][str(c)], expected[page["start"] : page["stop"], c]) + + # 'home' returns to the first column window + await pilot.press("home") + await wait_for_table(pilot) + assert app.table_page["col_start"] == 0 + + # 'c' jumps to a column by index (arrays have no column names) + await pilot.press("c") + await pilot.pause() + assert isinstance(app.screen, GoToColumnScreen) + app.screen.query_one("#gotocol-input", Input).value = "97" + await pilot.press("enter") + await wait_for_table(pilot) + page = app.table_page + assert page["col_start"] == 97 + assert page["col_stop"] == LEAF2_SHAPE[1] + np.testing.assert_allclose(page["data"]["97"], expected[page["start"] : page["stop"], 97]) + + +# ── 3-D array: dim mode navigation ─────────────────────────────────────── + + +async def test_3d_dim_mode_fixed_value(store_path): + """In dim mode, up/down change the fixed index of the active dimension.""" + app = B2ViewApp(store_path, start_path="/level0/leaf3", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + await focus_data_table(pilot) + + layout = app._data_layout + assert layout is not None + assert layout.fixed_values == {0: 0} + assert layout.navigable_dims == [1, 2] + + await pilot.press("d") # enter dim mode (active dim is d0, fixed) + assert app._dim_mode + + await pilot.press("up") # d0: 0 -> 1 + await wait_for_table(pilot) + assert app._data_layout.fixed_values[0] == 1 + + page = app.table_page + expected = leaf3_values()[1] # 
the d0=1 slice + for c in range(page["col_start"], page["col_stop"]): + np.testing.assert_allclose(page["data"][str(c)], expected[page["start"] : page["stop"], c]) + + await pilot.press("escape") + assert not app._dim_mode + + +# ── CTable: row paging, goto, and wide tables ──────────────────────────── + + +async def test_ctable_row_paging_and_goto(store_path): + """Row paging and the 'g'(oto) modal land on the expected CTable rows.""" + app = B2ViewApp(store_path, start_path="/level0/ctable", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + + page = app.table_page + assert page["nrows"] == NROWS + expected = gen.ctable_values(NROWS) + np.testing.assert_array_equal(page["data"]["b"], expected["b"][: page["stop"]]) + + # Row paging and jumps must keep the cursor on the current column + cursor_col = page["columns"].index("c") + table.move_cursor(column=cursor_col) + + await pilot.press("pagedown") + await wait_for_table(pilot) + assert app.table_page["start"] > 0 + assert table.cursor_column == cursor_col + + await pilot.press("pageup") + await wait_for_table(pilot) + assert app.table_page["start"] == 0 + assert table.cursor_column == cursor_col + + # 'b' jumps to the last row + await pilot.press("b") + await wait_for_table(pilot) + page = app.table_page + assert page["stop"] == NROWS + assert page["start"] + table.cursor_row == NROWS - 1 + assert table.cursor_column == cursor_col + + # 'g' opens the goto modal; submit a row in the middle + await pilot.press("g") + await pilot.pause() + assert isinstance(app.screen, GoToRowScreen) + app.screen.query_one("#goto-input", Input).value = "250" + await pilot.press("enter") + await wait_for_table(pilot) + + page = app.table_page + assert page["start"] <= 250 < page["stop"] + assert page["start"] + table.cursor_row == 250 + assert table.cursor_column == cursor_col # goto keeps the column too + 
np.testing.assert_array_equal(page["data"]["b"], expected["b"][page["start"] : page["stop"]]) + + +async def test_ctable_column_paging(store_path): + """A 20-column CTable pages columns left/right without losing the row.""" + app = B2ViewApp(store_path, start_path="/level0/ctable", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + + all_names = list(gen.ctable_values(1).keys()) + expected = gen.ctable_values(NROWS) + + # The table does not fit: hidden columns and a horizontal scrollbar + page = app.table_page + first_columns = list(page["columns"]) + assert gen.NCOLS == 20 + assert page["col_start"] == 0 + assert page["ncols"] == gen.NCOLS + assert len(first_columns) < gen.NCOLS + assert page["hidden_columns"] == gen.NCOLS - len(first_columns) + # The visible columns are the leading ones, in schema order + assert first_columns == all_names[: len(first_columns)] + assert app.query_one("#col-scrollbar").display + # The two-pass fit must not overflow the table (no inner h-scroll) + assert table.virtual_size.width <= table.size.width + + # Page right from the last visible column + table.move_cursor(column=len(first_columns) - 1) + await pilot.press("right") + await wait_for_table(pilot) + + page = app.table_page + assert page["col_start"] == len(first_columns) + assert page["columns"] == all_names[page["col_start"] : page["col_stop"]] + assert table.cursor_column == 0 + _assert_ctable_window_values(page, expected) + + # ...and page back left from the first visible column + right_columns = list(page["columns"]) + table.move_cursor(column=0) + await pilot.press("left") + await wait_for_table(pilot) + + page = app.table_page + assert page["col_start"] == 0 + assert page["columns"] == first_columns + assert table.cursor_column == len(right_columns) - 1 + _assert_ctable_window_values(page, expected) + + # 'e' jumps to the widest whole-column window ending at the last + # column, 
and paging left from there must not skip any column. + # ('s'/'e' are aliases of Home/End, which the 2-D test covers.) + await pilot.press("e") + await wait_for_table(pilot) + page = app.table_page + assert page["col_stop"] == gen.NCOLS + end_col_start = page["col_start"] + assert end_col_start > 0 + assert table.cursor_column == len(page["columns"]) - 1 + + table.move_cursor(column=0) + await pilot.press("left") + await wait_for_table(pilot) + page = app.table_page + assert page["col_start"] < end_col_start + assert page["col_stop"] >= end_col_start # no column skipped + assert page["columns"] == all_names[page["col_start"] : page["col_stop"]] + _assert_ctable_window_values(page, expected) + + # 's' returns to the first window + await pilot.press("s") + await wait_for_table(pilot) + assert app.table_page["col_start"] == 0 + + # Column paging must not lose the current row: goto 150, page right + await pilot.press("g") + await pilot.pause() + app.screen.query_one("#goto-input", Input).value = "150" + await pilot.press("enter") + await wait_for_table(pilot) + page = app.table_page + assert page["start"] + table.cursor_row == 150 + + table.move_cursor(column=len(page["columns"]) - 1) + await pilot.press("right") + await wait_for_table(pilot) + + page = app.table_page + assert page["col_start"] > 0 + assert page["start"] + table.cursor_row == 150 + _assert_ctable_window_values(page, expected) + + # 'c' goes to a column by name; the row position is kept + await pilot.press("c") + await pilot.pause() + assert isinstance(app.screen, GoToColumnScreen) + app.screen.query_one("#gotocol-input", Input).value = "v12" + await pilot.press("enter") + await wait_for_table(pilot) + page = app.table_page + assert page["col_start"] == all_names.index("v12") + assert page["columns"][0] == "v12" + assert table.cursor_column == 0 + assert page["start"] + table.cursor_row == 150 + _assert_ctable_window_values(page, expected) + + # An ambiguous name prefix keeps the modal open; escape 
cancels + await pilot.press("c") + await pilot.pause() + app.screen.query_one("#gotocol-input", Input).value = "v1" + await pilot.press("enter") + await pilot.pause() + assert isinstance(app.screen, GoToColumnScreen) + await pilot.press("escape") + await wait_for_table(pilot) + assert app.table_page["col_start"] == all_names.index("v12") + + # ...and a numeric index works as well + await pilot.press("c") + await pilot.pause() + app.screen.query_one("#gotocol-input", Input).value = "0" + await pilot.press("enter") + await wait_for_table(pilot) + assert app.table_page["col_start"] == 0 + + # Shrinking the terminal re-fits the column window to the new width + wide_columns = list(app.table_page["columns"]) + await pilot.resize_terminal(80, 40) + for _ in range(100): + await pilot.pause() + if not app.loading_table_page and app.table_page.get("viewport_width") == table.size.width: + break + page = app.table_page + assert page["viewport_width"] == table.size.width + assert len(page["columns"]) < len(wide_columns) + assert table.virtual_size.width <= table.size.width + + +# ── CTable filtering ───────────────────────────────────────────────────── + + +async def test_ctable_filtering(store_path): + """The 'f' modal filters CTable rows; errors and clearing keep state sane.""" + app = B2ViewApp(store_path, start_path="/level0/ctable", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + await focus_data_table(pilot) + expected = gen.ctable_values(NROWS) + + async def submit_filter(expr: str) -> None: + await pilot.press("f") + await pilot.pause() + assert isinstance(app.screen, FilterScreen) + app.screen.query_one("#filter-input", Input).value = expr + await pilot.press("enter") + await wait_for_table(pilot) + + # Apply a filter: rows with b in [100, 110) (column b holds 0..NROWS-1) + await submit_filter("b >= 100 and b < 110") + page = app.table_page + assert page["nrows"] == 10 + 
np.testing.assert_array_equal(page["data"]["b"], expected["b"][100:110]) + np.testing.assert_allclose(page["data"]["c"], expected["c"][100:110]) + + # An invalid expression notifies and keeps the previous filter + await submit_filter("nosuchcol > 1") + assert app.browser.get_filter("/level0/ctable") == "b >= 100 and b < 110" + assert app.table_page["nrows"] == 10 + + # Re-opening the modal prefills the active filter; escape cancels + await pilot.press("f") + await pilot.pause() + assert app.screen.query_one("#filter-input", Input).value == "b >= 100 and b < 110" + await pilot.press("escape") + await wait_for_table(pilot) + assert app.table_page["nrows"] == 10 + + # A filter matching nothing yields an empty (but live) grid + await submit_filter("b < 0") + assert app.table_page["nrows"] == 0 + + # An empty input clears the filter and restores the full table + await submit_filter("") + page = app.table_page + assert app.browser.get_filter("/level0/ctable") is None + assert page["nrows"] == NROWS + np.testing.assert_array_equal(page["data"]["b"], expected["b"][: page["stop"]]) + + # Escape on the data grid also clears an active filter + await submit_filter("b >= 100 and b < 110") + assert app.table_page["nrows"] == 10 + await pilot.press("escape") + await wait_for_table(pilot) + assert app.browser.get_filter("/level0/ctable") is None + assert app.table_page["nrows"] == NROWS + + # ── Column filtering ('/' modal) ───────────────────────────────── + + async def submit_column_filter(pattern: str) -> None: + await pilot.press("slash") + await pilot.pause() + assert isinstance(app.screen, FilterScreen) + app.screen.query_one("#filter-input", Input).value = pattern + await pilot.press("enter") + await wait_for_table(pilot) + + # 'v1' matches v10..v19; paging universe shrinks to those 10 columns + await submit_column_filter("v1") + page = app.table_page + assert page["ncols"] == 10 + assert page["columns"][0] == "v10" + assert all(name.startswith("v1") for name in 
page["columns"]) + + # The goto-column modal resolves names within the filtered set + await pilot.press("c") + await pilot.pause() + app.screen.query_one("#gotocol-input", Input).value = "v15" + await pilot.press("enter") + await wait_for_table(pilot) + assert app.table_page["columns"][0] == "v15" + + # Row and column filters combine (back at the first column window) + await pilot.press("s") + await wait_for_table(pilot) + await submit_filter("b >= 100 and b < 110") + page = app.table_page + assert page["nrows"] == 10 + assert page["ncols"] == 10 + np.testing.assert_array_equal(page["data"]["v10"], expected["v10"][100:110]) + + # A pattern matching nothing notifies and keeps the selection + await submit_column_filter("nosuchcol") + assert app.browser.get_column_filter("/level0/ctable") == "v1" + assert app.table_page["ncols"] == 10 + + # Escape clears one layer at a time: row filter first, then columns + await pilot.press("escape") + await wait_for_table(pilot) + assert app.browser.get_filter("/level0/ctable") is None + assert app.table_page["nrows"] == NROWS + assert app.table_page["ncols"] == 10 + await pilot.press("escape") + await wait_for_table(pilot) + assert app.browser.get_column_filter("/level0/ctable") is None + assert app.table_page["ncols"] == len(expected) diff --git a/tests/b2view/test_render.py b/tests/b2view/test_render.py new file mode 100644 index 000000000..102fdba95 --- /dev/null +++ b/tests/b2view/test_render.py @@ -0,0 +1,50 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Unit tests for b2view cell formatting (no app session needed).""" + +import numpy as np + +from blosc2.b2view.render import column_float_decimals, format_cell + + +def test_column_decimals_follow_max_magnitude(): + assert column_float_decimals(np.array([0.1, 5.0])) == 6 + assert column_float_decimals(np.array([0.1, 44.5])) == 5 + assert column_float_decimals(np.array([0.1, 448.5])) == 4 + assert column_float_decimals(np.array([0.1, 123456.7])) == 1 + assert column_float_decimals(np.array([0.1, 12345678.0])) == 0 + + +def test_column_decimals_special_cases(): + # All-zero columns read best as plain 0.0 + assert column_float_decimals(np.zeros(3)) == 1 + # Scientific-notation territory and non-float columns: per-value fallback + assert column_float_decimals(np.array([1e10])) is None + assert column_float_decimals(np.array([1e-9])) is None + assert column_float_decimals(np.arange(5)) is None + assert column_float_decimals(np.array(["a", "b"])) is None + assert column_float_decimals(np.array([])) is None + assert column_float_decimals(np.array([np.nan])) is None + # NaN/inf cells are ignored when picking the column format + assert column_float_decimals(np.array([np.nan, 1.5])) == 6 + + +def test_format_cell_uniform_decimals_align(): + vals = np.array([0.0, 1.5, -3.25, 448.5]) + decimals = column_float_decimals(vals) + cells = [format_cell(v, float_decimals=decimals) for v in vals] + assert cells == [" 0.0000", " 1.5000", " -3.2500", " 448.5000"] + # Same width and aligned decimal points for the whole column + assert len({len(cell) for cell in cells}) == 1 + assert len({cell.index(".") for cell in cells}) == 1 + + +def test_format_cell_without_column_context_unchanged(): + # The per-value fallback keeps its historical behavior + assert format_cell(np.float64(0.0)) == " 0.0" + assert format_cell(np.float64(1.5)) == " 1.500000" diff --git 
a/tests/b2view/tree_store_gen.py b/tests/b2view/tree_store_gen.py new file mode 100644 index 000000000..1a557c667 --- /dev/null +++ b/tests/b2view/tree_store_gen.py @@ -0,0 +1,142 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Deterministic TreeStore generator for the b2view tests. + +Trimmed copy of the creation part of ``bench/tree-store.py``, owned by the +test suite so it can evolve with the tests without affecting the bench tool. + +The store layout is a hierarchy of *nlevels* groups (``/level0``, ...), +each holding *nleaves* NDArray leaves plus one CTable. Leaf ``N`` is an +*N*-dimensional ``blosc2.linspace(0, 1, ...)`` array (leaf0 is a scalar) +with each side ``int(max_elems ** (1/N))``. Every value is predictable so +tests can check that a given viewport shows the expected data, and the +linspace/arange sequences compress very well, keeping files small. +""" + +from __future__ import annotations + +import dataclasses +import os +import shutil + +import numpy as np + +import blosc2 + +# ── Row schema for the CTable ──────────────────────────────────────────── + +# 4 base columns plus 16 extra numeric ones (v04..v19), wide enough to +# exceed the data panel viewport of b2view. 
+NCOLS = 20 + + +@dataclasses.dataclass +class _Row: + a: bool = blosc2.field(blosc2.bool(), default=False) + b: int = blosc2.field(blosc2.int64(), default=0) + c: float = blosc2.field(blosc2.float64(), default=0.0) + d: str = "" + v04: int = blosc2.field(blosc2.int64(), default=0) + v05: float = blosc2.field(blosc2.float64(), default=0.0) + v06: int = blosc2.field(blosc2.int64(), default=0) + v07: float = blosc2.field(blosc2.float64(), default=0.0) + v08: int = blosc2.field(blosc2.int64(), default=0) + v09: float = blosc2.field(blosc2.float64(), default=0.0) + v10: int = blosc2.field(blosc2.int64(), default=0) + v11: float = blosc2.field(blosc2.float64(), default=0.0) + v12: int = blosc2.field(blosc2.int64(), default=0) + v13: float = blosc2.field(blosc2.float64(), default=0.0) + v14: int = blosc2.field(blosc2.int64(), default=0) + v15: float = blosc2.field(blosc2.float64(), default=0.0) + v16: int = blosc2.field(blosc2.int64(), default=0) + v17: float = blosc2.field(blosc2.float64(), default=0.0) + v18: int = blosc2.field(blosc2.int64(), default=0) + v19: float = blosc2.field(blosc2.float64(), default=0.0) + + +def ctable_values(nrows: int) -> dict[str, np.ndarray]: + """Deterministic column values for the CTable; row *i* is predictable. 
+ + Tests rely on these formulas to check that a given viewport shows the + expected values: + + - a: i % 2 == 0 + - b: i + - c: i * 1.5 + - d: "str_%06d" % i + - v{k}, even k: i * k + - v{k}, odd k: linspace(0, k, nrows)[i] == i * k / (nrows - 1) + """ + i = np.arange(nrows) + values: dict[str, np.ndarray] = { + "a": i % 2 == 0, + "b": i, + "c": i * 1.5, + "d": np.char.add("str_", np.char.zfill(i.astype("U6"), 6)), + } + for k in range(4, NCOLS): + values[f"v{k:02d}"] = i * k if k % 2 == 0 else np.linspace(0, k, num=nrows) + return values + + +def leaf_shape(ndim: int, max_elems: int) -> tuple[int, ...]: + """Return the shape of leaf *ndim*: () for 0, else int(max_elems^(1/ndim)) per side.""" + if ndim == 0: + return () + side = int(max_elems ** (1.0 / ndim)) + return (side,) * ndim + + +def create_store(nlevels: int, nleaves: int, max_elems: int, nrows: int, output: str) -> None: + """Create the test TreeStore at *output* (an existing file/dir is replaced).""" + if os.path.isdir(output): + shutil.rmtree(output) + elif os.path.exists(output): + os.remove(output) + + # Pre-build one array per unique dimensionality (leaf ``i`` → *i*‑d). + leaf_arrays: dict[int, blosc2.NDArray] = {} + for ndim in range(nleaves): + shape = leaf_shape(ndim, max_elems) + if ndim == 0: + # linspace does not support 0‑d outputs; use a 0‑d array + leaf_arrays[ndim] = blosc2.asarray(np.array(0.5, dtype=np.float64)) + else: + nelem = int(np.prod(shape)) + leaf_arrays[ndim] = blosc2.linspace(0, 1, num=nelem, shape=shape, dtype=np.float64) + + # Pre-populate a single CTable that is copied into every level. 
+ tmpl_table = blosc2.CTable(_Row, expected_size=nrows, validate=False) + cols = ctable_values(nrows) + struct = np.empty(nrows, dtype=[(name, vals.dtype) for name, vals in cols.items()]) + for name, vals in cols.items(): + struct[name] = vals + tmpl_table.extend(struct, validate=False) + + tstore = blosc2.TreeStore(output, mode="w") + try: + tstore.vlmeta["author"] = "test-suite" + tstore.vlmeta["purpose"] = "testing" + for level in range(nlevels): + parent = f"/level{level}" + for leaf in range(nleaves): + arr = leaf_arrays[leaf] + # Diverse vlmeta types so the vlmeta panel has content + arr.vlmeta["is_even"] = leaf % 2 == 0 + arr.vlmeta["index"] = leaf + arr.vlmeta["label"] = f"leaf_{leaf}" + arr.vlmeta["tags"] = [f"tag_{leaf}", f"tag_{leaf + 1}"] + tstore[f"{parent}/leaf{leaf}"] = arr + + table_key = f"{parent}/ctable" + tstore[table_key] = tmpl_table + ct = tstore[table_key] + ct.vlmeta["description"] = f"Level {level} CTable" + ct.vlmeta["ncols"] = tmpl_table.ncols + finally: + tstore.close() diff --git a/tests/ctable/test_sort_by.py b/tests/ctable/test_sort_by.py index 2bb43e366..b1423279f 100644 --- a/tests/ctable/test_sort_by.py +++ b/tests/ctable/test_sort_by.py @@ -354,5 +354,65 @@ def test_sort_readonly_inplace_raises(): shutil.rmtree(path, ignore_errors=True) +# =========================================================================== +# Regression: sort_by on an unprojected view must not gather all columns +# =========================================================================== + + +@dataclass +class WideSortRow: + a: int = blosc2.field(blosc2.int64(), default=0) + b: float = blosc2.field(blosc2.float64(), default=0.0) + c: float = blosc2.field(blosc2.float64(), default=0.0) + d: str = "" + e: int = blosc2.field(blosc2.int64(), default=0) + + +def _loaded_columns(table) -> set[str]: + """Columns whose payload has actually been opened. 
+ + ``_cols`` is a ``_LazyColumnDict``; bypassing its ``__contains__`` with + ``dict.__contains__`` reveals what was loaded without forcing a load. + """ + return {name for name in table.col_names if dict.__contains__(table._cols, name)} + + +def test_sort_unprojected_view_opens_only_needed_columns(tmp_path): + """``where(cond).sort_by(key)`` without ``columns=`` used to gather every + column of the view (~30x slower than projecting first). It must open only + the condition and sort-key columns, deferring the rest until read.""" + n = 1000 + i = np.arange(n) + data = np.empty(n, dtype=[("a", "= n - 100).sort_by("b") + assert _loaded_columns(t) <= {"a", "b"} + assert _loaded_columns(res) <= {"a", "b"} + + # Deferred columns are still served correctly, on demand only + mask = data["a"] >= n - 100 + order = np.argsort(data["b"][mask], kind="stable") + np.testing.assert_array_equal(res["e"][:], data["e"][mask][order]) + loaded = _loaded_columns(res) + assert "c" not in loaded + assert "d" not in loaded + finally: + t.close() + + if __name__ == "__main__": pytest.main(["-v", __file__]) diff --git a/tests/test_b2view_model.py b/tests/test_b2view_model.py index bb0e7b080..3d782841b 100644 --- a/tests/test_b2view_model.py +++ b/tests/test_b2view_model.py @@ -128,6 +128,8 @@ def test_store_browser_uses_grid_preview_for_2d_ndarray(tmp_path): def test_ctable_preview_buffer_reuses_loaded_rows(tmp_path): pytest.importorskip("textual", reason="b2view TUI requires textual") + if blosc2.IS_WASM: + pytest.skip("instantiating a Textual app needs a terminal driver (termios)") path = tmp_path / "table.b2z" persistent = blosc2.CTable(Row, urlpath=str(path), mode="w") for i in range(100): diff --git a/todo/b2view.md b/todo/b2view.md new file mode 100644 index 000000000..1d1b540dd --- /dev/null +++ b/todo/b2view.md @@ -0,0 +1,89 @@ +# b2view: improvements tracker + +Running list of possible improvements for the `b2view` TUI. 
The original +design document lives in `plans/b2view.md`; this file tracks incremental +work discovered while using and testing the viewer. + +Tests live in `tests/b2view/` (marker `tui`); see the note at the top of +`tests/b2view/test_basics.py` before adding new ones. + +## Pending + +### Navigation + +- [ ] Row paging can lose page alignment after dim-mode single-row scrolls + (`_scroll_navigable_viewport` shifts by 1); consider re-aligning on the + next page up/down, as column paging does now. + +### Data panel + +- [ ] CTable expensive columns (list/struct/object) show a `<...; skipped>` + placeholder; offer on-demand decoding (e.g. a key to materialize the + column, or decode just the cursor row). +- [ ] SChunk preview is not implemented (`model.preview` returns a message). + +### Testing + +- [ ] Visual regressions: consider `pytest-textual-snapshot` (SVG snapshots) + if rendering glitches become a recurring theme. + +## Done + +- 2026-06-12: Pilot-based test suite (`tests/b2view/test_basics.py`) with a + deterministic store generator (`tests/b2view/tree_store_gen.py`); marker + `tui`. +- 2026-06-12: CTable column paging (wide tables were unreachable past the + first window); `preview_ctable` gained `col_start`/`ncols` bookkeeping. +- 2026-06-12: Viewport-consistency reload — the first page of a node was + sized before layout settled (CLI fallbacks vs real viewport), making + paging windows drift; also handles terminal resize. +- 2026-06-12: Column paging windows are aligned to page-size multiples + (ragged last window no longer shifts subsequent pages); `end` jumps to + the last aligned window, mirroring `b` for rows. +- 2026-06-12: Two-pass column fit — the preview fetches a generous candidate + window, then trims to the columns whose *measured* rendered widths fit the + pane (was a fixed ~11 chars/column estimate that wasted half the width on + narrow bool/int columns). 
Paging right starts at the first hidden column; + paging left and `end` fit whole columns backward; windows are stable + within a row buffer (widths measured over the buffer, not the visible + page). Superseded the fixed-multiple alignment policy above. +- 2026-06-12: Uniform decimals per float column — the decimal count is + chosen once per column from its max magnitude in the buffer + (`column_float_decimals` in render.py) instead of per value, so decimal + points align down the column; zeros are formatted like their neighbors + (all-zero columns still show plain 0.0). Unit tests in + `tests/b2view/test_render.py`. +- 2026-06-12: Row paging/jumps (page up/down, `t`/`b`, `g`oto, dim-mode + changes) keep the cursor on its current column; only selecting a new node + resets it to the first column. +- 2026-06-12: `s`/`e` keys jump to the start/end column window (aliases of + Home/End, which were undiscoverable); the data panel subtitle now lists + all jump keys: `rows: t/b/g | cols: s/e`. +- 2026-06-12: `?` opens a help screen listing all keys grouped by area + (panels, tree, grid rows/columns, dim mode); shown in the footer, closed + with esc/`?`/`q`. +- 2026-06-12: `c` opens a go-to-column modal: accepts a column index, and + for CTables also an exact column name or a unique name prefix; the target + becomes the first visible column, keeping the row position. +- 2026-06-12: Resize Pilot test (`pilot.resize_terminal`) — it immediately + caught that the resize handler lived on the App, which never receives + Resize events; moved to BufferedDataTable.on_resize, so the windows now + re-fit on terminal resize and panel maximize/restore for real. +- 2026-06-12: The terminal owns the mouse by default, so native text + selection/copy works; `--mouse` lets b2view capture it instead + (click-to-focus, wheel scrolls the data grid by half a page, paging at + the edges via the same path as the arrow keys). 
+- 2026-06-12: Dim-mode index/viewport movements clamp at the boundaries + instead of wrapping (left/right dimension *selection* still cycles); + navigable viewports clamp to the last full page / whole-column window. +- 2026-06-12: CTable row filtering — `f` opens a modal that takes the same + string expressions as `CTable.where()` (dotted nested names, and/or) and + pages through the matching view; escape or an empty expression clears, + filters are remembered per node (`StoreBrowser.set_filter`), and the data + header shows the active filter plus the unfiltered total. +- 2026-06-12: CTable column filtering — `/` filters the visible columns by + case-insensitive substring (`StoreBrowser.set_column_filter`); column + paging, the two-pass width fit and the `c` goto-column modal all operate + on the filtered subset (`preview_ctable` already took a `columns` + universe). Combines freely with the row filter; escape clears one layer + per press (dim mode, then rows, then columns).