Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ jobs:
name: Server Playwright Tests
needs: [BuildWheel]
runs-on: depot-ubuntu-latest
timeout-minutes: 10
timeout-minutes: 15
steps:
- uses: actions/checkout@v6
- name: Install uv
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,4 @@ Have you had a good experience with this project? Why not share some love and co
We welcome [issue reports](../../issues); be sure to choose the proper issue template for your issue, so that we can be sure you're providing the necessary information.



104 changes: 56 additions & 48 deletions buckaroo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
import inspect
import platform
from ._version import __version__
from .buckaroo_widget import BuckarooWidget, BuckarooInfiniteWidget, AutocleaningBuckaroo
from .dataflow.widget_extension_utils import DFViewer
from .widget_utils import is_in_ipython, is_in_marimo, enable, disable, determine_jupter_env
try:
from .buckaroo_widget import BuckarooWidget, BuckarooInfiniteWidget, AutocleaningBuckaroo
from .widget_utils import is_in_ipython, is_in_marimo, enable, disable, determine_jupter_env
from .dataflow.widget_extension_utils import DFViewer
_HAS_PANDAS = True
except ImportError:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Restrict import fallback to missing pandas only

Catching a blanket ImportError here suppresses all widget-stack import failures, not just the intended “pandas not installed” case. If any transitive import regression (or missing core dependency) raises ImportError, buckaroo will silently set _HAS_PANDAS=False and disable notebook exports/initialization instead of surfacing the real error, which makes breakages much harder to detect and debug.

Useful? React with 👍 / 👎.

# buckaroo_widget, widget_utils, and widget_extension_utils require pandas;
# skip in server-only mode
_HAS_PANDAS = False
from .read_utils import read
try:
from .file_cache.cache_utils import (
Expand All @@ -23,30 +29,31 @@



def is_notebook_compatible():
jupyter_env = determine_jupter_env()
if jupyter_env == "jupyter-notebook":
try:
if _HAS_PANDAS:
def is_notebook_compatible():
jupyter_env = determine_jupter_env()
if jupyter_env == "jupyter-notebook":
try:
import notebook
return notebook.version_info[0] >= 6
except:
pass
return False
else:
return True

def warn_on_incompatible():
if not is_notebook_compatible():
import notebook
return notebook.version_info[0] >= 6
except:
pass
return False
else:
return True
print("Buckaroo is compatible with jupyter notebook > 6, or jupyterlab >3.6.0")
print("You seem to be executing this in jupyter notebook version %r" % str(notebook.__version__))
print("You can upgrade to notebook 7 by running 'pip install --upgrade notebook'")
print("Or you can try running jupyter lab with 'jupyter lab'")

def warn_on_incompatible():
if not is_notebook_compatible():
import notebook
print("Buckaroo is compatible with jupyter notebook > 6, or jupyterlab >3.6.0")
print("You seem to be executing this in jupyter notebook version %r" % str(notebook.__version__))
print("You can upgrade to notebook 7 by running 'pip install --upgrade notebook'")
print("Or you can try running jupyter lab with 'jupyter lab'")



def debug_packages():
print("Selected Jupyter core packages...")
from .widget_utils import determine_jupter_env
jupyter_env = determine_jupter_env()
print("executing in %s " % jupyter_env)
packages = [
Expand Down Expand Up @@ -106,33 +113,34 @@ def is_running_in_mp_timeout() -> bool:
return False

has_initted = False
try:
if is_in_marimo():
print("Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo.marimo_utils import marimo_unmonkeypatch; marimo_unmonkeypatch()`")
from buckaroo.marimo_utils import marimo_monkeypatch
marimo_monkeypatch()

elif is_in_ipython():
enable()
print("Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo import disable; disable()`")

else:
if not is_running_in_mp_timeout() and not has_initted:
print("must be running inside ipython to enable default display via enable()")
warn_on_incompatible()
if _HAS_PANDAS:
try:
import polars
if not platform.system() == "Windows":
from buckaroo.read_utils import read, read_df
if is_in_marimo():
print("Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo.marimo_utils import marimo_unmonkeypatch; marimo_unmonkeypatch()`")
from buckaroo.marimo_utils import marimo_monkeypatch
marimo_monkeypatch()

elif is_in_ipython():
enable()
print("Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo import disable; disable()`")

else:
#FIXME post some error message here explaining that these features aren't available on windows
pass
if not is_running_in_mp_timeout() and not has_initted:
print("must be running inside ipython to enable default display via enable()")
warn_on_incompatible()
try:
import polars
if not platform.system() == "Windows":
from buckaroo.read_utils import read, read_df
else:
#FIXME post some error message here explaining that these features aren't available on windows
pass

except ImportError:
#polars is not installed; skip making read available as a base import
pass
except:
print("error enabling buckaroo as default display formatter for dataframes (ignore message during testing/builds")
finally:
has_initted = True
except ImportError:
#polars is not installed; skip making read available as a base import
pass
except:
print("error enabling buckaroo as default display formatter for dataframes (ignore message during testing/builds")
finally:
has_initted = True

10 changes: 5 additions & 5 deletions buckaroo/buckaroo_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .pluggable_analysis_framework.col_analysis import ColAnalysis
from buckaroo.extension_utils import copy_extend

from .serialization_utils import EMPTY_DF_WHOLE, check_and_fix_df, pd_to_obj, to_parquet, sd_to_parquet_b64
from .serialization_utils import EMPTY_DF_WHOLE, check_and_fix_df, pd_to_obj, to_arrow_ipc, sd_to_ipc_b64
from .dataflow.dataflow import CustomizableDataflow
from .dataflow.dataflow_extras import (Sampling, exception_protect)
from .dataflow.styling_core import (ComponentConfig, DFViewerConfig, DisplayArgs, OverrideColumnConfig, PinnedRowConfig, StylingAnalysis, merge_column_config, EMPTY_DFVIEWER_CONFIG)
Expand Down Expand Up @@ -242,7 +242,7 @@ def _sd_to_jsondf(self, sd):

Exists so this can be overridden for polars/geopandas.
"""
return sd_to_parquet_b64(sd)
return sd_to_ipc_b64(sd)



Expand Down Expand Up @@ -395,11 +395,11 @@ def _handle_payload_args(self, new_payload_args):
converted_sort_column = processed_sd[sort]['orig_col_name']
sorted_df = processed_df.sort_values(by=[converted_sort_column], ascending=ascending)
slice_df = sorted_df[start:end]
self.send({ "type": "infinite_resp", 'key':new_payload_args, 'data':[], 'length':len(processed_df)}, [to_parquet(slice_df)])
self.send({ "type": "infinite_resp", 'key':new_payload_args, 'data':[], 'length':len(processed_df)}, [to_arrow_ipc(slice_df)])
else:
slice_df = processed_df[start:end]
self.send({ "type": "infinite_resp", 'key':new_payload_args,
'data': [], 'length':len(processed_df)}, [to_parquet(slice_df) ])
'data': [], 'length':len(processed_df)}, [to_arrow_ipc(slice_df) ])

second_pa = new_payload_args.get('second_request')
if not second_pa:
Expand All @@ -409,7 +409,7 @@ def _handle_payload_args(self, new_payload_args):
extra_df = processed_df[extra_start:extra_end]
self.send(
{"type": "infinite_resp", 'key':second_pa, 'data':[], 'length':len(processed_df)},
[to_parquet(extra_df)]
[to_arrow_ipc(extra_df)]
)
except Exception as e:
logger.error(e)
Expand Down
6 changes: 3 additions & 3 deletions buckaroo/customizations/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ def get_mode(ser):

# but in jupyterlite envs, we have a recent version of pandas
# without this problem
if not pd.api.types.is_numeric():
if not pd.api.types.is_numeric_dtype(ser):
return np.nan
mode_raw = ser.mode()
if len(mode_raw) == 0:
return np.nan
return mode_raw.values[0]

try:
if not pd.api.types.is_numeric():
if not pd.api.types.is_numeric_dtype(ser):
return np.nan
mode_raw = ser.mode()
if len(mode_raw) == 0:
Expand Down
4 changes: 2 additions & 2 deletions buckaroo/dataflow/column_executor_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from buckaroo.file_cache.multiprocessing_executor import MultiprocessingExecutor
from buckaroo.file_cache.paf_column_executor import PAFColumnExecutor
from .abc_dataflow import ABCDataflow
from buckaroo.serialization_utils import sd_to_parquet_b64
from buckaroo.serialization_utils import sd_to_ipc_b64

logger = logging.getLogger("buckaroo.dataflow")

Expand Down Expand Up @@ -272,7 +272,7 @@ def _listener(note: ProgressNotification) -> None:
current_summary = self.summary_sd.copy() if self.summary_sd else {}
current_summary.update(aggregated_summary)
self.summary_sd = current_summary
self.df_data_dict = {'main': [], 'all_stats': sd_to_parquet_b64(current_summary), 'empty': []}
self.df_data_dict = {'main': [], 'all_stats': sd_to_ipc_b64(current_summary), 'empty': []}
# Update merged_sd as stats come in (important for async executors)
# Merge with existing to preserve any cached columns
current_merged = self.merged_sd.copy() if self.merged_sd else {}
Expand Down
4 changes: 2 additions & 2 deletions buckaroo/dataflow/dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from traitlets import Unicode, Any, observe, Dict

from buckaroo.pluggable_analysis_framework.col_analysis import ColAnalysis, SDType
from ..serialization_utils import pd_to_obj, sd_to_parquet_b64
from ..serialization_utils import pd_to_obj, sd_to_ipc_b64
from buckaroo.pluggable_analysis_framework.utils import (filter_analysis)
from buckaroo.pluggable_analysis_framework.df_stats_v2 import DfStatsV2
from .autocleaning import SentinelAutocleaning
Expand Down Expand Up @@ -420,7 +420,7 @@ def _sd_to_jsondf(self, sd:SDType):

Exists so this can be overridden for polars/geopandas.
"""
return sd_to_parquet_b64(sd)
return sd_to_ipc_b64(sd)

def _df_to_obj(self, df:pd.DataFrame) -> TDict[str, TAny]:
return pd_to_obj(self.sampling_klass.serialize_sample(df))
Expand Down
6 changes: 4 additions & 2 deletions buckaroo/df_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import pandas as pd
from typing import Iterable, Union, List, Tuple, Dict
from __future__ import annotations
from typing import Iterable, Union, List, Tuple, Dict, TYPE_CHECKING
from typing_extensions import TypeAlias

if TYPE_CHECKING:
import pandas as pd

ColIdentifier:TypeAlias = Union[Iterable[str], str]

Expand Down
4 changes: 2 additions & 2 deletions buckaroo/geopandas_buckaroo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from buckaroo.customizations.styling import DefaultMainStyling, StylingAnalysis
from buckaroo.pluggable_analysis_framework.pluggable_analysis_framework import ColAnalysis
from .dataflow.dataflow_extras import (Sampling)
from buckaroo.serialization_utils import pd_to_obj, sd_to_parquet_b64
from buckaroo.serialization_utils import pd_to_obj, sd_to_ipc_b64
from buckaroo.customizations.analysis import (TypingStats)
import geopandas

Expand Down Expand Up @@ -65,7 +65,7 @@ def _sd_to_jsondf(self, sd):
temp_sd = sd.copy()
if 'index' in temp_sd:
del temp_sd['index']
return sd_to_parquet_b64(temp_sd)
return sd_to_ipc_b64(temp_sd)

class GeopandasBuckarooWidget(GeopandasBase):
pass
Expand Down
27 changes: 15 additions & 12 deletions buckaroo/lazy_infinite_polars_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import datetime
from datetime import timedelta
from typing import Any, Dict, List, Optional, Type
from io import BytesIO
from pathlib import Path
import os
import traceback
Expand All @@ -29,7 +28,7 @@
from buckaroo.styling_helpers import obj_, pinned_histogram
from .pluggable_analysis_framework.polars_analysis_management import PolarsAnalysis
from .df_util import old_col_new_col
from .serialization_utils import sd_to_parquet_b64
from .serialization_utils import sd_to_ipc_b64
from buckaroo.file_cache.base import AbstractFileCache, Executor as _SyncExec, ExecutorLog # type: ignore
from buckaroo.file_cache.multiprocessing_executor import MultiprocessingExecutor as _ParExec
from buckaroo.file_cache.cache_utils import get_global_file_cache, get_global_executor_log
Expand Down Expand Up @@ -718,8 +717,8 @@ def _listener(note):
# Ensure summary is ready for initial display (checks if computation completed synchronously)
summary_sd = self.ensure_initial_summary_for_display(initial_summary_sd)
summary_rows = self._summary_to_rows(summary_sd)
if isinstance(summary_rows, dict) and summary_rows.get('format') == 'parquet_b64':
logger.info("Initial all_stats prepared as parquet_b64, b64_len=%s", len(summary_rows.get('data', '')))
if isinstance(summary_rows, dict) and summary_rows.get('format') in ('ipc_b64', 'parquet_b64'):
logger.info("Initial all_stats prepared as %s, b64_len=%s", summary_rows.get('format'), len(summary_rows.get('data', '')))
else:
logger.info(
"Initial all_stats prepared: len=%s sample=%s",
Expand Down Expand Up @@ -765,7 +764,7 @@ def _summary_to_rows(self, summary: Dict[str, Dict[str, Any]]):
"""Convert summary dict to parquet-b64 tagged payload (or JSON fallback)."""
if not summary:
return []
return sd_to_parquet_b64(summary)
return sd_to_ipc_b64(summary)

# selection and retry now delegated to dataflow
def _build_column_config(self, summary: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
Expand All @@ -792,11 +791,15 @@ def _prepare_df_for_serialization(self, df: pl.DataFrame) -> pl.DataFrame:
select_clauses.append(pl.col(orig).alias(rw))
return df.select(select_clauses)

def _to_parquet(self, df: pl.DataFrame) -> bytes:
out = BytesIO()
self._prepare_df_for_serialization(df).write_parquet(out, compression='uncompressed')
out.seek(0)
return out.read()
def _to_arrow_ipc(self, df: pl.DataFrame) -> bytes:
import pyarrow as pa
import pyarrow.ipc as ipc
table = self._prepare_df_for_serialization(df).to_arrow()
sink = pa.BufferOutputStream()
writer = ipc.new_stream(sink, table.schema)
writer.write_table(table)
writer.close()
return sink.getvalue().to_pybytes()

def _handle_payload_args(self, new_payload_args: Dict[str, Any]) -> None:
start, end = new_payload_args.get('start', 0), new_payload_args.get('end', 0)
Expand Down Expand Up @@ -831,7 +834,7 @@ def _handle_payload_args(self, new_payload_args: Dict[str, Any]) -> None:
start, end, len(slice_df), self.df_meta['total_rows']
)
self.send({"type": "infinite_resp", 'key': new_payload_args, 'data': [], 'length': self.df_meta['total_rows']},
[self._to_parquet(slice_df)])
[self._to_arrow_ipc(slice_df)])

second_pa = new_payload_args.get('second_request')
if second_pa:
Expand All @@ -847,7 +850,7 @@ def _handle_payload_args(self, new_payload_args: Dict[str, Any]) -> None:
s2, e2, len(slice2), self.df_meta['total_rows']
)
self.send({"type": "infinite_resp", 'key': second_pa, 'data': [], 'length': self.df_meta['total_rows']},
[self._to_parquet(slice2)])
[self._to_arrow_ipc(slice2)])
except Exception as e:
stack_trace = traceback.format_exc()
self.send({"type": "infinite_resp", 'key': new_payload_args, 'data': [], 'error_info': stack_trace, 'length': 0}, [])
Expand Down
Loading