From 1cb8203a000c148a4789e7c8bebcc437d5a6e8fc Mon Sep 17 00:00:00 2001 From: wjddn279 Date: Tue, 28 Apr 2026 13:52:26 +0900 Subject: [PATCH] Add memray_detailed_tracing option for deeper memory profiling --- airflow-core/docs/howto/memory-profiling.rst | 26 +++++++++++ .../src/airflow/config_templates/config.yml | 15 +++++++ .../src/airflow/utils/memray_utils.py | 5 ++- .../tests/unit/utils/test_memray_utils.py | 45 ++++++++++++++++++- 4 files changed, 89 insertions(+), 2 deletions(-) diff --git a/airflow-core/docs/howto/memory-profiling.rst b/airflow-core/docs/howto/memory-profiling.rst index 1013ef9a624c6..0bfb5241ead81 100644 --- a/airflow-core/docs/howto/memory-profiling.rst +++ b/airflow-core/docs/howto/memory-profiling.rst @@ -84,6 +84,32 @@ Or set it via environment variable: ``memray_trace_components`` to an empty string (or unset the environment variable) and restart the affected components. +Capturing More Detailed Traces +"""""""""""""""""""""""""""""" + +If the default trace does not give you enough information to identify the source of +a memory issue, enabling ``memray_detailed_tracing`` provides deeper insight. + +By default, Memray only records allocations that reach the system allocator. To also +capture C/C++ stack frames and small ``pymalloc`` allocations, set +``memray_detailed_tracing`` to ``True``: + +.. code-block:: ini + + [profiling] + memray_trace_components = scheduler + memray_detailed_tracing = True + +This enables Memray's ``native_traces`` (C/C++ frames from compiled extensions such as +numpy or pandas; most accurate on Linux, less precise on macOS) and +``trace_python_allocators`` (small short-lived Python objects served from existing +``pymalloc`` arenas). + +.. warning:: + + Detailed tracing substantially increases overhead and can produce profile files + several gigabytes in size. Enable it for short, focused sessions only. + Step-by-Step Profiling Guide ----------------------------- diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml index c6dc3e97f292e..85d03096e86cf 100644 --- a/airflow-core/src/airflow/config_templates/config.yml +++ b/airflow-core/src/airflow/config_templates/config.yml @@ -3020,3 +3020,18 @@ profiling: type: string example: "scheduler,api,dag_processor" default: ~ + memray_detailed_tracing: + description: | + Whether to enable memray's ``native_traces`` and ``trace_python_allocators`` options + when ``memray_trace_components`` is set. Captures C/C++ stack frames for compiled + extensions (numpy, pandas, etc.) and small ``pymalloc`` allocations that would + otherwise be invisible. + + This adds significant runtime overhead and produces much larger profile files, so + leave it disabled unless the default trace lacks the detail you need. Native symbol + resolution is most accurate on Linux and less precise on macOS. See + https://bloomberg.github.io/memray/api.html for the underlying ``Tracker`` options. + version_added: 3.3.0 + type: boolean + example: ~ + default: "False" diff --git a/airflow-core/src/airflow/utils/memray_utils.py b/airflow-core/src/airflow/utils/memray_utils.py index 86d9e4e84935c..8ff4ddb146258 100644 --- a/airflow-core/src/airflow/utils/memray_utils.py +++ b/airflow-core/src/airflow/utils/memray_utils.py @@ -55,6 +55,7 @@ def wrapper(*args: PS.args, **kwargs: PS.kwargs) -> RT: # type: ignore[return] _memray_trace_components = conf.getenumlist( "profiling", "memray_trace_components", MemrayTraceComponents ) + is_detailed_tracing = conf.getboolean("profiling", "memray_detailed_tracing") if component not in _memray_trace_components: return func(*args, **kwargs) @@ -63,7 +64,9 @@ def wrapper(*args: PS.args, **kwargs: PS.kwargs) -> RT: # type: ignore[return] profile_path = f"{AIRFLOW_HOME}/{component.value}_memory.bin" with memray.Tracker( - profile_path, + file_name=profile_path, + native_traces=is_detailed_tracing, + trace_python_allocators=is_detailed_tracing, ): log.info("Memray tracing enabled for %s. Output: %s", component.value, profile_path) return func(*args, **kwargs) diff --git a/airflow-core/tests/unit/utils/test_memray_utils.py b/airflow-core/tests/unit/utils/test_memray_utils.py index e04a0cb801320..e447146dc5501 100644 --- a/airflow-core/tests/unit/utils/test_memray_utils.py +++ b/airflow-core/tests/unit/utils/test_memray_utils.py @@ -121,12 +121,55 @@ def test_memray_tracker_activated_when_enabled(self): result = decorated_function("arg1", "arg2", kwarg1="value1") expected_profile_path = f"{AIRFLOW_HOME}/{MemrayTraceComponents.scheduler.value}_memory.bin" - self.mock_memray_module.Tracker.assert_called_once_with(expected_profile_path) + self.mock_memray_module.Tracker.assert_called_once_with( + file_name=expected_profile_path, + native_traces=False, + trace_python_allocators=False, + ) self.mock_tracker.__enter__.assert_called_once() self.mock_function.assert_called_once_with("arg1", "arg2", kwarg1="value1") self.mock_tracker.__exit__.assert_called_once() assert result == "test_result" + @conf_vars( + { + ("profiling", "memray_trace_components"): "scheduler", + ("profiling", "memray_detailed_tracing"): "True", + } + ) + def test_detailed_tracing_enables_native_and_python_allocators(self): + """ + Verify that memray_detailed_tracing=True turns on native_traces and + trace_python_allocators on the Tracker. + """ + decorated_function = enable_memray_trace(MemrayTraceComponents.scheduler)(self.mock_function) + decorated_function() + + expected_profile_path = f"{AIRFLOW_HOME}/{MemrayTraceComponents.scheduler.value}_memory.bin" + self.mock_memray_module.Tracker.assert_called_once_with( + file_name=expected_profile_path, + native_traces=True, + trace_python_allocators=True, + ) + + @conf_vars( + { + ("profiling", "memray_trace_components"): "scheduler", + ("profiling", "memray_detailed_tracing"): "False", + } + ) + def test_detailed_tracing_disabled_keeps_tracker_options_off(self): + """ + Verify that when memray_detailed_tracing=False both Tracker options stay off + even though tracing for the component is enabled. + """ + decorated_function = enable_memray_trace(MemrayTraceComponents.scheduler)(self.mock_function) + decorated_function() + + _, kwargs = self.mock_memray_module.Tracker.call_args + assert kwargs["native_traces"] is False + assert kwargs["trace_python_allocators"] is False + @conf_vars({("profiling", "memray_trace_components"): "scheduler,api,dag_processor"}) def test_function_metadata_preserved_after_decoration(self): """