From f77e88a7b05b5749fb9f4a9eed268a782c75d627 Mon Sep 17 00:00:00 2001 From: svij Date: Mon, 18 May 2026 22:25:10 +0000 Subject: [PATCH 1/3] fix randomness --- gigl/common/utils/compute/random.py | 84 ++++++++++++++++++----------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/gigl/common/utils/compute/random.py b/gigl/common/utils/compute/random.py index 656f3e233..f67132d2f 100644 --- a/gigl/common/utils/compute/random.py +++ b/gigl/common/utils/compute/random.py @@ -1,49 +1,69 @@ +""" +Matches the ``set_seed(seed, deterministic=False)`` shape used by +Hugging Face Transformers, MMEngine, and Accelerate; follows the recipe +at https://pytorch.org/docs/stable/notes/randomness.html. +""" + import os import random +from typing import Final + +import numpy as np +import torch from gigl.common.logger import Logger logger = Logger() +_DEFAULT_SEED: Final[int] = 42 # Answer to the Ultimate Question. +# Required on CUDA >= 10.2 when use_deterministic_algorithms(True) is set, +# otherwise cuBLAS matmuls raise RuntimeError. ":4096:8" trades ~24 MiB of +# extra cuBLAS workspace for keeping perf reasonable vs ":16:8". +_CUBLAS_WORKSPACE_CONFIG: Final[str] = ":4096:8" -def make_compute_deterministic_and_set_seed( - seed: int = 42, # Answer to the Ultimate Question of Life, The Universe, and Everything - should_consider_numpy=True, - should_consider_torch=False, - should_consider_tensorflow=False, -): - logger.info( - """ - Ensure data loading is also deterministic and you are using deterministic algorithms - for relevant frameworks, otherwise nondeterminism will persist - """ - ) - # Setting PYTHONHASHSEED doesn't seem like it actually does anything - # See: https://stackoverflow.com/questions/30585108/disable-hash-randomization-from-within-python-program - # os.environ["PYTHONHASHSEED"] = "0" +def seed_everything( + seed: int = _DEFAULT_SEED, + should_enable_expensive_deterministic_compute: bool = False, +) -> None: + """Seed Python / NumPy / PyTorch RNGs, optionally enforce deterministic torch ops. - random.seed(seed) + What gets seeded: + + - ``random.seed(seed)`` — Python stdlib. + - ``np.random.seed(seed)`` — NumPy global RNG. + - ``torch.manual_seed(seed)`` — CPU **and all CUDA devices** + (``torch.manual_seed`` calls ``torch.cuda.manual_seed_all`` internally. + Also covers PyTorch Geometric. - if should_consider_numpy: - import numpy as np + When ``should_enable_expensive_deterministic_compute=True`` (opt-in; default False because it costs + throughput and should not be enabled for training or for production inference - can be used for debugging purposes. - np.random.seed(seed) + - Important: Graph Sampling currently do not follow determism outlined here. + Example: + >>> seed_everything(42) + 42 - if should_consider_torch: - import torch - import torch.backends.cudnn + Args: + seed: RNG seed. + deterministic: If True, also enforces bitwise-deterministic torch + ops (cudnn flags, ``use_deterministic_algorithms``, + ``CUBLAS_WORKSPACE_CONFIG``). Default False — most training + pipelines want seeded RNGs without paying the throughput cost. - torch.manual_seed(seed) + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if should_enable_expensive_deterministic_compute: + os.environ["CUBLAS_WORKSPACE_CONFIG"] = _CUBLAS_WORKSPACE_CONFIG torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.use_deterministic_algorithms(True) - - if should_consider_tensorflow: - import tensorflow as tf - - tf.random.set_seed(seed) - os.environ["TF_DETERMINISTIC_OPS"] = "1" - os.environ["TF_CUDNN_DETERMINISTIC"] = "1" - tf.config.threading.set_inter_op_parallelism_threads(1) - tf.config.threading.set_intra_op_parallelism_threads(1) + logger.info( + f"seed_everything: seeded python/numpy/torch with seed={seed}; " + f"expensive deterministic algorithms ON; " + f"throughput will degrade" + ) + else: + logger.info(f"seed_everything: seeded python/numpy/torch with seed={seed}") From 610f71d27bf7d8d37de21b0462fafc08795a11b7 Mon Sep 17 00:00:00 2001 From: svij Date: Mon, 18 May 2026 22:28:08 +0000 Subject: [PATCH 2/3] kog warn --- gigl/common/utils/compute/random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gigl/common/utils/compute/random.py b/gigl/common/utils/compute/random.py index f67132d2f..093c9c297 100644 --- a/gigl/common/utils/compute/random.py +++ b/gigl/common/utils/compute/random.py @@ -60,7 +60,7 @@ def seed_everything( torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.use_deterministic_algorithms(True) - logger.info( + logger.warning( f"seed_everything: seeded python/numpy/torch with seed={seed}; " f"expensive deterministic algorithms ON; " f"throughput will degrade" From 19f7d5b204b9ae59885b990e9407715b9cdd02f2 Mon Sep 17 00:00:00 2001 From: svij Date: Wed, 20 May 2026 01:14:21 +0000 Subject: [PATCH 3/3] spelling --- gigl/common/utils/compute/random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gigl/common/utils/compute/random.py b/gigl/common/utils/compute/random.py index 093c9c297..59ea7bd0b 100644 --- a/gigl/common/utils/compute/random.py +++ b/gigl/common/utils/compute/random.py @@ -39,7 +39,7 @@ def seed_everything( When ``should_enable_expensive_deterministic_compute=True`` (opt-in; default False because it costs throughput and should not be enabled for training or for production inference - can be used for debugging purposes. - - Important: Graph Sampling currently do not follow determism outlined here. + - Important: Graph Sampling currently do not follow determinism outlined here. Example: >>> seed_everything(42) 42