From 33f5d4c0aecdf755746191421b003233cee7eb55 Mon Sep 17 00:00:00 2001 From: kmanpearl Date: Wed, 23 Apr 2025 11:40:57 -0600 Subject: [PATCH 1/9] seeding and workers --- src/pecanpy/pecanpy.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index 923562d0..fef7b4e8 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -3,6 +3,7 @@ from gensim.models import Word2Vec from numba import njit from numba import prange +from numba import set_num_threads from numba_progress import ProgressBar from .graph import BaseGraph @@ -18,11 +19,6 @@ from .typing import Uint64Array from .wrappers import Timer -try: - from numba.np.ufunc.parallel import get_thread_id -except ImportError: # numba<0.56 - from numba.np.ufunc.parallel import _get_thread_id as get_thread_id - class Base(BaseGraph): """Base node2vec object. @@ -88,12 +84,13 @@ def __init__( verbose: bool = False, extend: bool = False, gamma: float = 0, - random_state: Optional[int] = None, + random_state: int = 42, ): super().__init__() self.p = p self.q = q self.workers = workers # TODO: not doing anything, need to fix. + set_num_threads(workers) # I hope this is the fix self.verbose = verbose self.extend = extend self.gamma = gamma @@ -144,12 +141,15 @@ def simulate_walks( has_nbrs = self.get_has_nbrs() verbose = self.verbose + # Create list of seeds + random_states = self._get_random_seeds(random_state, tot_num_jobs) + # Acquire numba progress proxy for displaying the progress bar with ProgressBar(total=tot_num_jobs, disable=not verbose) as progress: walk_idx_mat = self._random_walks( tot_num_jobs, walk_length, - random_state, + random_states, start_node_idx_ary, has_nbrs, move_forward, @@ -161,22 +161,24 @@ def simulate_walks( return walks + @staticmethod + def _get_random_seeds(base_seed: int, num_jobs:int) -> np.ndarray: + """Get random number generators for each thread.""" + rng = np.random.default_rng(base_seed) + return rng.integers(0, 2**31 - 1, size=num_jobs, dtype=np.int32) + @staticmethod @njit(parallel=True, nogil=True) def _random_walks( tot_num_jobs: int, walk_length: int, - random_state: Optional[int], + random_states: Optional[np.ndarray], start_node_idx_ary: Uint32Array, has_nbrs: HasNbrs, move_forward: MoveForward, progress_proxy: ProgressBar, ) -> Uint32Array: """Simulate a random walk starting from start node.""" - # Seed the random number generator - if random_state is not None: - np.random.seed(random_state + get_thread_id()) - # use the last entry of each walk index array to keep track of the # effective walk length walk_idx_mat = np.zeros((tot_num_jobs, walk_length + 2), dtype=np.uint32) @@ -184,6 +186,7 @@ def _random_walks( walk_idx_mat[:, -1] = walk_length + 1 # set to full walk length by default for i in prange(tot_num_jobs): + np.random.seed(random_states[i]) # initialize first step as normal random walk start_node_idx = walk_idx_mat[i, 0] if has_nbrs(start_node_idx): @@ -205,6 +208,7 @@ def _random_walks( progress_proxy.update(1) return walk_idx_mat + def setup_get_normalized_probs(self): """Transition probability computation setup. From 29eb4f999076f0d3f0974c7199e3b7fbd2ed4f23 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Apr 2025 17:59:42 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pecanpy/pecanpy.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index fef7b4e8..7cb4957c 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -90,7 +90,7 @@ def __init__( self.p = p self.q = q self.workers = workers # TODO: not doing anything, need to fix. - set_num_threads(workers) # I hope this is the fix + set_num_threads(workers) # I hope this is the fix self.verbose = verbose self.extend = extend self.gamma = gamma @@ -162,7 +162,7 @@ def simulate_walks( return walks @staticmethod - def _get_random_seeds(base_seed: int, num_jobs:int) -> np.ndarray: + def _get_random_seeds(base_seed: int, num_jobs: int) -> np.ndarray: """Get random number generators for each thread.""" rng = np.random.default_rng(base_seed) return rng.integers(0, 2**31 - 1, size=num_jobs, dtype=np.int32) @@ -208,7 +208,6 @@ def _random_walks( progress_proxy.update(1) return walk_idx_mat - def setup_get_normalized_probs(self): """Transition probability computation setup. From faceca6b64e639397088a15c22a598f13d042f7b Mon Sep 17 00:00:00 2001 From: kmanpearl Date: Thu, 24 Apr 2025 21:59:38 -0600 Subject: [PATCH 3/9] changed cli random state default --- src/pecanpy/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pecanpy/cli.py b/src/pecanpy/cli.py index 577e59a2..33a1cba2 100755 --- a/src/pecanpy/cli.py +++ b/src/pecanpy/cli.py @@ -156,7 +156,7 @@ def parse_args(): parser.add_argument( "--random_state", type=int, - default=None, + default=42, help="Random seed for generating random walks.", ) From 767fa5e6f0c5988c8c4d7ebcb3c5dd6134cd8fa2 Mon Sep 17 00:00:00 2001 From: kmanpearl Date: Thu, 24 Apr 2025 22:53:19 -0600 Subject: [PATCH 4/9] removed comment --- src/pecanpy/pecanpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index fef7b4e8..af106dc8 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -89,8 +89,8 @@ def __init__( super().__init__() self.p = p self.q = q - self.workers = workers # TODO: not doing anything, need to fix. - set_num_threads(workers) # I hope this is the fix + self.workers = workers + set_num_threads(workers) self.verbose = verbose self.extend = extend self.gamma = gamma From 60fa12fa527d23f63b40702c85d407bd543f82f1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 25 Apr 2025 04:57:32 +0000 Subject: [PATCH 5/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pecanpy/pecanpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index a6a2057b..c436f1ef 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -89,8 +89,8 @@ def __init__( super().__init__() self.p = p self.q = q - self.workers = workers - set_num_threads(workers) + self.workers = workers + set_num_threads(workers) self.verbose = verbose self.extend = extend self.gamma = gamma From 45145693899c7046b78858bea1b79a676240fb4e Mon Sep 17 00:00:00 2001 From: kmanpearl Date: Mon, 28 Apr 2025 14:15:10 -0600 Subject: [PATCH 6/9] temp print for version control --- src/pecanpy/pecanpy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index a6a2057b..474e77f5 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -96,6 +96,7 @@ def __init__( self.gamma = gamma self.random_state = random_state self._preprocessed: bool = False + print('Using Keenan\'s branch!!!') def _map_walk(self, walk_idx_ary: Uint32Array) -> List[str]: """Map walk from node index to node ID. From c15c83c480ffa5febf906954f4bda65b1d1f2e1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 20:16:46 +0000 Subject: [PATCH 7/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pecanpy/pecanpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index 3871b158..87a6d246 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -96,7 +96,7 @@ def __init__( self.gamma = gamma self.random_state = random_state self._preprocessed: bool = False - print('Using Keenan\'s branch!!!') + print("Using Keenan's branch!!!") def _map_walk(self, walk_idx_ary: Uint32Array) -> List[str]: """Map walk from node index to node ID. From 3b3915f493c680efa3de0fda152b5d16a306f21f Mon Sep 17 00:00:00 2001 From: kmanpearl Date: Mon, 5 May 2025 08:55:13 -0600 Subject: [PATCH 8/9] removed default seed --- src/pecanpy/pecanpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index 87a6d246..45cf6596 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -84,7 +84,7 @@ def __init__( verbose: bool = False, extend: bool = False, gamma: float = 0, - random_state: int = 42, + random_state: int = None, ): super().__init__() self.p = p @@ -96,7 +96,7 @@ def __init__( self.gamma = gamma self.random_state = random_state self._preprocessed: bool = False - print("Using Keenan's branch!!!") + def _map_walk(self, walk_idx_ary: Uint32Array) -> List[str]: """Map walk from node index to node ID. From 6908fe76007fcdedcb54146003e4454bbad4a715 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 14:55:26 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pecanpy/pecanpy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pecanpy/pecanpy.py b/src/pecanpy/pecanpy.py index 45cf6596..98a419e5 100755 --- a/src/pecanpy/pecanpy.py +++ b/src/pecanpy/pecanpy.py @@ -97,7 +97,6 @@ def __init__( self.random_state = random_state self._preprocessed: bool = False - def _map_walk(self, walk_idx_ary: Uint32Array) -> List[str]: """Map walk from node index to node ID.