From fc843ed14ff14b6d04f3c5f545636bfd2765d641 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 30 Oct 2025 17:33:49 -0500 Subject: [PATCH 01/12] Add support to download models automatically if --model specified in vllm args --- vec_inf/client/_slurm_script_generator.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index a5ede3c1..e1f32c50 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -137,6 +137,13 @@ def _generate_launch_cmd(self) -> str: Server launch command. """ launcher_script = ["\n"] + + # Check if --model is specified in vllm_args to use HuggingFace model name + model_path = self.model_weights_path + vllm_args_copy = self.params["vllm_args"].copy() + if "--model" in vllm_args_copy: + model_path = vllm_args_copy.pop("--model") + if self.use_container: launcher_script.append( SLURM_SCRIPT_TEMPLATE["container_command"].format( @@ -151,12 +158,12 @@ def _generate_launch_cmd(self) -> str: ) launcher_script.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=self.model_weights_path, + model_weights_path=model_path, model_name=self.params["model_name"], ) ) - for arg, value in self.params["vllm_args"].items(): + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): launcher_script.append(f" {arg} \\") else: @@ -256,6 +263,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) + # Check if --model is specified in vllm_args to use HuggingFace model name + model_path = model_params["model_weights_path"] + vllm_args_copy = model_params["vllm_args"].copy() + if "--model" in vllm_args_copy: + model_path = vllm_args_copy.pop("--model") + if self.use_container: script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format( @@ -265,11 +278,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path: ) script_content.append( "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_params["model_weights_path"], + model_weights_path=model_path, model_name=model_name, ) ) - for arg, value in model_params["vllm_args"].items(): + + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): script_content.append(f" {arg} \\") else: From 5f790ffcce8dadc3c9edf79724eb378e6ee77d97 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 30 Oct 2025 18:21:34 -0500 Subject: [PATCH 02/12] create model dir if it doesn't exist --- vec_inf/client/_slurm_script_generator.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index e1f32c50..e52c94ba 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -38,9 +38,11 @@ def __init__(self, params: dict[str, Any]): self.additional_binds = self.params.get("bind", "") if self.additional_binds: self.additional_binds = f" --bind {self.additional_binds}" - self.model_weights_path = str( - Path(self.params["model_weights_parent_dir"], self.params["model_name"]) + model_weights_path = Path( + self.params["model_weights_parent_dir"], self.params["model_name"] ) + model_weights_path.mkdir(parents=True, exist_ok=True) + self.model_weights_path = str(model_weights_path) env_dict: dict[str, str] = self.params.get("env", {}) # Create string of environment variables self.env_str = "" @@ -210,11 +212,13 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["additional_binds"] = ( f" --bind {self.params['models'][model_name]['bind']}" ) + model_weights_path = Path( + self.params["models"][model_name]["model_weights_parent_dir"], + model_name, + ) + model_weights_path.mkdir(parents=True, exist_ok=True) self.params["models"][model_name]["model_weights_path"] = str( - Path( - self.params["models"][model_name]["model_weights_parent_dir"], - model_name, - ) + model_weights_path ) def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: From 0f22bec693f56dba6b15c2887934d5be0fb320d9 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:40:16 -0600 Subject: [PATCH 03/12] Check model weights existence before binding; use HF model name if missing --- .../client/test_slurm_script_generator.py | 31 +++++++++-- vec_inf/client/_slurm_script_generator.py | 52 +++++++++++++------ vec_inf/client/_slurm_templates.py | 8 +-- 3 files changed, 69 insertions(+), 22 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 0b55f59b..bad302e2 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -12,6 +12,15 @@ ) +@pytest.fixture(autouse=True) +def patch_model_weights_exists(monkeypatch): + """Ensure model weights directory existence checks default to True.""" + + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True + ) + + class TestSlurmScriptGenerator: """Tests for SlurmScriptGenerator class.""" @@ -164,9 +173,8 @@ def test_generate_server_setup_singularity(self, singularity_params): setup = generator._generate_server_setup() assert "ray stop" in setup - assert ( - "module load " in setup - ) # Remove module name since it's inconsistent between clusters + # Note: module_load_cmd may be empty in some configs, so we don't assert it + # The container setup should still work without it def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" @@ -190,6 +198,23 @@ def test_generate_launch_cmd_singularity(self, singularity_params): assert "--bind /scratch:/scratch,/data:/data" in launch_cmd assert "source" not in launch_cmd + def test_generate_launch_cmd_singularity_no_local_weights( + self, singularity_params, monkeypatch + ): + """Test container launch when model weights directory is missing.""" + + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = SlurmScriptGenerator(singularity_params) + launch_cmd = generator._generate_launch_cmd() + + assert "exec --nv" in launch_cmd + assert "--bind /path/to/model_weights/test-model" not in launch_cmd + assert "vllm serve test-model" in launch_cmd + def test_generate_launch_cmd_boolean_args(self, basic_params): """Test launch command with boolean vLLM arguments.""" params = basic_params.copy() diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index e52c94ba..215526eb 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -41,8 +41,14 @@ def __init__(self, params: dict[str, Any]): model_weights_path = Path( self.params["model_weights_parent_dir"], self.params["model_name"] ) - model_weights_path.mkdir(parents=True, exist_ok=True) + self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) + self.model_source = ( + self.model_weights_path if self.model_weights_exists else self.params["model_name"] + ) + self.model_bind_option = ( + f" --bind {self.model_weights_path}" if self.model_weights_exists else "" + ) env_dict: dict[str, str] = self.params.get("env", {}) # Create string of environment variables self.env_str = "" @@ -53,6 +59,14 @@ def __init__(self, params: dict[str, Any]): self.env_str += "," self.env_str += key + "=" + val + # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container + # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict: + # if len(self.env_str) == 0: + # self.env_str = "--env " + # else: + # self.env_str += "," + # self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" + def _generate_script_content(self) -> str: """Generate the complete Slurm script content. @@ -109,7 +123,7 @@ def _generate_server_setup(self) -> str: server_setup_str = server_setup_str.replace( "CONTAINER_PLACEHOLDER", SLURM_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=self.model_weights_path, + model_bind_option=self.model_bind_option, additional_binds=self.additional_binds, env_str=self.env_str, ), @@ -140,16 +154,15 @@ def _generate_launch_cmd(self) -> str: """ launcher_script = ["\n"] - # Check if --model is specified in vllm_args to use HuggingFace model name - model_path = self.model_weights_path vllm_args_copy = self.params["vllm_args"].copy() + model_source = self.model_source if "--model" in vllm_args_copy: - model_path = vllm_args_copy.pop("--model") + model_source = vllm_args_copy.pop("--model") if self.use_container: launcher_script.append( SLURM_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=self.model_weights_path, + model_bind_option=self.model_bind_option, additional_binds=self.additional_binds, env_str=self.env_str, ) @@ -160,7 +173,7 @@ def _generate_launch_cmd(self) -> str: ) launcher_script.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_path, + model_source=model_source, model_name=self.params["model_name"], ) ) @@ -216,9 +229,19 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["model_weights_parent_dir"], model_name, ) - model_weights_path.mkdir(parents=True, exist_ok=True) - self.params["models"][model_name]["model_weights_path"] = str( - model_weights_path + model_weights_exists = model_weights_path.exists() + model_weights_path_str = str(model_weights_path) + self.params["models"][model_name]["model_weights_path"] = ( + model_weights_path_str + ) + self.params["models"][model_name]["model_weights_exists"] = ( + model_weights_exists + ) + self.params["models"][model_name]["model_bind_option"] = ( + f" --bind {model_weights_path_str}" if model_weights_exists else "" + ) + self.params["models"][model_name]["model_source"] = ( + model_weights_path_str if model_weights_exists else model_name ) def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: @@ -267,22 +290,21 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) - # Check if --model is specified in vllm_args to use HuggingFace model name - model_path = model_params["model_weights_path"] vllm_args_copy = model_params["vllm_args"].copy() + model_source = model_params.get("model_source", model_params["model_weights_path"]) if "--model" in vllm_args_copy: - model_path = vllm_args_copy.pop("--model") + model_source = vllm_args_copy.pop("--model") if self.use_container: script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=model_params["model_weights_path"], + model_bind_option=model_params.get("model_bind_option", ""), additional_binds=model_params["additional_binds"], ) ) script_content.append( "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_path, + model_source=model_source, model_name=model_name, ) ) diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py index 209534d8..47dc6527 100644 --- a/vec_inf/client/_slurm_templates.py +++ b/vec_inf/client/_slurm_templates.py @@ -99,7 +99,7 @@ class SlurmScriptTemplate(TypedDict): "env_vars": [ f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')" ], - "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\", + "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}}{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\", "activate_venv": "source {venv}/bin/activate", "server_setup": { "single_node": [ @@ -147,7 +147,7 @@ class SlurmScriptTemplate(TypedDict): ' && mv temp.json "$json_path"', ], "launch_cmd": [ - "vllm serve {model_weights_path} \\", + "vllm serve {model_source} \\", " --served-model-name {model_name} \\", ' --host "0.0.0.0" \\', " --port $vllm_port_number \\", @@ -238,9 +238,9 @@ class BatchModelLaunchScriptTemplate(TypedDict): ' "$json_path" > temp_{model_name}.json \\', ' && mv temp_{model_name}.json "$json_path"\n', ], - "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\", + "container_command": f"{CONTAINER_MODULE_NAME} exec --nv{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\", "launch_cmd": [ - "vllm serve {model_weights_path} \\", + "vllm serve {model_source} \\", " --served-model-name {model_name} \\", ' --host "0.0.0.0" \\', " --port $vllm_port_number \\", From 9f2fdd21b7e22b1e557e6f1ee63cd7581a45dc9a Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:43:37 -0600 Subject: [PATCH 04/12] Remove commented code --- vec_inf/client/_slurm_script_generator.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 215526eb..4c1b0e5d 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -59,14 +59,6 @@ def __init__(self, params: dict[str, Any]): self.env_str += "," self.env_str += key + "=" + val - # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container - # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict: - # if len(self.env_str) == 0: - # self.env_str = "--env " - # else: - # self.env_str += "," - # self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" - def _generate_script_content(self) -> str: """Generate the complete Slurm script content. From 38011beecad2e5eb5e458a660373c960d3802704 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:47:57 -0600 Subject: [PATCH 05/12] Apply code formatting fixes from pre-commit --- tests/vec_inf/client/test_slurm_script_generator.py | 2 -- vec_inf/client/_slurm_script_generator.py | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index bad302e2..0abd80bb 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -15,7 +15,6 @@ @pytest.fixture(autouse=True) def patch_model_weights_exists(monkeypatch): """Ensure model weights directory existence checks default to True.""" - monkeypatch.setattr( "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True ) @@ -202,7 +201,6 @@ def test_generate_launch_cmd_singularity_no_local_weights( self, singularity_params, monkeypatch ): """Test container launch when model weights directory is missing.""" - monkeypatch.setattr( "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False, diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 4c1b0e5d..364dd6b8 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -44,7 +44,9 @@ def __init__(self, params: dict[str, Any]): self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) self.model_source = ( - self.model_weights_path if self.model_weights_exists else self.params["model_name"] + self.model_weights_path + if self.model_weights_exists + else self.params["model_name"] ) self.model_bind_option = ( f" --bind {self.model_weights_path}" if self.model_weights_exists else "" @@ -283,7 +285,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path: ) ) vllm_args_copy = model_params["vllm_args"].copy() - model_source = model_params.get("model_source", model_params["model_weights_path"]) + model_source = model_params.get( + "model_source", model_params["model_weights_path"] + ) if "--model" in vllm_args_copy: model_source = vllm_args_copy.pop("--model") @@ -300,7 +304,7 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) - + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): script_content.append(f" {arg} \\") From 4de35635e7d8f9c2343372dc29ad59da983de832 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:57:21 -0600 Subject: [PATCH 06/12] revert unnecessary test change --- tests/vec_inf/client/test_slurm_script_generator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 0abd80bb..62b36140 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -172,8 +172,9 @@ def test_generate_server_setup_singularity(self, singularity_params): setup = generator._generate_server_setup() assert "ray stop" in setup - # Note: module_load_cmd may be empty in some configs, so we don't assert it - # The container setup should still work without it + assert ( + "module load " in setup + ) # Remove module name since it's inconsistent between clusters def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" From 8b6a2119da676c0d077af288f72f6dc199e03af1 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 17:05:28 -0600 Subject: [PATCH 07/12] Apply formatting fixes from pre-commit --- vec_inf/client/_slurm_script_generator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 0f78fb18..01b786ea 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -121,7 +121,9 @@ def _generate_server_setup(self) -> str: server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"])) server_script.append( SLURM_SCRIPT_TEMPLATE["bind_path"].format( - model_weights_path=self.model_weights_path if self.model_weights_exists else "", + model_weights_path=self.model_weights_path + if self.model_weights_exists + else "", additional_binds=self.additional_binds, ) ) @@ -289,7 +291,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path: script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"]) script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format( - model_weights_path=model_params["model_weights_path"] if model_params.get("model_weights_exists", True) else "", + model_weights_path=model_params["model_weights_path"] + if model_params.get("model_weights_exists", True) + else "", additional_binds=model_params["additional_binds"], ) ) From c68cb3553a4c7abdff595e8fef08395f66076256 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 17:21:10 -0600 Subject: [PATCH 08/12] Add tests for model weights existence coverage --- .../client/test_slurm_script_generator.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index b962f0f9..a81a962d 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -176,6 +176,21 @@ def test_generate_server_setup_singularity(self, singularity_params): "module load " in setup ) # Remove module name since it's inconsistent between clusters + def test_generate_server_setup_singularity_no_weights( + self, singularity_params, monkeypatch + ): + """Test server setup when model weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = SlurmScriptGenerator(singularity_params) + setup = generator._generate_server_setup() + + assert "ray stop" in setup + assert "/path/to/model_weights/test-model" not in setup + def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" generator = SlurmScriptGenerator(basic_params) @@ -415,6 +430,24 @@ def test_generate_model_launch_script_singularity( mock_touch.assert_called_once() mock_write_text.assert_called_once() + @patch("pathlib.Path.touch") + @patch("pathlib.Path.write_text") + def test_generate_model_launch_script_singularity_no_weights( + self, mock_write_text, mock_touch, batch_singularity_params, monkeypatch + ): + """Test batch model launch script when model weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = BatchSlurmScriptGenerator(batch_singularity_params) + script_path = generator._generate_model_launch_script("model1") + + assert script_path.name == "launch_model1.sh" + call_args = mock_write_text.call_args[0][0] + assert "/path/to/model_weights/model1" not in call_args + @patch("vec_inf.client._slurm_script_generator.datetime") @patch("pathlib.Path.touch") @patch("pathlib.Path.write_text") From b610891a831aa6bd6511331fc68237927c6e0b70 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:32:03 -0600 Subject: [PATCH 09/12] Remove redundant /dev/infiniband --- vec_inf/client/_slurm_templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py index ab607fa1..62697006 100644 --- a/vec_inf/client/_slurm_templates.py +++ b/vec_inf/client/_slurm_templates.py @@ -98,7 +98,7 @@ class SlurmScriptTemplate(TypedDict): f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop", ], "imports": "source {src_dir}/find_port.sh", - "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g'),/dev,/tmp{{model_weights_path}}{{additional_binds}}", + "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp{{model_weights_path}}{{additional_binds}}", "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\", "activate_venv": "source {venv}/bin/activate", "server_setup": { From a7a5debbed1d2c6d868a13c38e480efe64ebea4d Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:35:28 -0600 Subject: [PATCH 10/12] Remove unused variable --- vec_inf/client/_slurm_script_generator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 01b786ea..89bac418 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -47,9 +47,6 @@ def __init__(self, params: dict[str, Any]): if self.model_weights_exists else self.params["model_name"] ) - self.model_bind_option = ( - f",{self.model_weights_path}" if self.model_weights_exists else "" - ) self.env_str = self._generate_env_str() def _generate_env_str(self) -> str: From bb3142bb674b19658d5355ddb9a389c807208172 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:45:44 -0600 Subject: [PATCH 11/12] Add warning if downloading weights and HF cache not set --- vec_inf/client/_slurm_script_generator.py | 13 ++++++++ vec_inf/client/_utils.py | 40 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 89bac418..0c86d688 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -15,6 +15,7 @@ SLURM_SCRIPT_TEMPLATE, ) from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME +from vec_inf.client._utils import check_and_warn_hf_cache class SlurmScriptGenerator: @@ -47,6 +48,11 @@ def __init__(self, params: dict[str, Any]): if self.model_weights_exists else self.params["model_name"] ) + check_and_warn_hf_cache( + self.model_weights_exists, + self.model_weights_path, + self.params.get("env", {}), + ) self.env_str = self._generate_env_str() def _generate_env_str(self) -> str: @@ -253,6 +259,13 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["model_source"] = ( model_weights_path_str if model_weights_exists else model_name ) + check_and_warn_hf_cache( + model_weights_exists, + model_weights_path_str, + self.params["models"][model_name].get("env", {}), + model_name, + ) + def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: """Write the generated Slurm script to the log directory. diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index 67d907ec..e39b28d9 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -456,3 +456,43 @@ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]: f"{arg} is required, please set it in the command arguments or environment variables" ) return env_overrides + + +def check_and_warn_hf_cache( + model_weights_exists: bool, + model_weights_path: str, + env_dict: dict[str, str], + model_name: str | None = None, +) -> None: + """Warn if model weights don't exist and HuggingFace cache directory is not set. + + Parameters + ---------- + model_weights_exists : bool + Whether the model weights exist at the expected path. + model_weights_path : str + The expected path to the model weights. + env_dict : dict[str, str] + Dictionary of environment variables to check (from --env parameter). + model_name : str | None, optional + Optional model name to include in the warning message (for batch mode). + """ + if model_weights_exists: + return + + hf_cache_vars = ["HF_HOME", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"] + hf_cache_set = any( + os.environ.get(var) or env_dict.get(var) for var in hf_cache_vars + ) + + if not hf_cache_set: + model_prefix = f"Model weights for '{model_name}' " if model_name else "Model weights " + warnings.warn( + f"{model_prefix}not found at '{model_weights_path}' and no " + f"HuggingFace cache directory is set (HF_HOME, HF_HUB_CACHE, or " + f"HUGGINGFACE_HUB_CACHE). The model may be downloaded to your home " + f"directory, which could consume your storage quota. Consider setting " + f"one of these environment variables to a shared cache location.", + UserWarning, + stacklevel=4, + ) From 2db9c6c860fc2857ca5e9d450587266ac10d9ff5 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:49:34 -0600 Subject: [PATCH 12/12] format ONLY --- vec_inf/client/_slurm_script_generator.py | 1 - vec_inf/client/_utils.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 0c86d688..433a8f27 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -266,7 +266,6 @@ def __init__(self, params: dict[str, Any]): model_name, ) - def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: """Write the generated Slurm script to the log directory. diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index e39b28d9..2f3b01d6 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -486,7 +486,9 @@ def check_and_warn_hf_cache( ) if not hf_cache_set: - model_prefix = f"Model weights for '{model_name}' " if model_name else "Model weights " + model_prefix = ( + f"Model weights for '{model_name}' " if model_name else "Model weights " + ) warnings.warn( f"{model_prefix}not found at '{model_weights_path}' and no " f"HuggingFace cache directory is set (HF_HOME, HF_HUB_CACHE, or "