Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ integration_test: build_cpp_extensions


notebooks_test:
RESOURCE_CONFIG_PATH=${GIGL_TEST_DEFAULT_RESOURCE_CONFIG} python -m tests.config_tests.notebooks_test
GIGL_RESOURCE_CONFIG_URI=${GIGL_TEST_DEFAULT_RESOURCE_CONFIG} python -m tests.config_tests.notebooks_test

mock_assets:
uv run python -m gigl.src.mocking.dataset_asset_mocking_suite --resource_config_uri="deployment/configs/e2e_cicd_resource_config.yaml" --env test
Expand Down
49 changes: 44 additions & 5 deletions gigl/env/constants.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,54 @@
"""Environment-variable keys exported by ``launch_custom``.
"""Environment-variable keys used across GiGL.

These keys are set on the subprocess env (never on the parent
``os.environ``) by ``gigl.src.common.custom_launcher.launch_custom`` so
that receiving CLIs can ``os.environ.get(...)`` their runtime context.
Most of these keys are set on the subprocess env (never on the parent
``os.environ``) by ``gigl.src.common.custom_launcher.launch_custom`` so that
receiving CLIs can ``os.environ.get(...)`` their runtime context.

``GIGL_RESOURCE_CONFIG_URI`` is also written to the parent ``os.environ`` by
``gigl.env.pipelines_config.get_resource_config`` so that downstream readers
(e.g. ``GiglResourceConfigWrapper.get_resource_config_uri``) can recover the
value within the same process. Use :func:`read_resource_config_uri_from_env`
to read it; that helper also falls back to the deprecated
``RESOURCE_CONFIG_PATH`` with a one-time warning.
"""

from typing import Final
import os
from typing import Final, Optional

from gigl.common.logger import Logger

# Names of the environment variables GiGL uses to pass runtime context.
# Each constant holds the variable *name*, not its value.
GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY: Final[str] = "GIGL_APPLIED_TASK_IDENTIFIER"
GIGL_TASK_CONFIG_URI_ENV_KEY: Final[str] = "GIGL_TASK_CONFIG_URI"
GIGL_RESOURCE_CONFIG_URI_ENV_KEY: Final[str] = "GIGL_RESOURCE_CONFIG_URI"
GIGL_CPU_DOCKER_URI_ENV_KEY: Final[str] = "GIGL_CPU_DOCKER_URI"
GIGL_CUDA_DOCKER_URI_ENV_KEY: Final[str] = "GIGL_CUDA_DOCKER_URI"
GIGL_COMPONENT_ENV_KEY: Final[str] = "GIGL_COMPONENT"

# Deprecated name of the resource-config variable; only consulted as a
# fallback when GIGL_RESOURCE_CONFIG_URI is not set.
_LEGACY_RESOURCE_CONFIG_ENV_KEY: Final[str] = "RESOURCE_CONFIG_PATH"
# Flipped to True after the deprecation warning has been logged, so the
# warning is emitted at most once per process.
_legacy_resource_config_env_warned: bool = False
_logger = Logger()


def read_resource_config_uri_from_env() -> Optional[str]:
    """Look up the resource-config URI in the process environment.

    ``GIGL_RESOURCE_CONFIG_URI`` takes precedence. When that variable is
    absent, the deprecated ``RESOURCE_CONFIG_PATH`` is consulted instead; the
    first time a value is found under the legacy name, a deprecation warning
    is logged (subsequent lookups stay silent).

    Returns:
        The value stored under the preferred name if present, otherwise the
        value stored under the legacy name, otherwise ``None``.
    """
    global _legacy_resource_config_env_warned

    preferred = os.environ.get(GIGL_RESOURCE_CONFIG_URI_ENV_KEY)
    if preferred is not None:
        return preferred

    fallback = os.environ.get(_LEGACY_RESOURCE_CONFIG_ENV_KEY)
    if fallback is None:
        # Neither variable is set.
        return None

    if not _legacy_resource_config_env_warned:
        deprecation_notice = (
            f"Environment variable {_LEGACY_RESOURCE_CONFIG_ENV_KEY!r} is deprecated; "
            f"use {GIGL_RESOURCE_CONFIG_URI_ENV_KEY!r} instead. "
            "Support for the legacy name will be removed in a future release."
        )
        _logger.warning(deprecation_notice)
        # Mark only after the warning was emitted so it fires exactly once.
        _legacy_resource_config_env_warned = True

    return fallback
15 changes: 10 additions & 5 deletions gigl/env/pipelines_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

from gigl.common import Uri, UriFactory
from gigl.common.logger import Logger
from gigl.env.constants import (
GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
read_resource_config_uri_from_env,
)
from gigl.src.common.types.pb_wrappers.gigl_resource_config import (
GiglResourceConfigWrapper,
)
Expand Down Expand Up @@ -55,8 +59,9 @@ def get_resource_config(
Args:
resource_config_uri: Optional[Uri] = None
The URI of the resource config file. If None, the function will try to load the resource config from the
command-line argument --resource_config_uri or the environment variable RESOURCE_CONFIG_PATH. If these are
not set, the function will try to load the resource config from the pipeline options.
command-line argument --resource_config_uri or the environment variable GIGL_RESOURCE_CONFIG_URI (the
deprecated RESOURCE_CONFIG_PATH is still read as a fallback, with a one-time warning). If these are not
set, the function will try to load the resource config from the pipeline options.

Returns:
resource_config: GiglResourceConfigWrapper
Expand All @@ -77,8 +82,8 @@ def get_resource_config(
required=False,
)
args, _ = parser.parse_known_args()
resource_config_str = args.resource_config_uri or os.getenv(
"RESOURCE_CONFIG_PATH"
resource_config_str = (
args.resource_config_uri or read_resource_config_uri_from_env()
)

if resource_config_str is None:
Expand All @@ -90,7 +95,7 @@ def get_resource_config(
"No resource config provided, either via command-line argument or environment variable."
)

os.environ["RESOURCE_CONFIG_PATH"] = resource_config_str
os.environ[GIGL_RESOURCE_CONFIG_URI_ENV_KEY] = resource_config_str
resource_config_path = UriFactory.create_uri(uri=resource_config_str)

from gigl.common.utils.proto_utils import ProtoUtils
Expand Down
1 change: 0 additions & 1 deletion gigl/src/common/constants/resource_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
SPARK_SPLIT_GENERATOR_CONFIG = "spark_split_generator_config"
DATAFLOW_SPLIT_GENERATOR_CONFIG = "dataflow_split_generator_config"
RESOURCE_CONFIG_OS_ENV = "RESOURCE_CONFIG_PATH"
7 changes: 3 additions & 4 deletions gigl/src/common/types/pb_wrappers/gigl_resource_config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import argparse
import os
from dataclasses import dataclass
from typing import Optional, Union

import gigl.src.common.constants.resource_config as resource_config_constants
from gigl.common import GcsUri, UriFactory
from gigl.common.logger import Logger
from gigl.env.constants import read_resource_config_uri_from_env
from gigl.src.common.constants.components import GiGLComponents
from snapchat.research.gbml.gigl_resource_config_pb2 import (
CustomLauncherConfig,
Expand Down Expand Up @@ -108,8 +107,8 @@ def get_resource_config_uri(self) -> str:
)
args, _ = parser.parse_known_args()

resource_config_path = args.resource_config_uri or os.getenv(
resource_config_constants.RESOURCE_CONFIG_OS_ENV
resource_config_path = (
args.resource_config_uri or read_resource_config_uri_from_env()
)

return str(resource_config_path)
Expand Down
3 changes: 2 additions & 1 deletion scripts/launch_graph_store_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import yaml

from gigl.distributed.utils import get_free_ports
from gigl.env.constants import GIGL_RESOURCE_CONFIG_URI_ENV_KEY

DEFAULT_JOB_ROOT = Path("/tmp/gigl")
JOB_ID_SUFFIX_RE = re.compile(r"^(.*?)(\d+)$")
Expand Down Expand Up @@ -735,7 +736,7 @@ def main(argv: Optional[list[str]] = None) -> int:
"MASTER_ADDR": args.host,
"MASTER_PORT": str(master_port),
"WORLD_SIZE": str(world_size),
"RESOURCE_CONFIG_PATH": resource_config_uri,
GIGL_RESOURCE_CONFIG_URI_ENV_KEY: resource_config_uri,
"COMPUTE_CLUSTER_LOCAL_WORLD_SIZE": str(args.compute_procs_per_node),
"PYTHONUNBUFFERED": "1",
}
Expand Down