diff --git a/developer/README.md b/developer/README.md index 27bd1f162..d1992e524 100644 --- a/developer/README.md +++ b/developer/README.md @@ -27,6 +27,14 @@ The `requirements/update_python_packages.py` script automates the creation and m - Generate the `frozen_requirements.txt` file for consistency. 3. Validate that the `pkgs.zip` file contains all necessary packages and matches the updated requirements. +### Shared (Python-version-independent) vendor zips + +In addition to the per-version `pkgs.zip`, `requirements/any/` holds pure-Python +packages that are safe to load across every supported Python version (e.g. the +Autodesk Flow Data Beta SDK). Each zip is auto-discovered by +`tank_vendor/__init__.py` and must contain the importable package plus its +`.dist-info/` directory at the zip's root. + ## How to upgrade ruamel.yaml Until version `0.10.10`, the contents of the library was located at `tank_vendor/ruamel_yaml`. diff --git a/python/tank_vendor/__init__.py b/python/tank_vendor/__init__.py index 6fe433ca2..00ddf5859 100644 --- a/python/tank_vendor/__init__.py +++ b/python/tank_vendor/__init__.py @@ -12,9 +12,14 @@ tank_vendor module - Third-party dependency management for Shotgun Toolkit. This module handles loading and importing third-party Python packages from -version-specific ZIP archives (pkgs.zip). It provides: - -1. Auto-discovery of packages in pkgs.zip +ZIP archives. It provides: + +1. Auto-discovery of packages in two locations: + - requirements/{major}.{minor}/pkgs.zip (per-Python-version; present in + source checkouts, absent when + tk-core is pip-installed and + dependencies come from the env) + - requirements/any/*.zip (Python-version-independent, optional) 2. Lazy import hook for transparent tank_vendor.* namespace aliasing 3. 
Package-specific patches (e.g., SSL certificate handling for shotgun_api3) @@ -22,14 +27,21 @@ # Direct imports work automatically: from tank_vendor import yaml from tank_vendor.shotgun_api3 import Shotgun + from tank_vendor import flow_data_sdk # Submodule imports work via lazy loading: from tank_vendor.shotgun_api3.lib import httplib2 + from tank_vendor.flow_data_sdk.base import client # Mock.patch works seamlessly: mock.patch("tank_vendor.shotgun_api3.Shotgun.find") -Supported Python versions: 3.7+ +Shared zips in requirements/any/ are loaded after pkgs.zip, so per-version +pinned packages take precedence over anything in the shared directory. +Packages whose top-level name is already registered are skipped with a +warning. + +Supported Python versions: 3.9+ """ import pathlib @@ -199,7 +211,7 @@ def _install_import_hook(): Install a lazy import hook that redirects tank_vendor.* imports to real packages. This enables transparent namespace aliasing, allowing code to use tank_vendor.package - while the actual package is loaded from pkgs.zip without the tank_vendor prefix. + while the actual package is loaded from a ZIP without the tank_vendor prefix. Examples: from tank_vendor.shotgun_api3.lib import httplib2 @@ -227,121 +239,234 @@ def _install_import_hook(): sys.meta_path.insert(0, sys._tank_vendor_meta_finder) -# ============================================================================ -# MAIN INITIALIZATION: Load third-party packages from pkgs.zip -# ============================================================================ +def _discover_top_level_packages(zip_path): + """ + Return the set of top-level importable package names inside a zip. 
+ + Filters out: + - .dist-info: Package metadata directories (still in zip for importlib.metadata, + but not importable as packages) + - __pycache__: Python bytecode cache + - .pyd/.so/.dylib/.dll: Platform-specific binary extensions + - _*: Private/internal modules (e.g., _ruamel_yaml.cp311-win_amd64.pyd) + """ + with zipfile.ZipFile(zip_path, "r") as zf: + top_level = set() + for name in zf.namelist(): + parts = name.split("/") + if parts[0] and not parts[0].endswith(".py"): + top_level.add(parts[0]) + elif parts[0].endswith(".py") and parts[0] != "__pycache__": + top_level.add(parts[0][:-3]) + + return { + pkg + for pkg in top_level + if not pkg.endswith(".dist-info") + and pkg != "__pycache__" + and not pkg.endswith(".py") + and not pkg.endswith(".pyd") + and not pkg.endswith(".so") + and not pkg.endswith(".dylib") + and not pkg.endswith(".dll") + and not pkg.startswith("_") + } + + +def _load_packages_from_zip(zip_path): + """ + Validate a vendor zip, insert it at the front of sys.path, and register + its top-level packages under the tank_vendor namespace. -# Construct path to Python version-specific pkgs.zip containing third-party dependencies. -# Path structure: /requirements/./pkgs.zip -# Example: requirements/3.11/pkgs.zip for Python 3.11 -pkgs_zip_path = ( - pathlib.Path(__file__).resolve().parent.parent.parent - / "requirements" - / f"{sys.version_info.major}.{sys.version_info.minor}" - / "pkgs.zip" -) + Missing or unreadable zips are tolerated (return False, with a warning + for unreadable). A wholesale failure during package discovery/import + raises RuntimeError after cleaning the zip path off sys.path. Individual + package import failures inside the zip warn and are skipped. + + Each zip is always inserted at sys.path[0], so the LAST zip loaded ends + up at the front of sys.path. Collisions are resolved by sys.modules + (first-registered wins), independent of sys.path order — see callers + for the intentional load order. 
+ + Args: + zip_path: pathlib.Path to the zip file. + + Returns: + True if the zip was successfully loaded, False if it was missing + or unreadable. + """ + # Step 1: Validate the zip exists, is a file (not a directory of extracted + # contents, as some CI environments produce), and can be opened as a ZIP. + # Missing zips are silent (pip-installed setups have no pkgs.zip). Unreadable + # zips warn so the failure mode is visible, but don't fail the import. + if not zip_path.exists() or not zip_path.is_file(): + return False + + try: + with zipfile.ZipFile(zip_path, "r") as zf: + zf.namelist() + except (zipfile.BadZipFile, OSError, IOError) as e: + warnings.warn( + f"Failed to load packages from {zip_path}: {e}. " + "Any dependencies it would have provided will need to be resolved " + "from the Python environment, or will fail at import time.", + RuntimeWarning, + stacklevel=2, + ) + return False + + # Step 2: Put the zip on sys.path so Python can import directly from it. + # Insertion ordering is load-bearing: importlib.metadata.version() resolves + # dist-info inside a zip only after the zip is on sys.path. + sys.path.insert(0, str(zip_path)) -# Validate pkgs.zip before attempting to load from it. -# This provides backward compatibility for: -# - Installations using old vendored copies -# - Temporary locations without the requirements directory -# - CI/CD environments where pkgs.zip might be extracted to a directory -_pkgs_zip_valid = False -if pkgs_zip_path.exists(): - # Check if it's a file (not a directory) - in some CI environments, - # pkgs.zip might be extracted to a directory instead of kept as a ZIP. 
- if pkgs_zip_path.is_file(): - # Validate that it's actually a valid ZIP file before adding to sys.path - try: - with zipfile.ZipFile(pkgs_zip_path, "r") as zf: - # Quick validation - just check that we can read the ZIP directory - zf.namelist() - _pkgs_zip_valid = True - except (zipfile.BadZipFile, OSError, IOError) as e: - # Not a valid ZIP file or can't be read - skip loading from pkgs.zip - warnings.warn( - f"Failed to load packages from {pkgs_zip_path}: {e}. " - "Third-party dependencies will be loaded from the Python environment instead.", - RuntimeWarning, - stacklevel=2, - ) - -# If pkgs.zip is not found, assume pip-style installation where dependencies -# are installed directly in the Python environment. In this case, we still -# install the import hook to enable tank_vendor.* aliasing for compatibility. -if not _pkgs_zip_valid: - # Install import hook even without pkgs.zip for pip installations - _install_import_hook() -else: - # Add pkgs.zip to sys.path so Python can import packages directly from the ZIP. - # Insert at position 0 to prioritize over other installed packages. 
- sys.path.insert(0, str(pkgs_zip_path)) try: - # Step 1: Auto-discover all top-level packages in pkgs.zip import importlib - with zipfile.ZipFile(pkgs_zip_path, "r") as zf: - # Get all top-level package names from the ZIP - top_level_packages = set() - for name in zf.namelist(): - # Extract first component of path (top-level package/module) - parts = name.split("/") - if parts[0] and not parts[0].endswith(".py"): - # It's a package directory - top_level_packages.add(parts[0]) - elif parts[0].endswith(".py") and parts[0] != "__pycache__": - # It's a top-level module file - top_level_packages.add(parts[0][:-3]) # Remove .py - - # Filter out non-importable items: - # - .dist-info: Package metadata directories - # - __pycache__: Python bytecode cache - # - .py: Single file modules (already captured as packages) - # - .pyd/.so/.dylib: Platform-specific binary extensions - # - _*: Private/internal modules (e.g., _ruamel_yaml.cp311-win_amd64.pyd) - top_level_packages = { - pkg - for pkg in top_level_packages - if not pkg.endswith(".dist-info") - and pkg != "__pycache__" - and not pkg.endswith(".py") - and not pkg.endswith(".pyd") # Windows binary modules - and not pkg.endswith(".so") # Unix/Linux binary modules - and not pkg.endswith(".dylib") # macOS binary modules - and not pkg.startswith("_") # Private/internal modules - } - - # Step 2: Import and register each top-level package under tank_vendor namespace + # Step 3: Auto-discover all top-level packages in the zip. + top_level_packages = _discover_top_level_packages(zip_path) + + # Step 4: Import and register each top-level package under the + # tank_vendor namespace. for package_name in sorted(top_level_packages): + # Collision check: an earlier zip already claimed this name. + # Earlier zips win (pkgs.zip is loaded before requirements/any/). 
+ if f"tank_vendor.{package_name}" in sys.modules: + warnings.warn( + f"Skipping {package_name} from {zip_path}: " + f"already registered under tank_vendor.{package_name} " + f"from an earlier zip.", + RuntimeWarning, + ) + continue + try: + # Import the real module and alias it under tank_vendor.* in + # sys.modules; also expose it as an attribute on this package + # so `from tank_vendor import {package_name}` works without going + # through the meta path finder. + # Import the package mod = importlib.import_module(package_name) # Register in sys.modules under tank_vendor namespace sys.modules[f"tank_vendor.{package_name}"] = mod - - # Also set as attribute on tank_vendor module for direct access globals()[package_name] = mod - - except ImportError as e: - # Some packages might not import cleanly on all platforms - # Log but don't fail - they might not be needed - warnings.warn(f"Could not import {package_name} from pkgs.zip: {e}") - - # Step 3: Install import hook for lazy submodule loading - # This enables imports like: from tank_vendor.shotgun_api3.lib import httplib2 - # without pre-importing all submodules (which can fail on version incompatibilities) - _install_import_hook() - - # Step 4: Apply package-specific patches - # These patches work around limitations or fix issues with specific packages - if "shotgun_api3" in sys.modules: - _patch_shotgun_api3_certs(pkgs_zip_path) + except Exception as e: + # Per-package import failures are tolerated. The catch is + # intentionally broad: a future shared vendor using syntax + # newer than the current Python (e.g. PEP 604 union syntax + # `int | None`) would raise SyntaxError at parse time, not + # ImportError. flow_data_sdk on Python 3.9 raises ImportError + # for its references to types.UnionType / typing.TypeAlias, + # which this catch also handles. Wholesale loader failures + # are still handled by the outer try/except. 
+ warnings.warn( + f"Could not import {package_name} from {zip_path}: {e}", + RuntimeWarning, + stacklevel=2, + ) except Exception as e: - # Clean up sys.path on failure to avoid leaving it in an inconsistent state - # with a non-functional ZIP path that could interfere with subsequent imports - sys.path.remove(str(pkgs_zip_path)) + # Clean up sys.path on a wholesale failure so we don't leave a + # non-functional zip on the path interfering with other imports. + try: + sys.path.remove(str(zip_path)) + except ValueError: + pass raise RuntimeError( - f"Failed to import required modules from {pkgs_zip_path}: {e}" + f"Failed to import required modules from {zip_path}: {e}" ) from e + + return True + + +def _release_importlib_metadata_handles(): + """ + Release file handles that importlib.metadata holds on vendor zips. + + Windows-only workaround. + + importlib.metadata.FastPath.__new__ is @lru_cache'd, so the FastPath + instance for any zip it probes is kept alive forever. Inside + FastPath.zip_children(), the line `self.joinpath = zip_path.joinpath` + binds the zipfile.Path (and its underlying open ZipFile) as an instance + attribute on the cached FastPath — so the file handle stays open for + the lifetime of the cache. + + This bites us on Windows / Python 3.13 when flow_data_sdk's _version.py + runs importlib.metadata.version("flow-data-sdk") during import. The + cached FastPath keeps our shared zip open, which then prevents the + tank share_core command from moving install/core (WinError 32 sharing + violation). + + Linux and macOS don't have Windows' sharing-violation semantics — moving + or deleting files with open handles is allowed — so this cleanup is a + no-op on those platforms (and was observed to break a Linux/3.13 + integration test, so we gate strictly on win32). + + invalidate_caches() calls FastPath.__new__.cache_clear() which drops + the FastPath references. 
gc.collect() forces __del__ on the underlying + ZipFile objects so the handles close immediately rather than at the + next garbage collection cycle. + """ + if sys.platform != "win32": + return + from importlib.metadata import MetadataPathFinder + # invalidate_caches() is declared as `def invalidate_caches(cls)` without + # @classmethod in some Python versions, so call it on an instance for + # cross-version compatibility. + MetadataPathFinder().invalidate_caches() + import gc + + gc.collect() + + +# ============================================================================ +# MAIN INITIALIZATION +# ============================================================================ + +_requirements_dir = pathlib.Path(__file__).resolve().parent.parent.parent / "requirements" + +# Load order matters for two distinct reasons: +# +# 1. sys.modules registration: the FIRST zip to register a top-level package +# wins (later zips' duplicates are skipped). So pkgs.zip is loaded first +# to keep its version-pinned dependencies authoritative. +# +# 2. sys.path order: we insert each zip at sys.path[0], so the LAST zip +# loaded ends up at the front. We want shared zips ahead of pkgs.zip on +# sys.path so that importlib.metadata.version() lookups (e.g. flow_data_sdk's +# _version.py querying its own dist-info) short-circuit on the shared zip +# and never scan pkgs.zip. Scanning pkgs.zip via importlib.metadata caches +# a FastPath instance that holds an open zipfile, which on Windows +# prevents the tank share_core command from moving install/core. +# +# So: load pkgs.zip first (sys.modules), then shared zips (sys.path front). +_pkgs_zip_path = ( + _requirements_dir + / f"{sys.version_info.major}.{sys.version_info.minor}" + / "pkgs.zip" +) +_pkgs_loaded = _load_packages_from_zip(_pkgs_zip_path) +if _pkgs_loaded and "shotgun_api3" in sys.modules: + _patch_shotgun_api3_certs(_pkgs_zip_path) + +# Shared zips (optional, Python-version-independent). 
Drop a *.zip into +# requirements/any/ and it will be loaded automatically. Shared vendors are +# expected to use the system trust store and not ship data files that would +# need extraction from inside the zip. +_shared_dir = _requirements_dir / "any" +if _shared_dir.is_dir(): + for _shared_zip in sorted(_shared_dir.glob("*.zip")): + _load_packages_from_zip(_shared_zip) + +# Install the lazy import hook for nested submodule access. +# Idempotent via the _tank_vendor_meta_finder guard, so calling it once +# after both load steps is safe and sufficient. +_install_import_hook() + +# Windows-only cleanup: drop importlib.metadata's cached file handles on our +# vendor zips so the tank share_core command can move install/core without +# hitting WinError 32 sharing violations. No-op on Linux/macOS. +_release_importlib_metadata_handles() diff --git a/requirements/any/flow_data_sdk-beta.zip b/requirements/any/flow_data_sdk-beta.zip new file mode 100644 index 000000000..a931e87c0 Binary files /dev/null and b/requirements/any/flow_data_sdk-beta.zip differ diff --git a/tests/core_tests/test_tank_vendor.py b/tests/core_tests/test_tank_vendor.py index 2853154ec..f34969b71 100644 --- a/tests/core_tests/test_tank_vendor.py +++ b/tests/core_tests/test_tank_vendor.py @@ -17,7 +17,8 @@ from tank_test.tank_test_base import ShotgunTestBase # Configuration: Add or remove packages here to test different third-party libraries -# Only include packages that are directly bundled in requirements//pkgs.zip +# Packages from pkgs.zip are always tested. Packages from requirements/any/ +# are version-gated below. PACKAGES_TO_TEST = [ { "name": "yaml", @@ -41,6 +42,24 @@ }, ] +# Flow Data SDK uses types.UnionType and typing.TypeAlias, both 3.10+ only. +# On 3.7/3.9 the shared loader will warn-and-continue; do not assert it here. 
+if sys.version_info >= (3, 10): + PACKAGES_TO_TEST.append( + { + "name": "flow_data_sdk", + "attributes": [ + "GQLClient", + "WorkflowContext", + "SDK_VERSION", + "DEFAULT_ENDPOINT", + "DEFAULT_AUTH_BASE_URL", + "GQLAPIError", + ], + "description": "Autodesk Flow Data SDK (beta)", + } + ) + class TestTankVendorImports(ShotgunTestBase): """Test importing third-party packages via tank_vendor namespace.""" @@ -246,5 +265,82 @@ def test_cert_file_returns_path(self): self.assertTrue(len(cert_path) > 0) +@unittest.skipIf( + sys.version_info < (3, 10), + "Flow Data SDK requires Python 3.10+ (uses types.UnionType / typing.TypeAlias)", +) +class TestFlowDataSDK(ShotgunTestBase): + """Test the Flow Data SDK loaded from requirements/any/.""" + + def test_submodule_import(self): + """Lazy meta-finder resolves nested imports inside the shared zip.""" + from tank_vendor.flow_data_sdk.base import client + from tank_vendor.flow_data_sdk.base.exceptions import GQLAPIError + + self.assertTrue(hasattr(client, "BaseGQLClient")) + self.assertIsNotNone(GQLAPIError) + + def test_sdk_version_resolved_from_dist_info(self): + """ + Canary: SDK_VERSION must NOT fall back to 'local_dev'. + + flow_data_sdk/base/_version.py resolves SDK_VERSION via + importlib.metadata, which only succeeds when the SDK's .dist-info + directory was preserved in the shared zip. If this fails, the shared + zip in requirements/any/ is missing its .dist-info. 
+ """ + from tank_vendor import flow_data_sdk + + self.assertNotEqual( + flow_data_sdk.SDK_VERSION, + "local_dev", + "SDK_VERSION fell back to 'local_dev' — the shared zip is " + "missing .dist-info.", + ) + self.assertRegex( + flow_data_sdk.SDK_VERSION, + r"^\d+\.\d+", + "SDK_VERSION is not a PEP 440 version", + ) + + def test_dist_info_via_importlib_metadata(self): + """importlib.metadata sees the same version as the SDK reports.""" + from importlib.metadata import version + + from tank_vendor import flow_data_sdk + + self.assertEqual(version("flow-data-sdk"), flow_data_sdk.SDK_VERSION) + + +@unittest.skipIf( + sys.version_info >= (3, 10), + "Test verifies behaviour when the SDK is unimportable due to <3.10 syntax/types", +) +class TestFlowDataSDKAbsentOnOldPython(ShotgunTestBase): + """ + On Python 3.7 and 3.9, flow_data_sdk fails to import because its source + references types.UnionType and typing.TypeAlias (both 3.10+). The shared + loader is supposed to warn and continue, leaving tank_vendor itself + fully usable. These tests pin that contract. + """ + + def test_tank_vendor_imports_cleanly(self): + """`import tank_vendor` must succeed even when shared vendors fail to load.""" + import importlib + + import tank_vendor + + # Re-importing is a no-op when the module is already cached, but the + # call would raise if the loader had been left in an inconsistent + # state by a per-package failure. + importlib.import_module("tank_vendor") + self.assertIsNotNone(tank_vendor) + + def test_flow_data_sdk_unavailable(self): + """The SDK is not registered under the tank_vendor namespace.""" + with self.assertRaises(ImportError): + from tank_vendor import flow_data_sdk # noqa: F401 + + if __name__ == "__main__": unittest.main()