From 29cf4815560e5c71302056ee4d0839983ecd5306 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Thu, 5 Mar 2026 21:38:13 +0000 Subject: [PATCH] feat: Introduce Dataproc Environment Inspection Framework This change introduces a new Python-based GCP Environment Inspection Framework under the `introspection/` directory, aimed at proactively diagnosing network, IAM, and other configuration issues for Dataproc users, as outlined in https://issuetracker.google.com/issues/466590510. **Key Features:** * **Modular Architecture:** The framework uses a dynamic module loader (`inspect_env.py`) to load checks from subdirectories in `modules/`. * **Configuration:** Supports configuration via environment variables or an `env.json` file (specified by `INSPECT_ENV_CONFIG`), consistent with `../gcloud/` tooling. * **Initial Modules:** * `network`: Checks VPC, Subnetwork, and Private Google Access. * `iam`: Validates VM Service Account and Dataproc Service Agent roles. * `storage`: Checks IAM permissions on specified GCS buckets. * `cloudsql`: Performs basic checks on Cloud SQL instances if configured (e.g., for Hive Metastore). * **Testing:** Each module includes unit tests. The `network` module also includes a Dockerfile and `cloudbuild.yaml` for integration testing. **Modules Added:** * **`iam`:** Checks for necessary roles on the Dataproc VM service account and the Dataproc Service Agent. * **`network`:** Validates network and subnetwork existence and Private Google Access settings. Includes integration tests runnable in Cloud Build. * **`storage`:** Ensures the VM service account has appropriate permissions on GCS buckets listed in `DATAPROC_GCS_BUCKETS` or derived from `BUCKET` / `TEMP_BUCKET` in `env.json`. * **`cloudsql`:** Checks for the existence and `RUNNABLE` state of a Cloud SQL instance specified by `CLOUD_SQL_INSTANCE_NAME` in `env.json`. 
This framework provides a foundation for expanding checks to cover more services and scenarios, helping to meet the goals of Project Pathfinder to reduce customer friction during setup and migration. --- introspection/README.md | 48 ++++++ introspection/inspect_env.py | 128 ++++++++++++++ introspection/modules/cloudsql/README.md | 22 +++ introspection/modules/cloudsql/checks.py | 31 ++++ .../modules/cloudsql/requirements.txt | 1 + introspection/modules/cloudsql/test_checks.py | 57 +++++++ introspection/modules/iam/README.md | 11 ++ introspection/modules/iam/checks.py | 137 +++++++++++++++ introspection/modules/iam/requirements.txt | 2 + introspection/modules/iam/test_checks.py | 105 ++++++++++++ introspection/modules/network/Dockerfile | 21 +++ introspection/modules/network/README.md | 12 ++ introspection/modules/network/checks.py | 81 +++++++++ introspection/modules/network/cloudbuild.yaml | 26 +++ .../modules/network/integration_test.py | 128 ++++++++++++++ .../modules/network/requirements.txt | 2 + introspection/modules/network/test_checks.py | 159 ++++++++++++++++++ introspection/modules/storage/README.md | 16 ++ introspection/modules/storage/checks.py | 78 +++++++++ .../modules/storage/requirements.txt | 1 + introspection/modules/storage/test_checks.py | 85 ++++++++++ 21 files changed, 1151 insertions(+) create mode 100644 introspection/README.md create mode 100644 introspection/inspect_env.py create mode 100644 introspection/modules/cloudsql/README.md create mode 100644 introspection/modules/cloudsql/checks.py create mode 100644 introspection/modules/cloudsql/requirements.txt create mode 100644 introspection/modules/cloudsql/test_checks.py create mode 100644 introspection/modules/iam/README.md create mode 100644 introspection/modules/iam/checks.py create mode 100644 introspection/modules/iam/requirements.txt create mode 100644 introspection/modules/iam/test_checks.py create mode 100644 introspection/modules/network/Dockerfile create mode 100644 
introspection/modules/network/README.md create mode 100644 introspection/modules/network/checks.py create mode 100644 introspection/modules/network/cloudbuild.yaml create mode 100644 introspection/modules/network/integration_test.py create mode 100644 introspection/modules/network/requirements.txt create mode 100644 introspection/modules/network/test_checks.py create mode 100644 introspection/modules/storage/README.md create mode 100644 introspection/modules/storage/checks.py create mode 100644 introspection/modules/storage/requirements.txt create mode 100644 introspection/modules/storage/test_checks.py diff --git a/introspection/README.md b/introspection/README.md new file mode 100644 index 0000000..04377fc --- /dev/null +++ b/introspection/README.md @@ -0,0 +1,48 @@ +# Dataproc Introspection Tools + +This directory contains tools for inspecting and validating GCP environments, +specifically for use with Dataproc and related services. + +## Prerequisites + +This tool is designed to inspect GCP environments, typically set up for Dataproc. To provision a standard or private network environment matching the assumptions of these checks, you can use the scripts located in the `../gcloud/` directory (relative to this `introspection` directory), as described in `../gcloud/README.md`. + +These scripts can create the VPC, subnets, service accounts, and proxy configurations that this introspection tool can then validate. It is recommended to use the same `env.json` file to configure both the provisioning scripts and this inspection tool for consistency. + +## Configuration + +Configuration can be provided via environment variables. Alternatively, you can use a single `env.json` file to specify most parameters. + +### Using `env.json` + +To use a JSON configuration file, set the `INSPECT_ENV_CONFIG` environment variable to the path of your file: + +```bash +export INSPECT_ENV_CONFIG="/path/to/your/env.json" +``` + +The tool will load parameters from this file. 
import importlib
import json
import os
import pkgutil
import sys


def load_config_from_env_json(config_path):
    """Load configuration from a JSON file.

    Args:
        config_path: Path to an env.json configuration file.

    Returns:
        The parsed dict, or {} if the file is missing, unreadable, or not
        valid JSON (a warning/info line is printed in those cases).
    """
    if not os.path.exists(config_path):
        print(f"[INFO] Config file not found: {config_path}")
        return {}
    try:
        with open(config_path, 'r') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"[WARN] Error decoding JSON from {config_path}: {e}")
    except Exception as e:
        print(f"[WARN] Error reading config file {config_path}: {e}")
    return {}


def run_inspections(compute_service, crm_service, iam_service, storage_service,
                    sqladmin_service, project_id, region, config,
                    selected_modules_str=None):
    """Dynamically discover and run inspection modules under modules/.

    Args:
        compute_service, crm_service, iam_service, storage_service,
        sqladmin_service: googleapiclient discovery resources passed through
            to the individual check modules.
        project_id: GCP project ID.
        region: GCP region.
        config: dict of configuration (typically loaded from env.json).
        selected_modules_str: optional comma-separated module filter; when
            None, falls back to the INSPECT_MODULES environment variable.

    Returns:
        Mapping of module name -> list of (status, message) results, where
        status is True (pass), False (fail) or None (informational).
    """
    # BUG FIX: the original signature read INSPECT_MODULES via a default
    # argument (evaluated once at import time) and was missing the closing
    # parenthesis; the docstring also preceded a statement. Resolve the
    # environment variable at call time instead.
    if selected_modules_str is None:
        selected_modules_str = os.environ.get("INSPECT_MODULES")
    selected_modules = selected_modules_str.split(',') if selected_modules_str else None

    all_results = {}
    package_path = os.path.dirname(__file__)

    for importer, modname, ispkg in pkgutil.iter_modules([os.path.join(package_path, 'modules')]):
        if selected_modules and modname not in selected_modules:
            continue
        if not ispkg:
            continue

        print(f"\n--- Running Inspections for Module: {modname} ---")
        try:
            module = importlib.import_module(f'modules.{modname}.checks')
            dataproc_sa_email = os.environ.get("DATAPROC_SA_EMAIL")
            if not dataproc_sa_email:
                # NOTE(review): the project number (and hence the default
                # Compute SA) is only derived while visiting the 'iam'
                # module, so for other modules dataproc_sa_email may stay
                # None unless the env var is set — confirm this is intended.
                project_number = module.get_project_number(crm_service, project_id) if modname == 'iam' else None
                if project_number:
                    dataproc_sa_email = f"{project_number}-compute@developer.gserviceaccount.com"

            # Each module has its own run_checks signature; dispatch by name.
            if modname == 'network':
                module_results = module.run_checks(compute_service, project_id, region)
            elif modname == 'iam':
                module_results = module.run_checks(compute_service, crm_service, iam_service, project_id, region)
            elif modname == 'storage':
                module_results = module.run_checks(storage_service, project_id, region, dataproc_sa_email)
            elif modname == 'cloudsql':
                module_results = module.run_checks(sqladmin_service, project_id, region, config)
            else:
                print(f"[WARN] No execution logic defined for module {modname}")
                continue

            all_results[modname] = module_results
            for success, message in module_results:
                # BUG FIX: a None status is informational, but the original
                # f-string printed it as FAIL ('PASS' if success else 'FAIL').
                label = 'PASS' if success else ('INFO' if success is None else 'FAIL')
                print(f"[{label}] {message}")
        except ImportError as e:
            print(f"[ERROR] Could not import module {modname}: {e}")
        except AttributeError as e:
            print(f"[ERROR] Module {modname} does not have a run_checks function or a required service is missing: {e}")
        except Exception as e:
            print(f"[ERROR] Error running module {modname}: {e}")
    return all_results


def main():
    """Resolve configuration, build the GCP API clients, and run all checks."""
    config = {}
    config_path = os.environ.get("INSPECT_ENV_CONFIG")
    if config_path:
        print(f"[INFO] Loading configuration from {config_path}")
        config = load_config_from_env_json(config_path)

    # Environment variables take precedence over env.json values.
    project_id = os.environ.get("GOOGLE_CLOUD_PROJECT", config.get("PROJECT_ID"))
    region = os.environ.get("REGION", config.get("REGION"))
    dataproc_sa_email = os.environ.get("DATAPROC_SA_EMAIL", config.get("DATAPROC_SA_EMAIL"))
    bucket = os.environ.get("BUCKET", config.get("BUCKET"))
    temp_bucket = os.environ.get("TEMP_BUCKET", config.get("TEMP_BUCKET"))

    # Export resolved values so check modules can read them from the env.
    if project_id and not os.environ.get("GOOGLE_CLOUD_PROJECT"):
        os.environ["GOOGLE_CLOUD_PROJECT"] = project_id
    if region and not os.environ.get("REGION"):
        os.environ["REGION"] = region
    if dataproc_sa_email and not os.environ.get("DATAPROC_SA_EMAIL"):
        os.environ["DATAPROC_SA_EMAIL"] = dataproc_sa_email

    # Merge BUCKET / TEMP_BUCKET into DATAPROC_GCS_BUCKETS without
    # introducing duplicates (the original could duplicate entries when the
    # env var was initially unset).
    gcs_buckets = [b for b in (bucket, temp_bucket) if b]
    if gcs_buckets:
        existing = os.environ.get("DATAPROC_GCS_BUCKETS")
        merged = existing.split(',') if existing else []
        for b in gcs_buckets:
            if b not in merged:
                merged.append(b)
        os.environ["DATAPROC_GCS_BUCKETS"] = ",".join(merged)

    if not project_id:
        print("[FAIL] Missing required configuration: GOOGLE_CLOUD_PROJECT (or PROJECT_ID in env.json)")
        return
    if not region:
        print("[FAIL] Missing required configuration: REGION (or REGION in env.json)")
        return

    print(f"--- Starting GCP Environment Inspection for Project: {project_id}, Region: {region} ---")

    # Imported lazily so this module can be imported (e.g. by unit tests,
    # which mock google.auth) without the GCP client libraries installed.
    import google.auth
    from googleapiclient.discovery import build

    try:
        credentials, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
        compute_service = build('compute', 'v1', credentials=credentials)
        crm_service = build('cloudresourcemanager', 'v1', credentials=credentials)
        iam_service = build('iam', 'v1', credentials=credentials)
        storage_service = build('storage', 'v1', credentials=credentials)
        sqladmin_service = build('sqladmin', 'v1beta4', credentials=credentials)

        run_inspections(compute_service, crm_service, iam_service, storage_service,
                        sqladmin_service, project_id, region, config)
    except google.auth.exceptions.DefaultCredentialsError:
        print("[FAIL] Could not obtain Application Default Credentials.")
    except Exception as e:
        print(f"[FAIL] An unexpected error occurred: {e}")


if __name__ == '__main__':
    # Add modules directory to path for dynamic import
    sys.path.append(os.path.join(os.path.dirname(__file__), 'modules'))
    main()
def run_checks(sqladmin_service, project_id, region, config):
    """Run Cloud SQL checks when CLOUD_SQL_INSTANCE_NAME is configured.

    The module is a no-op (single INFO result) unless the configuration
    names a Cloud SQL instance, e.g. one backing a Hive Metastore.

    Args:
        sqladmin_service: sqladmin API discovery resource.
        project_id: GCP project ID.
        region: GCP region (currently unused; reserved for future checks).
        config: dict loaded from env.json; read for CLOUD_SQL_INSTANCE_NAME.

    Returns:
        List of (status, message) tuples where status is True (pass),
        False (fail) or None (informational).
    """
    # NOTE: the original file imported `os` but never used it; the unused
    # import has been dropped.
    all_results = []
    instance_name = config.get("CLOUD_SQL_INSTANCE_NAME")

    if not instance_name:
        all_results.append((None, "INFO: CLOUD_SQL_INSTANCE_NAME not set in config. Skipping Cloud SQL checks."))
        return all_results

    all_results.append((None, f"INFO: Checking Cloud SQL instance '{instance_name}'."))

    try:
        # Check Instance Existence and State
        request = sqladmin_service.instances().get(project=project_id, instance=instance_name)
        instance = request.execute()
        all_results.append((True, f"Cloud SQL instance '{instance_name}' exists."))

        state = instance.get('state')
        if state == 'RUNNABLE':
            all_results.append((True, f"Cloud SQL instance '{instance_name}' is in state: {state}."))
        else:
            all_results.append((False, f"Cloud SQL instance '{instance_name}' is in state: {state}. Expected: RUNNABLE."))

        # TODO: Add network connectivity checks

    except Exception as e:
        # Any API failure (404, permission denied, transport error) is
        # reported as a single FAIL result rather than raised.
        all_results.append((False, f"Error checking Cloud SQL instance '{instance_name}': {e}"))

    return all_results
import os

# Imported with a fallback so this module can be imported (and unit tested)
# in environments without the Google API client libraries installed; when
# the real library is present, behavior is unchanged.
try:
    from googleapiclient.errors import HttpError
except ImportError:  # pragma: no cover
    class HttpError(Exception):
        """Fallback stand-in for googleapiclient.errors.HttpError."""

# NOTE: the original file also imported `build` from
# googleapiclient.discovery, but it was never used (all API clients are
# passed in by the caller); the unused import has been dropped.


def get_project_number(crm_service, project_id):
    """Return the numeric project number for project_id, or None on error."""
    try:
        project = crm_service.projects().get(projectId=project_id).execute()
        return project.get('projectNumber')
    except Exception as e:
        print(f"Error fetching project number for {project_id}: {e}")
        return None


def check_service_account_exists(iam_service, project_id, sa_email):
    """Check whether the service account exists in the project.

    Returns a single (status, message) tuple; a 404 maps to a clean
    "does not exist" failure, any other error to a generic failure.
    """
    try:
        name = f"projects/{project_id}/serviceAccounts/{sa_email}"
        iam_service.projects().serviceAccounts().get(name=name).execute()
        return (True, f"Service Account '{sa_email}' exists.")
    except HttpError as e:
        if e.resp.status == 404:
            return (False, f"Service Account '{sa_email}' does not exist.")
        else:
            return (False, f"Error checking Service Account '{sa_email}': {e}")
    except Exception as e:
        return (False, f"Error checking Service Account '{sa_email}': {e}")


def get_sa_roles(crm_service, project_id, sa_email):
    """Return the set of project-level roles granted to the service account.

    Raises:
        Exception: wrapping any getIamPolicy failure, so callers can report
            a single FAIL result.
    """
    member = f"serviceAccount:{sa_email}"
    try:
        policy = crm_service.projects().getIamPolicy(resource=project_id, body={}).execute()
        bindings = policy.get('bindings', [])
        found_roles = set()
        for binding in bindings:
            role = binding.get('role')
            if member in binding.get('members', []):
                found_roles.add(role)
        return found_roles
    except Exception as e:
        raise Exception(f"Error checking IAM policy for '{sa_email}': {e}")


def check_vm_service_account_roles(crm_service, project_id, sa_email):
    """Check required/optional roles for the Dataproc VM Service Account.

    Emits a FAIL for each missing required role, a PASS for each granted
    optional role, and flags roles/storage.admin as overly broad.
    """
    results = []
    if not sa_email:
        results.append((False, "DATAPROC_SA_EMAIL environment variable not set."))
        return results

    required_roles = ["roles/dataproc.worker"]
    optional_roles = {
        "roles/bigquery.dataEditor": "Required for full BigQuery read/write access.",
        "roles/bigquery.user": "Required for running BigQuery jobs.",
        "roles/cloudsql.client": "Required for connecting to Cloud SQL instances via proxy.",
        "roles/metastore.user": "Required for accessing Dataproc Metastore.",
        "roles/secretmanager.secretAccessor": "Required for accessing secrets in Secret Manager.",
        "roles/logging.logWriter": "Recommended for writing logs.",
        "roles/monitoring.metricWriter": "Recommended for writing metrics.",
    }
    gcs_admin_role = "roles/storage.admin"

    try:
        found_roles = get_sa_roles(crm_service, project_id, sa_email)

        for role in required_roles:
            if role in found_roles:
                results.append((True, f"VM SA '{sa_email}' has required role '{role}'."))
            else:
                results.append((False, f"VM SA '{sa_email}' is MISSING required role '{role}'."))

        for role, description in optional_roles.items():
            if role in found_roles:
                results.append((True, f"VM SA '{sa_email}' has optional role '{role}' ({description})."))

        if gcs_admin_role in found_roles:
            results.append((False, f"VM SA '{sa_email}' has '{gcs_admin_role}', which is overly broad. BEST PRACTICE: Use granular roles on specific GCS buckets. Bucket permissions are checked in the 'storage' module."))
        else:
            # BUG FIX: the module's unit tests assert on this message, but
            # the original code emitted nothing when storage.admin was
            # (correctly) absent.
            results.append((True, f"VM SA '{sa_email}' does not have '{gcs_admin_role}' (granular bucket-level roles are preferred)."))

        return results
    except Exception as e:
        return [(False, str(e))]


def check_dataproc_service_agent_roles(crm_service, project_id, project_number):
    """Check that the Dataproc Service Agent has roles/dataproc.serviceAgent."""
    results = []
    if not project_number:
        return [(False, f"Could not determine project number for project ID '{project_id}'.")]

    sa_email = f"service-{project_number}@dataproc-accounts.iam.gserviceaccount.com"
    required_role = "roles/dataproc.serviceAgent"

    try:
        found_roles = get_sa_roles(crm_service, project_id, sa_email)
        if required_role in found_roles:
            results.append((True, f"Dataproc SA '{sa_email}' has required role '{required_role}'."))
        else:
            results.append((False, f"Dataproc SA '{sa_email}' is MISSING required role '{required_role}'."))

        results.append((None, "INFO: If using Shared VPC, ensure Dataproc SA has 'roles/compute.networkUser' in the HOST project."))
        return results
    except Exception as e:
        return [(False, str(e))]


# TODO: CMEK checks — verify roles/cloudkms.cryptoKeyEncrypterDecrypter for
# the VM SA, Dataproc Service Agent and Compute Agent on the configured KMS
# key via getIamPolicy on the key resource.


def run_checks(compute_service, crm_service, iam_service, project_id, region):
    """Run all IAM checks and return a list of (status, message) tuples.

    Reads DATAPROC_SA_EMAIL from the environment, falling back to the
    default Compute Engine SA derived from the project number.
    """
    all_results = []
    project_number = get_project_number(crm_service, project_id)

    dataproc_sa_email = os.environ.get("DATAPROC_SA_EMAIL")
    if not dataproc_sa_email:
        if project_number:
            dataproc_sa_email = f"{project_number}-compute@developer.gserviceaccount.com"
            all_results.append((None, f"INFO: DATAPROC_SA_EMAIL not set, assuming default Compute SA: {dataproc_sa_email}"))
        else:
            all_results.append((False, "ERROR: DATAPROC_SA_EMAIL not set and could not form default SA name."))
            return all_results
    else:
        all_results.append((None, f"INFO: Checking VM SA: {dataproc_sa_email}"))

    exists_result = check_service_account_exists(iam_service, project_id, dataproc_sa_email)
    all_results.append(exists_result)

    if exists_result[0]:  # Only check roles if SA exists
        all_results.extend(check_vm_service_account_roles(crm_service, project_id, dataproc_sa_email))

    all_results.extend(check_dataproc_service_agent_roles(crm_service, project_id, project_number))

    all_results.append((None, "INFO: CMEK checks not yet implemented. If using CMEK, ensure service accounts have access to the KMS keys."))

    return all_results
FROM python:3.9-slim

# Install the gcloud CLI from Google's Debian apt repository.
# NOTE: apt-key is deprecated; write the keyring file directly and reference
# it via [signed-by=...] instead of `apt-key add`.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        gnupg \
    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends google-cloud-sdk \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# BUG FIX: `COPY ../../inspect_env.py .` is invalid — Docker COPY cannot
# reference paths outside the build context. cloudbuild.yaml builds with
# context `.` (the introspection/ root) and `-f modules/network/Dockerfile`,
# so all COPY paths below are relative to that root context.
COPY inspect_env.py .
COPY modules/network/ ./modules/network/

# Install Python dependencies for the network module.
RUN pip install --no-cache-dir -r ./modules/network/requirements.txt

# gcloud is authenticated by the Cloud Build service account at runtime.
# Module-level imports (os, google.auth, googleapiclient build/HttpError)
# remain at the top of checks.py unchanged.

def check_vpc(service, project_id, network_name):
    """Check that the VPC exists and is in custom subnet mode.

    Args:
        service: Compute Engine API client (v1).
        project_id: GCP project id to look in.
        network_name: Name of the VPC network.

    Returns:
        A list of (success, message) tuples; success is True/False.
    """
    results = []
    try:
        network = service.networks().get(project=project_id, network=network_name).execute()
        results.append((True, f"VPC '{network_name}' exists."))

        # Dataproc best practice is a custom-mode network; auto-mode
        # (autoCreateSubnetworks=True) is flagged as a failure.
        if network.get('autoCreateSubnetworks', True):
            results.append((False, f"VPC '{network_name}' is NOT in custom subnet mode (autoCreateSubnetworks is True)."))
        else:
            results.append((True, f"VPC '{network_name}' is in custom subnet mode."))
    except HttpError as e:
        if e.resp.status == 404:
            results.append((False, f"VPC '{network_name}' does not exist."))
        else:
            results.append((False, f"Error checking VPC '{network_name}': {e}"))
    return results

def check_subnetwork(service, project_id, region, network_name, subnet_name):
    """Check subnetwork existence, VPC membership, and Private Google Access.

    Returns:
        A list of (success, message) tuples.
    """
    results = []
    try:
        subnetwork = service.subnetworks().get(project=project_id, region=region, subnetwork=subnet_name).execute()
        results.append((True, f"Subnetwork '{subnet_name}' exists."))

        # Compare against the canonical self-link of the expected VPC; if the
        # subnet belongs to a different network, further checks are moot.
        expected_network = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_name}"
        if subnetwork.get('network') != expected_network:
            results.append((False, f"Subnetwork '{subnet_name}' does not belong to VPC '{network_name}'."))
            return results

        # Private Google Access lets internal-only VMs reach Google APIs —
        # required for Dataproc clusters without external IPs.
        if subnetwork.get('privateIpGoogleAccess', False):
            results.append((True, f"Subnetwork '{subnet_name}' has Private Google Access enabled."))
        else:
            results.append((False, f"Subnetwork '{subnet_name}' does not have Private Google Access enabled."))
    except HttpError as e:
        if e.resp.status == 404:
            results.append((False, f"Subnetwork '{subnet_name}' does not exist in region '{region}'."))
        else:
            results.append((False, f"Error checking subnetwork '{subnet_name}': {e}"))
    return results

def run_checks(service, project_id, region):
    """Entry point for the network module.

    BUG FIXES vs. the original:
    - Honors the injected `service`, `project_id` and `region` parameters
      instead of silently re-reading env vars and rebuilding the client
      (the original broke the dependency injection its unit tests rely on).
    - Always returns a list of (success, message) tuples — the original
      returned None on missing env vars (its own test_run_checks_missing_env
      expects a (False, msg) result) and could raise UnboundLocalError on the
      DefaultCredentialsError path because all_results was assigned inside
      the try block.
    - Reads NETWORK/SUBNET exactly once.

    Args:
        service: Compute API client, or None to build one from ADC.
        project_id: GCP project id.
        region: Region of the subnetwork.

    Returns:
        List of (success, message) tuples; never None.
    """
    all_results = []

    network_name = os.environ.get("NETWORK")
    subnet_name = os.environ.get("SUBNET")
    if not network_name:
        all_results.append((False, "NETWORK environment variable not set."))
        return all_results
    if not subnet_name:
        all_results.append((False, "SUBNET environment variable not set."))
        return all_results

    if service is None:
        try:
            credentials, _ = google.auth.default()
            service = build('compute', 'v1', credentials=credentials)
        except google.auth.exceptions.DefaultCredentialsError:
            all_results.append((False, "Could not obtain Application Default Credentials."))
            return all_results

    try:
        all_results.extend(check_vpc(service, project_id, network_name))
        all_results.extend(check_subnetwork(service, project_id, region, network_name, subnet_name))
    except Exception as e:  # defensive: surface unexpected API errors as a failed check
        all_results.append((False, f"An unexpected error occurred: {e}"))

    return all_results
class TestCheckNetworkIntegration(unittest.TestCase):
    """Integration tests for the network module, intended to run in Cloud Build.

    These tests create and delete real VPC networks in the test project, so
    they require compute.networks.* permissions plus the REGION and
    GOOGLE_CLOUD_PROJECT environment variables; otherwise the whole class is
    skipped.
    """

    @classmethod
    def setUpClass(cls):
        try:
            cls.credentials, cls.project_id = google.auth.default(
                scopes=["https://www.googleapis.com/auth/cloud-platform"])
            cls.service = discovery.build('compute', 'v1', credentials=cls.credentials)
            cls.region = os.environ.get('REGION')
            if not cls.region:
                raise EnvironmentError("REGION env var not set")
            if not os.environ.get('GOOGLE_CLOUD_PROJECT'):
                raise EnvironmentError("GOOGLE_CLOUD_PROJECT env var not set")
            # GOOGLE_CLOUD_PROJECT takes precedence over the ADC-derived project.
            cls.project_id = os.environ['GOOGLE_CLOUD_PROJECT']
        except Exception as e:
            raise unittest.SkipTest(f"GCP setup failed: {e}")

    def run_check_network(self, env_vars=None):
        """Run ../inspect_env.py in a subprocess with extra env vars.

        BUG FIX: the original signature used a mutable default argument
        (env_vars={}), which is shared across calls; None is the sentinel now.

        Returns:
            (returncode, stdout, stderr) of the subprocess.
        """
        env = os.environ.copy()
        env.update(env_vars or {})
        # Required env vars for the network module.
        env["PROJECT_ID"] = self.project_id
        env["REGION"] = self.region
        env["INSPECT_MODULES"] = "network"

        process = subprocess.Popen(
            ['python', '../inspect_env.py'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=env,
        )
        stdout, stderr = process.communicate()
        return process.returncode, stdout, stderr

    def _wait_for_operation(self, operation):
        """Poll a compute operation (zone/region/global scope) until DONE.

        Raises:
            Exception if the finished operation carries an 'error' payload.
        """
        while operation['status'] != 'DONE':
            time.sleep(2)
            op_name = operation['name']
            try:
                if 'zone' in operation:
                    operation = self.service.zoneOperations().get(
                        project=self.project_id,
                        zone=operation['zone'].split('/')[-1],
                        operation=op_name).execute()
                elif 'region' in operation:
                    operation = self.service.regionOperations().get(
                        project=self.project_id,
                        region=operation['region'].split('/')[-1],
                        operation=op_name).execute()
                else:
                    operation = self.service.globalOperations().get(
                        project=self.project_id, operation=op_name).execute()
            except Exception as e:
                print(f"Error fetching operation {op_name}: {e}")
                time.sleep(5)  # back off harder when the status fetch itself fails
                continue

        if 'error' in operation:
            raise Exception(f"Operation failed: {operation['error']}")
        return operation

    def test_01_detect_existing_network(self):
        """Create a real custom-mode VPC and verify the tool reports PASS."""
        network_name = f"test-net-exist-{int(time.time())}"
        env_vars = {
            "NETWORK": network_name,
            "SUBNET": "dummy-subnet",
        }

        # Setup: create the network, then always tear it down in finally.
        try:
            network_body = {
                "name": network_name,
                "autoCreateSubnetworks": False,
            }
            insert_op = self.service.networks().insert(
                project=self.project_id, body=network_body).execute()
            self._wait_for_operation(insert_op)
            print(f"Network {network_name} created.")

            returncode, stdout, stderr = self.run_check_network(env_vars)
            print(f"STDOUT:\n{stdout}")
            print(f"STDERR:\n{stderr}")

            self.assertIn(f"Checking Network '{network_name}'", stdout)
            self.assertIn(f"[PASS] VPC '{network_name}' exists.", stdout)
            self.assertIn(f"[PASS] VPC '{network_name}' is in custom subnet mode.", stdout)
            self.assertEqual(returncode, 0, f"check_network.py failed with stderr: {stderr}")
        finally:
            try:
                print(f"Deleting network {network_name}...")
                delete_op = self.service.networks().delete(
                    project=self.project_id, network=network_name).execute()
                self._wait_for_operation(delete_op)
                print(f"Network {network_name} deleted.")
            except Exception as e:
                print(f"Warning: Failed to delete network {network_name}: {e}")

    def test_02_detect_missing_network(self):
        """The tool must report FAIL and a non-zero exit for a missing network."""
        network_name = f"test-net-missing-{int(time.time())}"
        env_vars = {
            "NETWORK": network_name,
            "SUBNET": "dummy-subnet",
        }

        # Setup: nothing to do — the timestamped name should not exist.
        returncode, stdout, stderr = self.run_check_network(env_vars)
        print(f"STDOUT:\n{stdout}")
        print(f"STDERR:\n{stderr}")

        self.assertIn(f"Checking Network '{network_name}'", stdout)
        self.assertIn(f"[FAIL] VPC '{network_name}' does not exist.", stdout)
        # The script should return non-zero if any check fails.
        self.assertNotEqual(returncode, 0, "check_network.py should indicate failure")

    # TODO: Add tests for subnetwork creation/detection and PGA status
    # TODO: Add tests for firewall rules
self.region = "us-central1" + self.network_name = "test-network" + self.subnet_name = "test-subnet" + + os.environ["PROJECT_ID"] = self.project_id + os.environ["REGION"] = self.region + os.environ["NETWORK"] = self.network_name + os.environ["SUBNET"] = self.subnet_name + + def tearDown(self): + del os.environ["PROJECT_ID"] + del os.environ["REGION"] + del os.environ["NETWORK"] + del os.environ["SUBNET"] + + # --- check_vpc tests --- + def test_check_vpc_success(self): + self.mock_service.networks().get().execute.return_value = { + "name": self.network_name, + "autoCreateSubnetworks": False + } + results = check_vpc(self.mock_service, self.project_id, self.network_name) + self.assertEqual(len(results), 2) + self.assertTrue(results[0][0]) + self.assertIn(f"VPC '{self.network_name}' exists", results[0][1]) + self.assertTrue(results[1][0]) + self.assertIn("is in custom subnet mode", results[1][1]) + + def test_check_vpc_not_found(self): + mock_resp = mock.MagicMock() + mock_resp.status = 404 + self.mock_service.networks().get().execute.side_effect = HttpError(mock_resp, b"not found") + results = check_vpc(self.mock_service, self.project_id, self.network_name) + self.assertEqual(len(results), 1) + self.assertFalse(results[0][0]) + self.assertIn(f"VPC '{self.network_name}' does not exist", results[0][1]) + + def test_check_vpc_auto_mode(self): + self.mock_service.networks().get().execute.return_value = { + "name": self.network_name, + "autoCreateSubnetworks": True + } + results = check_vpc(self.mock_service, self.project_id, self.network_name) + self.assertEqual(len(results), 2) + self.assertTrue(results[0][0]) + self.assertFalse(results[1][0]) + self.assertIn("is NOT in custom subnet mode", results[1][1]) + + def test_check_vpc_other_error(self): + mock_resp = mock.MagicMock() + mock_resp.status = 500 + self.mock_service.networks().get().execute.side_effect = HttpError(mock_resp, b"internal error") + results = check_vpc(self.mock_service, self.project_id, 
self.network_name) + self.assertEqual(len(results), 1) + self.assertFalse(results[0][0]) + self.assertIn("Error checking VPC", results[0][1]) + + # --- check_subnetwork tests --- + def test_check_subnetwork_success(self): + self.mock_service.subnetworks().get().execute.return_value = { + "name": self.subnet_name, + "network": f"https://www.googleapis.com/compute/v1/projects/{self.project_id}/global/networks/{self.network_name}", + "privateIpGoogleAccess": True + } + results = check_subnetwork(self.mock_service, self.project_id, self.region, self.network_name, self.subnet_name) + self.assertEqual(len(results), 2) + self.assertTrue(results[0][0]) + self.assertIn(f"Subnetwork '{self.subnet_name}' exists", results[0][1]) + self.assertTrue(results[1][0]) + self.assertIn("has Private Google Access enabled", results[1][1]) + + def test_check_subnetwork_not_found(self): + mock_resp = mock.MagicMock() + mock_resp.status = 404 + self.mock_service.subnetworks().get().execute.side_effect = HttpError(mock_resp, b"not found") + results = check_subnetwork(self.mock_service, self.project_id, self.region, self.network_name, self.subnet_name) + self.assertEqual(len(results), 1) + self.assertFalse(results[0][0]) + self.assertIn(f"Subnetwork '{self.subnet_name}' does not exist", results[0][1]) + + def test_check_subnetwork_pga_disabled(self): + self.mock_service.subnetworks().get().execute.return_value = { + "name": self.subnet_name, + "network": f"https://www.googleapis.com/compute/v1/projects/{self.project_id}/global/networks/{self.network_name}", + "privateIpGoogleAccess": False + } + results = check_subnetwork(self.mock_service, self.project_id, self.region, self.network_name, self.subnet_name) + self.assertEqual(len(results), 2) + self.assertTrue(results[0][0]) + self.assertFalse(results[1][0]) + self.assertIn("does not have Private Google Access enabled", results[1][1]) + + def test_check_subnetwork_wrong_network(self): + 
self.mock_service.subnetworks().get().execute.return_value = { + "name": self.subnet_name, + "network": f"https://www.googleapis.com/compute/v1/projects/{self.project_id}/global/networks/other-network", + "privateIpGoogleAccess": True + } + results = check_subnetwork(self.mock_service, self.project_id, self.region, self.network_name, self.subnet_name) + self.assertEqual(len(results), 2) + self.assertTrue(results[0][0]) + self.assertFalse(results[1][0]) + self.assertIn(f"Subnetwork '{self.subnet_name}' does not belong to VPC '{self.network_name}'", results[1][1]) + + def test_check_subnetwork_other_error(self): + mock_resp = mock.MagicMock() + mock_resp.status = 500 + self.mock_service.subnetworks().get().execute.side_effect = HttpError(mock_resp, b"internal error") + results = check_subnetwork(self.mock_service, self.project_id, self.region, self.network_name, self.subnet_name) + self.assertEqual(len(results), 1) + self.assertFalse(results[0][0]) + self.assertIn("Error checking subnetwork", results[0][1]) + + # --- run_checks tests --- + def test_run_checks_success(self): + self.mock_service.networks().get().execute.return_value = { + "name": self.network_name, "autoCreateSubnetworks": False + } + self.mock_service.subnetworks().get().execute.return_value = { + "name": self.subnet_name, + "network": f"https://www.googleapis.com/compute/v1/projects/{self.project_id}/global/networks/{self.network_name}", + "privateIpGoogleAccess": True + } + + results = run_checks(self.mock_service, self.project_id, self.region) + output = " +".join([msg for success, msg in results]) + self.assertIn(f"VPC '{self.network_name}' exists.", output) + self.assertIn(f"VPC '{self.network_name}' is in custom subnet mode.", output) + self.assertIn(f"Subnetwork '{self.subnet_name}' exists.", output) + self.assertIn(f"Subnetwork '{self.subnet_name}' has Private Google Access enabled.", output) + self.assertTrue(all(success for success, msg in results)) + + def test_run_checks_missing_env(self): 
+ del os.environ["NETWORK"] + results = run_checks(self.mock_service, self.project_id, self.region) + output = " +".join([msg for success, msg in results]) + self.assertIn("NETWORK environment variable not set", output) + self.assertFalse(any(success for success, msg in results)) + +if __name__ == '__main__': + print("Running tests: python -m unittest modules.network.test_checks") + unittest.main() diff --git a/introspection/modules/storage/README.md b/introspection/modules/storage/README.md new file mode 100644 index 0000000..74a295a --- /dev/null +++ b/introspection/modules/storage/README.md @@ -0,0 +1,16 @@ +# Storage Inspection Module + +This module checks for common configuration issues related to Google Cloud Storage (GCS) usage with Dataproc. + +## Checks Performed + +1. **Bucket Permissions:** Verifies that the Dataproc VM Service Account has sufficient permissions on specified GCS buckets. + +## Configuration + +Parameters can be set via environment variables directly or loaded from an `env.json` file specified by `INSPECT_ENV_CONFIG` (see main README). + +**Environment Variables / `env.json` keys:** + +- `DATAPROC_SA_EMAIL`: The email address of the service account used by the Dataproc cluster VMs. +- `DATAPROC_GCS_BUCKETS`: A comma-separated string of GCS bucket names that the Dataproc cluster interacts with. This can be set directly, or it will be populated from `BUCKET` and `TEMP_BUCKET` if using an `env.json` file. 
# Module-level imports (os, googleapiclient HttpError) remain at the top of
# checks.py unchanged.

def check_bucket_iam_policy(storage_service, bucket_name, sa_email):
    """Check that sa_email holds a sufficient role on a GCS bucket.

    Dataproc VM service accounts need at least storage.objects.get /
    .create / .list on staging and temp buckets (delete is not strictly
    required for most jobs).

    Args:
        storage_service: GCS JSON API client.
        bucket_name: Bucket to inspect.
        sa_email: VM service-account email.

    Returns:
        A list of (success, message) tuples.
    """
    results = []
    member = f"serviceAccount:{sa_email}"

    try:
        policy = storage_service.buckets().getIamPolicy(bucket=bucket_name).execute()

        found_roles = set()
        for binding in policy.get('bindings', []):
            if member in binding.get('members', []):
                found_roles.add(binding.get('role'))

        # Simplification: rather than expanding every role into individual
        # permissions, accept the common predefined roles that grant the
        # required object permissions.
        sufficient_roles = {
            "roles/storage.objectAdmin",
            "roles/storage.objectUser",
            "roles/storage.admin",
            "roles/owner",
            "roles/editor",
        }

        # BUG FIX: sort roles for deterministic, readable messages — the
        # original interpolated a raw set, whose repr order is arbitrary.
        found_sorted = sorted(found_roles)
        if found_roles & sufficient_roles:
            results.append((True, f"VM SA '{sa_email}' appears to have sufficient roles ({found_sorted}) on bucket '{bucket_name}'."))
        else:
            # Custom roles are not expanded; this may be a false positive.
            results.append((False, f"VM SA '{sa_email}' may lack required object permissions (get, create, list) on bucket '{bucket_name}'. Found roles: {found_sorted}. Consider granting 'roles/storage.objectUser'."))

    except HttpError as e:
        if e.resp.status == 403:
            results.append((False, f"Permission denied to check IAM policy on bucket '{bucket_name}'. You need 'storage.buckets.getIamPolicy' permission."))
        elif e.resp.status == 404:
            results.append((False, f"Bucket '{bucket_name}' not found."))
        else:
            results.append((False, f"Error checking IAM policy for bucket '{bucket_name}': {e}"))
    except Exception as e:
        results.append((False, f"Error checking IAM policy for bucket '{bucket_name}': {e}"))

    return results

def run_checks(storage_service, project_id, region, sa_email):
    """Run all storage checks for the buckets in DATAPROC_GCS_BUCKETS.

    Args:
        storage_service: GCS JSON API client.
        project_id: GCP project id (unused here; kept for module interface parity).
        region: Region (unused here; kept for module interface parity).
        sa_email: VM service-account email to check.

    Returns:
        List of (success, message) tuples; success may be None for INFO rows.
    """
    all_results = []
    bucket_names_str = os.environ.get("DATAPROC_GCS_BUCKETS")

    if not sa_email:
        all_results.append((False, "VM Service Account email not provided to storage module."))
        return all_results

    if not bucket_names_str:
        all_results.append((None, "INFO: DATAPROC_GCS_BUCKETS environment variable not set. Skipping bucket IAM checks."))
        return all_results

    bucket_names = [b.strip() for b in bucket_names_str.split(',') if b.strip()]
    if not bucket_names:
        all_results.append((None, "INFO: DATAPROC_GCS_BUCKETS is set but contains no bucket names."))
        return all_results

    all_results.append((None, f"INFO: Checking bucket permissions for SA '{sa_email}' on: {', '.join(bucket_names)}"))

    for bucket_name in bucket_names:
        all_results.extend(check_bucket_iam_policy(storage_service, bucket_name, sa_email))

    return all_results
class TestStorageChecks(unittest.TestCase):
    """Unit tests for the storage inspection module (checks.py)."""

    def setUp(self):
        self.mock_storage_service = mock.MagicMock()
        self.project_id = "test-project"
        self.sa_email = "test-sa@test-project.iam.gserviceaccount.com"
        self.bucket_name = "test-bucket"

    def test_check_bucket_iam_policy_sufficient(self):
        self.mock_storage_service.buckets().getIamPolicy().execute.return_value = {
            "bindings": [
                {"role": "roles/storage.objectUser", "members": [f"serviceAccount:{self.sa_email}"]},
            ]
        }
        results = check_bucket_iam_policy(self.mock_storage_service, self.bucket_name, self.sa_email)
        self.assertTrue(results[0][0])
        self.assertIn("sufficient roles", results[0][1])

    def test_check_bucket_iam_policy_insufficient(self):
        self.mock_storage_service.buckets().getIamPolicy().execute.return_value = {
            "bindings": [
                {"role": "roles/storage.objectViewer", "members": [f"serviceAccount:{self.sa_email}"]},
            ]
        }
        results = check_bucket_iam_policy(self.mock_storage_service, self.bucket_name, self.sa_email)
        self.assertFalse(results[0][0])
        self.assertIn("may lack required object permissions", results[0][1])

    def test_check_bucket_iam_policy_not_found(self):
        mock_resp = mock.MagicMock()
        mock_resp.status = 404
        self.mock_storage_service.buckets().getIamPolicy().execute.side_effect = HttpError(mock_resp, b"not found")
        results = check_bucket_iam_policy(self.mock_storage_service, self.bucket_name, self.sa_email)
        self.assertFalse(results[0][0])
        self.assertIn("not found", results[0][1])

    def test_check_bucket_iam_policy_permission_denied(self):
        mock_resp = mock.MagicMock()
        mock_resp.status = 403
        self.mock_storage_service.buckets().getIamPolicy().execute.side_effect = HttpError(mock_resp, b"forbidden")
        results = check_bucket_iam_policy(self.mock_storage_service, self.bucket_name, self.sa_email)
        self.assertFalse(results[0][0])
        self.assertIn("Permission denied to check IAM policy", results[0][1])

    @mock.patch.dict(os.environ, {"DATAPROC_GCS_BUCKETS": "bucket1, bucket2"})
    @mock.patch('modules.storage.checks.check_bucket_iam_policy')
    def test_run_checks_with_buckets(self, mock_check_bucket):
        mock_check_bucket.return_value = [(True, "Bucket Check Pass")]
        results = run_checks(self.mock_storage_service, self.project_id, "us-central1", self.sa_email)
        self.assertEqual(mock_check_bucket.call_count, 2)
        mock_check_bucket.assert_any_call(self.mock_storage_service, "bucket1", self.sa_email)
        mock_check_bucket.assert_any_call(self.mock_storage_service, "bucket2", self.sa_email)
        # 1 INFO message + 2 results from mock_check_bucket.
        self.assertEqual(len(results), 3)

    # BUG FIX: clear the environment for this test — if the host environment
    # already exports DATAPROC_GCS_BUCKETS, the original test was flaky.
    @mock.patch.dict(os.environ, {}, clear=True)
    def test_run_checks_no_env_var(self):
        results = run_checks(self.mock_storage_service, self.project_id, "us-central1", self.sa_email)
        self.assertEqual(len(results), 1)
        self.assertIsNone(results[0][0])
        self.assertIn("DATAPROC_GCS_BUCKETS environment variable not set", results[0][1])

    @mock.patch.dict(os.environ, {"DATAPROC_GCS_BUCKETS": " "})
    def test_run_checks_empty_env_var(self):
        results = run_checks(self.mock_storage_service, self.project_id, "us-central1", self.sa_email)
        self.assertEqual(len(results), 1)
        self.assertIsNone(results[0][0])
        self.assertIn("DATAPROC_GCS_BUCKETS is set but contains no bucket names", results[0][1])

    def test_run_checks_no_sa_email(self):
        results = run_checks(self.mock_storage_service, self.project_id, "us-central1", None)
        self.assertEqual(len(results), 1)
        self.assertFalse(results[0][0])
        self.assertIn("VM Service Account email not provided", results[0][1])