diff --git a/core-platform/README.md b/core-platform/README.md index 86b167a..2868999 100644 --- a/core-platform/README.md +++ b/core-platform/README.md @@ -1 +1,5 @@ # Core Platform + +Projects and utilities: + +- [Serverless](serverless) diff --git a/core-platform/serverless/README.md b/core-platform/serverless/README.md new file mode 100644 index 0000000..20d4835 --- /dev/null +++ b/core-platform/serverless/README.md @@ -0,0 +1,5 @@ +# Serverless + +Projects in this category: + +- [Budget Policy Drift Tracker](./budget-policy-drift-tracker) diff --git a/core-platform/serverless/budget-policy-drift-tracker/Budget Policy Drift Tracker(with Test Data).ipynb b/core-platform/serverless/budget-policy-drift-tracker/Budget Policy Drift Tracker(with Test Data).ipynb new file mode 100644 index 0000000..a229e41 --- /dev/null +++ b/core-platform/serverless/budget-policy-drift-tracker/Budget Policy Drift Tracker(with Test Data).ipynb @@ -0,0 +1,1810 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "928b4868-9b3e-4933-9885-1fa7e397da39", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Notebook Overview\n", + "\n", + "This notebook demonstrates how to retrieve Databricks budget policies using the Account API, extract and normalize custom tags, and register the results in a Delta table for downstream analysis. \n", + "\n", + "**Key steps:**\n", + "- Authenticate with Azure AD using a service principal.\n", + "- Query the Databricks Account API for budget policies.\n", + "- Normalize custom tags into columns for easy querying.\n", + "- Store results in a Delta table using Unity Catalog.\n", + "- Compare expected vs. 
actual policy tags for validation.\n", + "\n", + "**Parameters:**\n", + "- `uc_catalog`: Unity Catalog catalog name (set via widget)\n", + "- `uc_schema`: Unity Catalog schema name (set via widget)\n", + "\n", + "**Requirements:**\n", + "- Service principal with Account Admin role.\n", + "- Access to Databricks Account API.\n", + "\n", + "**Outputs:**\n", + "- Delta table: `serverless_policies_registry` in the specified catalog and schema.\n", + "\n", + "**Usage:**\n", + "- Update widgets for catalog and schema as needed.\n", + "- Run the cell that creates the `uc_catalog`/`uc_schema` widgets and defines `UC_PREFIX` first, then run the remaining cells in order." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ab8182fc-b6dc-4981-baf8-c79e8182d616", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "**Step `1`**: Schedule this cell as a job to populate the `serverless_policies_registry_live` table." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "6f49095b-6500-49a5-b649-45ab3d737b74", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "\n", + "# Parameters required for Databricks Account API authentication\n", + "# - account_id: Databricks Account ID (can be found in the Databricks Account Console)\n", + "# - tenant_id: Azure AD Tenant ID (e.g. \"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\")\n", + "# - client_id: Azure AD Service Principal Client ID\n", + "# - client_secret: Azure AD Service Principal Client Secret\n", + "\n", + "account_id = \"\"\n", + "\n", + "# Azure AD Service Principal credentials (required for account-level APIs)\n", + "# The service principal must have Account Admin role\n", + "tenant_id = \"\" # e.g. 
\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\"\n", + "client_id = \"\" \n", + "client_secret = \"\"\n", + "\n", + "# Step 1: Get Azure AD token for Databricks resource\n", + "def get_azure_ad_token(tenant_id, client_id, client_secret):\n", + " url = f\"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token\"\n", + " payload = {\n", + " \"grant_type\": \"client_credentials\",\n", + " \"client_id\": client_id,\n", + " \"client_secret\": client_secret,\n", + " \"scope\": \"2ff814a6-3304-4ab8-85cb-cd0e6f879c1d/.default\" # Azure Databricks resource\n", + " }\n", + " response = requests.post(url, data=payload)\n", + " if response.status_code == 200:\n", + " return response.json()[\"access_token\"]\n", + " else:\n", + " raise Exception(f\"Failed to get token: {response.text}\")\n", + "\n", + "# Step 2: List budget policies\n", + "def get_budget_policies(account_id, token):\n", + " url = f\"https://accounts.azuredatabricks.net/api/2.1/accounts/{account_id}/budget-policies\"\n", + " headers = {\"Authorization\": f\"Bearer {token}\"}\n", + " all_policies = []\n", + " \n", + " while url:\n", + " response = requests.get(url, headers=headers)\n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " all_policies.extend(data.get('policies', []))\n", + " # Handle pagination\n", + " next_token = data.get('next_page_token')\n", + " url = f\"https://accounts.azuredatabricks.net/api/2.1/accounts/{account_id}/budget-policies?page_token={next_token}\" if next_token else None\n", + " else:\n", + " print(f\"Error {response.status_code}: {response.text}\")\n", + " break\n", + " \n", + " return all_policies\n", + "\n", + "# Execute\n", + "token = get_azure_ad_token(tenant_id, client_id, client_secret)\n", + "budget_policies = get_budget_policies(account_id, token)\n", + "\n", + "print(f\"Found {len(budget_policies)} budget policies\")\n", + "df = pd.DataFrame(budget_policies)\n", + "if not df.empty:\n", + " display(df)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "4cba46b3-e49d-4db3-b210-395dfa880ec3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# After creating the pandas DataFrame `df` from the budget policies\n", + "if not df.empty:\n", + " # Keep only the columns we need\n", + " tags_df = df[['policy_id','policy_name', 'custom_tags', 'binding_workspace_ids']].copy()\n", + "\n", + " # Convert the list of tag objects into a dict {key: value}\n", + " tags_dict_series = tags_df['custom_tags'].apply(\n", + " lambda tags: {t['key']: t['value'] for t in tags}\n", + " if isinstance(tags, list) else {}\n", + " )\n", + "\n", + " # Normalize the dicts into separate columns (one per tag key)\n", + " tags_expanded = pd.json_normalize(tags_dict_series)\n", + "\n", + " # Convert workspace_ids array to comma-separated string\n", + " workspace_ids_series = tags_df['binding_workspace_ids'].apply(\n", + " lambda ids: \",\".join(map(str, ids)) if isinstance(ids, list) else \"\"\n", + " ).rename('workspace_ids')\n", + "\n", + " # Combine policy_id, policy_name, workspace_ids with the expanded tag columns\n", + " result_df = pd.concat([\n", + " tags_df['policy_id'],\n", + " tags_df['policy_name'],\n", + " tags_expanded,\n", + " workspace_ids_series\n", + " ], axis=1)\n", + "\n", + " # Display the final table\n", + " display(result_df)\n", + "\n", + " # Write to Delta table serverless_policies_registry_live\n", + " spark_df = spark.createDataFrame(result_df)\n", + " spark_df.write.format(\"delta\") \\\n", + " .mode(\"overwrite\") \\\n", + " .option(\"overwriteSchema\", \"true\") \\\n", + " .saveAsTable(f\"{UC_PREFIX}.serverless_policies_registry_live\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + 
"nuid": "71d6792a-441d-4c4a-be71-69c5862fc9c6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "\n", + "**Step `2`**: Create sample dataset for `serverless_policies_registry`\n", + "\n", + "The sample dataset contains budget policy records, each with a `policy_id` and associated custom tags such as `division`, `department`, `environment`, and `service_name`. These tags represent metadata for each policy and are used to compare expected vs. actual values in downstream analysis. The dataset includes both matching and non-matching rows to validate tag normalization and policy registry accuracy.\n", + "\n", + "> **Note:** Update the custom tags in this dataset to match your organization's requirements and naming conventions for serverless policies." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f0f34153-36dd-4bce-bb5c-ec56e9aa6d57", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.databricks.empty-table+json": { + "directive_name": "CreateTable" + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "USE CATALOG IDENTIFIER(:uc_catalog);\n", + "USE SCHEMA IDENTIFIER(:uc_schema);\n", + "CREATE TABLE IF NOT EXISTS serverless_policies_registry (\n", + " policy_id STRING,\n", + " policy_name STRING,\n", + " division STRING,\n", + " department STRING,\n", + " environment STRING,\n", + " service_name STRING,\n", + " workspace_ids ARRAY,\n", + " managers ARRAY,\n", + " users ARRAY,\n", + " compliance_status STRING,\n", + " updated_at TIMESTAMP\n", + ") USING DELTA;\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + 
"cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "64fb8ffe-6e8d-4477-afd2-ec55f1d39b8e", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
policy_idpolicy_namedivisiondepartmentenvironmentservice_nameworkspace_idsmanagersuserscompliance_statusupdated_at
policy_001Finance PolicyFinanceAccountingProdBillingList(1001, 1002)List(alice@example.com)List(bob@example.com, carol@example.com)approved2026-05-08T00:00:00.000Z
policy_002Engineering PolicyEngineeringPlatformStagingDataPipelineList(2001)List(dave@example.com)List(eve@example.com)pending2026-05-08T00:00:00.000Z
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "policy_001", + "Finance Policy", + "Finance", + "Accounting", + "Prod", + "Billing", + [ + 1001, + 1002 + ], + [ + "alice@example.com" + ], + [ + "bob@example.com", + "carol@example.com" + ], + "approved", + "2026-05-08T00:00:00.000Z" + ], + [ + "policy_002", + "Engineering Policy", + "Engineering", + "Platform", + "Staging", + "DataPipeline", + [ + 2001 + ], + [ + "dave@example.com" + ], + [ + "eve@example.com" + ], + "pending", + "2026-05-08T00:00:00.000Z" + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "policy_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "policy_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "service_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "workspace_ids", + "type": "{\"containsNull\":true,\"elementType\":\"long\",\"type\":\"array\"}" + }, + { + "metadata": "{}", + "name": "managers", + "type": "{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}" + }, + { + "metadata": "{}", + "name": "users", + "type": "{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}" + }, + { + "metadata": "{}", + "name": "compliance_status", + 
"type": "\"string\"" + }, + { + "metadata": "{}", + "name": "updated_at", + "type": "\"timestamp\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Sample budget policies with the required custom tags\n", + "sample_policies = [\n", + " {\n", + " \"policy_id\": \"policy_001\",\n", + " \"policy_name\": \"Finance Policy\",\n", + " \"custom_tags\": [\n", + " {\"key\": \"division\", \"value\": \"Finance\"},\n", + " {\"key\": \"department\", \"value\": \"Accounting\"},\n", + " {\"key\": \"environment\", \"value\": \"Prod\"},\n", + " {\"key\": \"service_name\", \"value\": \"Billing\"}\n", + " ],\n", + " \"workspace_ids\": [1001, 1002],\n", + " \"managers\": [\"alice@example.com\"],\n", + " \"users\": [\"bob@example.com\", \"carol@example.com\"],\n", + " \"compliance_status\": \"approved\",\n", + " \"updated_at\": pd.Timestamp(\"2026-05-08\")\n", + " },\n", + " {\n", + " \"policy_id\": \"policy_002\",\n", + " \"policy_name\": \"Engineering Policy\",\n", + " \"custom_tags\": [\n", + " {\"key\": \"division\", \"value\": \"Engineering\"},\n", + " {\"key\": \"department\", \"value\": \"Platform\"},\n", + " {\"key\": \"environment\", \"value\": \"Staging\"},\n", + " {\"key\": \"service_name\", \"value\": \"DataPipeline\"}\n", + " ],\n", + " \"workspace_ids\": [2001],\n", + " \"managers\": [\"dave@example.com\"],\n", + " \"users\": [\"eve@example.com\"],\n", + " \"compliance_status\": \"pending\",\n", + " \"updated_at\": pd.Timestamp(\"2026-05-08\")\n", + " }\n", + "]\n", + "\n", + "# Normalize custom_tags into columns\n", + "df = pd.DataFrame(sample_policies)\n", + "tags_dict_series = df['custom_tags'].apply(\n", + " lambda tags: {t['key']: t['value'] for t in tags} if isinstance(tags, list) else {}\n", + ")\n", + "tags_expanded = pd.json_normalize(tags_dict_series)\n", + "\n", + "# Combine with other columns as per schema\n", + "result_df = pd.concat([\n", + " df['policy_id'],\n", + " 
df['policy_name'],\n", + " tags_expanded,\n", + " df['workspace_ids'],\n", + " df['managers'],\n", + " df['users'],\n", + " df['compliance_status'],\n", + " df['updated_at']\n", + "], axis=1)\n", + "\n", + "# Convert to Spark DataFrame and write to Delta table\n", + "spark_df = spark.createDataFrame(result_df)\n", + "spark_df.write.format(\"delta\") \\\n", + " .mode(\"overwrite\") \\\n", + " .option(\"overwriteSchema\", \"true\") \\\n", + " .saveAsTable(f\"{UC_PREFIX}.serverless_policies_registry\")\n", + "\n", + "display(spark.table(f\"{UC_PREFIX}.serverless_policies_registry\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "8cfd91f8-1e8a-41af-bf0b-0b02bea877eb", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"uc_catalog\", spark.catalog.currentCatalog())\n", + "dbutils.widgets.text(\"uc_schema\", spark.catalog.currentDatabase())\n", + "\n", + "UC_CATALOG = dbutils.widgets.get(\"uc_catalog\")\n", + "UC_SCHEMA = dbutils.widgets.get(\"uc_schema\")\n", + "\n", + "UC_PREFIX = f\"{UC_CATALOG}.{UC_SCHEMA}\"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "099cbe71-3b7c-4d5b-9e6e-db7b43e94c1f", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "**Step 3:** For testing, we use dummy data for serverless_policies_registry_live.\n", + " In production, customers should use the serverless_policies_registry_live table created in Step 1 (populated by the scheduled job)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "e20baf2c-3e83-4377-9519-32a82f342db6", + "showTitle": false, + "tableResultSettingsMap": { + "0": { + "dataGridStateBlob": "{\"version\":1,\"tableState\":{\"columnPinning\":{\"left\":[\"#row_number#\"],\"right\":[]},\"columnSizing\":{},\"columnVisibility\":{}},\"settings\":{\"columns\":{}},\"syncTimestamp\":1777567866833}", + "filterBlob": null, + "queryPlanFiltersBlob": null, + "tableResultIndex": 0 + } + }, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
policy_idpolicy_namedivisiondepartmentenvironmentservice_name
policy_001Finance PolicyFinanceAccountingProdBilling
policy_002Engineering PolicyEngineeringPlatformStagingDataPipeline
policy_001Finance PolicyFinanceAccountingProdBilling
policy_002Engineering PolicyEngineeringAdminStagingDataPipeline
policy_001Finance PolicyFinanceAccountingProdBilling
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "policy_001", + "Finance Policy", + "Finance", + "Accounting", + "Prod", + "Billing" + ], + [ + "policy_002", + "Engineering Policy", + "Engineering", + "Platform", + "Staging", + "DataPipeline" + ], + [ + "policy_001", + "Finance Policy", + "Finance", + "Accounting", + "Prod", + "Billing" + ], + [ + "policy_002", + "Engineering Policy", + "Engineering", + "Admin", + "Staging", + "DataPipeline" + ], + [ + "policy_001", + "Finance Policy", + "Finance", + "Accounting", + "Prod", + "Billing" + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "policy_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "policy_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "service_name", + "type": "\"string\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# Define the target table name\n", + "target_table = f\"{UC_PREFIX}.serverless_policies_registry_live_dummy\" ##Replace with target_table = f\"{UC_PREFIX}.serverless_policies_registry_live\"\n", + "\n", + "# Prepare 10 rows; 3 rows contain values not present in the source table\n", + "rows = [\n", + " # Existing‑like rows\n", + " 
{\"policy_id\": \"policy_001\", \"policy_name\": \"Finance Policy\", \"division\": \"Finance\", \"department\": \"Accounting\", \"environment\": \"Prod\", \"service_name\": \"Billing\"},\n", + " {\"policy_id\": \"policy_002\", \"policy_name\": \"Engineering Policy\", \"division\": \"Engineering\", \"department\": \"Platform\", \"environment\": \"Staging\", \"service_name\": \"DataPipeline\"},\n", + " {\"policy_id\": \"policy_001\", \"policy_name\": \"Finance Policy\", \"division\": \"Finance\", \"department\": \"Accounting\", \"environment\": \"Prod\", \"service_name\": \"Billing\"},\n", + " {\"policy_id\": \"policy_002\", \"policy_name\": \"Engineering Policy\", \"division\": \"Engineering\", \"department\": \"Admin\", \"environment\": \"Staging\", \"service_name\": \"DataPipeline\"},\n", + " {\"policy_id\": \"policy_001\", \"policy_name\": \"Finance Policy\", \"division\": \"Finance\", \"department\": \"Accounting\", \"environment\": \"Prod\", \"service_name\": \"Billing\"},\n", + " \n", + "]\n", + "\n", + "# Create Spark DataFrame including policy_name\n", + "spark_df = spark.createDataFrame(rows)\n", + "\n", + "# Reorder columns as requested\n", + "ordered_cols = [\"policy_id\", \"policy_name\", \"division\", \"department\", \"environment\", \"service_name\"]\n", + "spark_df = spark_df.select(*ordered_cols)\n", + "\n", + "# Write the DataFrame as a Delta table (overwrite if it already exists)\n", + "spark_df.write.format(\"delta\") \\\n", + " .mode(\"overwrite\") \\\n", + " .option(\"overwriteSchema\", \"true\") \\\n", + " .saveAsTable(target_table)\n", + "\n", + "# Verify the new table content\n", + "display(spark.table(target_table).select(*ordered_cols))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "0d94381f-d0fa-4d7f-a020-ebef2c536fe0", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Set A — Policy 
drift\n", + "Detects when centrally defined policies (tags, workspace bindings, managers) change in production.\n", + "\n", + "Sample SQL (drift on tags):\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "73948a4a-c848-46f3-a407-be2681974b08", + "showTitle": false, + "tableResultSettingsMap": { + "0": { + "dataGridStateBlob": "{\"version\":1,\"tableState\":{\"columnPinning\":{\"left\":[\"#row_number#\"],\"right\":[]},\"columnSizing\":{},\"columnVisibility\":{}},\"settings\":{\"columns\":{}},\"syncTimestamp\":1777567982835}", + "filterBlob": null, + "queryPlanFiltersBlob": null, + "tableResultIndex": 0 + } + }, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
policy_idpolicy_nameexpected_divisionactual_divisionexpected_departmentactual_departmentexpected_environmentactual_environmentexpected_service_nameactual_service_name
policy_002Engineering PolicyEngineeringEngineeringPlatformAdminStagingStagingDataPipelineDataPipeline
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "policy_002", + "Engineering Policy", + "Engineering", + "Engineering", + "Platform", + "Admin", + "Staging", + "Staging", + "DataPipeline", + "DataPipeline" + ] + ], + "datasetInfos": [ + { + "name": "_sqldf", + "schema": { + "fields": [ + { + "metadata": {}, + "name": "policy_id", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "policy_name", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_division", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_division", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_department", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_department", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_environment", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_environment", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_service_name", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_service_name", + "nullable": true, + "type": "string" + } + ], + "type": "struct" + }, + "tableIdentifier": null, + "typeStr": "pyspark.sql.connect.dataframe.DataFrame" + } + ], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": { + "createTempViewForImplicitDf": true, + "dataframeName": "_sqldf", + "executionCount": 99 + }, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + 
"schema": [ + { + "metadata": "{}", + "name": "policy_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "policy_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_service_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_service_name", + "type": "\"string\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "USE CATALOG IDENTIFIER(:uc_catalog);\n", + "USE SCHEMA IDENTIFIER(:uc_schema); \n", + "\n", + "WITH live AS (\n", + " SELECT\n", + " policy_id,\n", + " policy_name,\n", + " division,\n", + " department,\n", + " environment,\n", + " service_name\n", + " FROM serverless_policies_registry_live_dummy -- populated by a daily Job calling the Account API,replace with serverless_policies_registry_live\n", + ")\n", + "SELECT\n", + " r.policy_id,\n", + " r.policy_name,\n", + " r.division AS expected_division,\n", + " l.division AS actual_division,\n", + " r.department AS expected_department,\n", + " l.department AS actual_department,\n", + " r.environment AS expected_environment,\n", + " l.environment AS actual_environment,\n", + " r.service_name AS expected_service_name,\n", + " l.service_name AS actual_service_name\n", + "FROM serverless_policies_registry r\n", + "JOIN live l\n", + " ON l.policy_name = r.policy_name\n", + "WHERE r.division <> l.division\n", + " OR r.department <> l.department\n", + " OR r.environment <> l.environment\n", + " OR 
r.service_name <> l.service_name " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "44e1ba36-470d-49da-b73d-52f1cd808c8f", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Sample SQL (drift from audit logs):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d84c54df-dc54-4abb-9c6e-726a232d52e9", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
event_timeactoraction_namerequest_params
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [], + "datasetInfos": [ + { + "name": "_sqldf", + "schema": { + "fields": [ + { + "metadata": { + "comment": "Timestamp of the event" + }, + "name": "event_time", + "nullable": true, + "type": "timestamp" + }, + { + "metadata": {}, + "name": "actor", + "nullable": true, + "type": "string" + }, + { + "metadata": { + "comment": "The name of the action that has been performed as part of the audit event. Action names vary depending on the Databricks service (service_name). See [documentation of actions per service](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details." + }, + "name": "action_name", + "nullable": true, + "type": "string" + }, + { + "metadata": { + "comment": "A map of key/value pairs with the request parameters. Request parameters vary by request type. See [documentation of request parameters per action](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details." 
+ }, + "name": "request_params", + "nullable": true, + "type": { + "keyType": "string", + "type": "map", + "valueContainsNull": true, + "valueType": "string" + } + } + ], + "type": "struct" + }, + "tableIdentifier": null, + "typeStr": "pyspark.sql.connect.dataframe.DataFrame" + } + ], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": { + "createTempViewForImplicitDf": true, + "dataframeName": "_sqldf", + "executionCount": 19 + }, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{\"comment\": \"Timestamp of the event\"}", + "name": "event_time", + "type": "\"timestamp\"" + }, + { + "metadata": "{}", + "name": "actor", + "type": "\"string\"" + }, + { + "metadata": "{\"comment\": \"The name of the action that has been performed as part of the audit event. Action names vary depending on the Databricks service (service_name). See [documentation of actions per service](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details.\"}", + "name": "action_name", + "type": "\"string\"" + }, + { + "metadata": "{\"comment\": \"A map of key/value pairs with the request parameters. Request parameters vary by request type. 
See [documentation of request parameters per action](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details.\"}", + "name": "request_params", + "type": "{\"keyType\":\"string\",\"type\":\"map\",\"valueContainsNull\":true,\"valueType\":\"string\"}" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "SELECT\n", + " event_time,\n", + " user_identity.email AS actor,\n", + " action_name,\n", + " request_params\n", + "FROM system.access.audit\n", + "WHERE service_name = 'accounts'\n", + " AND action_name IN ('updateBudgetPolicy', 'deleteBudgetPolicy', 'updateRuleSet')\n", + " AND event_date >= current_date() - INTERVAL 1 DAY\n", + "ORDER BY event_time DESC;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "816a6cf2-265c-4f5d-8c69-4c9d501933c5", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Set B — Usage classification on system.billing.usage\n", + "\n", + "The notebook creates a sample Delta table `usage_dummy` to simulate serverless usage data for demonstration and testing purposes. This table includes columns such as `usage_date`, `workspace_id`, `identity_metadata`, `usage_metadata`, `custom_tags`, `usage_quantity`, `usage_unit`, and `billing_origin_product`, closely matching the schema of the real `system.billing.usage` table.\n", + "\n", + "**Note:** When adapting this workflow for production or customer environments, replace all references to `usage_dummy` with the actual `system.billing.usage` table.\n", + "\n", + "Classifies every serverless usage row as central-approved / central-drifted / workspace-created / default (no policy)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "daf14c85-c77c-4c67-a577-6faca2c12120", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
usage_dateworkspace_ididentity_metadatausage_metadatacustom_tagsusage_quantityusage_unitbilling_origin_product
2026-05-08ws_001Map(run_as -> user1)Map(budget_policy_id -> policy_001, cluster_id -> null)Map(division -> Finance, department -> Accounting, environment -> Prod, service_name -> Billing)10.0DBUJOBS
2026-05-08ws_002Map(run_as -> user2)Map(budget_policy_id -> policy_002, cluster_id -> null)Map(division -> Engineering, department -> Admin, environment -> Staging, service_name -> DataPipeline)20.0DBUSQL
2026-05-08ws_003Map(run_as -> user3)Map(budget_policy_id -> policy_999, cluster_id -> null)Map(division -> HR, department -> Recruiting, environment -> Dev, service_name -> Onboarding)5.0DBUINTERACTIVE
2026-05-08ws_004Map(run_as -> user4)Map(budget_policy_id -> null, cluster_id -> null)Map(division -> Legal, department -> Compliance, environment -> Test, service_name -> Audit)8.0DBUMODEL_SERVING
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "2026-05-08", + "ws_001", + { + "run_as": "user1" + }, + { + "budget_policy_id": "policy_001", + "cluster_id": null + }, + { + "department": "Accounting", + "division": "Finance", + "environment": "Prod", + "service_name": "Billing" + }, + 10.0, + "DBU", + "JOBS" + ], + [ + "2026-05-08", + "ws_002", + { + "run_as": "user2" + }, + { + "budget_policy_id": "policy_002", + "cluster_id": null + }, + { + "department": "Admin", + "division": "Engineering", + "environment": "Staging", + "service_name": "DataPipeline" + }, + 20.0, + "DBU", + "SQL" + ], + [ + "2026-05-08", + "ws_003", + { + "run_as": "user3" + }, + { + "budget_policy_id": "policy_999", + "cluster_id": null + }, + { + "department": "Recruiting", + "division": "HR", + "environment": "Dev", + "service_name": "Onboarding" + }, + 5.0, + "DBU", + "INTERACTIVE" + ], + [ + "2026-05-08", + "ws_004", + { + "run_as": "user4" + }, + { + "budget_policy_id": null, + "cluster_id": null + }, + { + "department": "Compliance", + "division": "Legal", + "environment": "Test", + "service_name": "Audit" + }, + 8.0, + "DBU", + "MODEL_SERVING" + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "usage_date", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "workspace_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "identity_metadata", + "type": 
"{\"keyType\":\"string\",\"type\":\"map\",\"valueContainsNull\":true,\"valueType\":\"string\"}" + }, + { + "metadata": "{}", + "name": "usage_metadata", + "type": "{\"keyType\":\"string\",\"type\":\"map\",\"valueContainsNull\":true,\"valueType\":\"string\"}" + }, + { + "metadata": "{}", + "name": "custom_tags", + "type": "{\"keyType\":\"string\",\"type\":\"map\",\"valueContainsNull\":true,\"valueType\":\"string\"}" + }, + { + "metadata": "{}", + "name": "usage_quantity", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "usage_unit", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "billing_origin_product", + "type": "\"string\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, MapType\n", + "from datetime import date, timedelta\n", + "\n", + "# Define schema similar to system.billing.usage\n", + "schema = StructType([\n", + " StructField(\"usage_date\", StringType(), True),\n", + " StructField(\"workspace_id\", StringType(), True),\n", + " StructField(\"identity_metadata\", MapType(StringType(), StringType()), True),\n", + " StructField(\"usage_metadata\", MapType(StringType(), StringType()), True),\n", + " StructField(\"custom_tags\", MapType(StringType(), StringType()), True),\n", + " StructField(\"usage_quantity\", DoubleType(), True),\n", + " StructField(\"usage_unit\", StringType(), True),\n", + " StructField(\"billing_origin_product\", StringType(), True),\n", + "])\n", + "\n", + "today = date.today()\n", + "rows = [\n", + " # central_approved: policy_id matches, tags match registry\n", + " {\n", + " \"usage_date\": str(today),\n", + " \"workspace_id\": \"ws_001\",\n", + " \"identity_metadata\": {\"run_as\": \"user1\"},\n", + " \"usage_metadata\": {\"budget_policy_id\": \"policy_001\", \"cluster_id\": None},\n", + " \"custom_tags\": {\"division\": \"Finance\", \"department\": \"Accounting\", 
\"environment\": \"Prod\", \"service_name\": \"Billing\"},\n", + " \"usage_quantity\": 10.0,\n", + " \"usage_unit\": \"DBU\",\n", + " \"billing_origin_product\": \"JOBS\"\n", + " },\n", + " # central_drifted: policy_id matches, tags differ from registry\n", + " {\n", + " \"usage_date\": str(today),\n", + " \"workspace_id\": \"ws_002\",\n", + " \"identity_metadata\": {\"run_as\": \"user2\"},\n", + " \"usage_metadata\": {\"budget_policy_id\": \"policy_002\", \"cluster_id\": None},\n", + " \"custom_tags\": {\"division\": \"Engineering\", \"department\": \"Admin\", \"environment\": \"Staging\", \"service_name\": \"DataPipeline\"},\n", + " \"usage_quantity\": 20.0,\n", + " \"usage_unit\": \"DBU\",\n", + " \"billing_origin_product\": \"SQL\"\n", + " },\n", + " # workspace_created: policy_id not in registry\n", + " {\n", + " \"usage_date\": str(today),\n", + " \"workspace_id\": \"ws_003\",\n", + " \"identity_metadata\": {\"run_as\": \"user3\"},\n", + " \"usage_metadata\": {\"budget_policy_id\": \"policy_999\", \"cluster_id\": None},\n", + " \"custom_tags\": {\"division\": \"HR\", \"department\": \"Recruiting\", \"environment\": \"Dev\", \"service_name\": \"Onboarding\"},\n", + " \"usage_quantity\": 5.0,\n", + " \"usage_unit\": \"DBU\",\n", + " \"billing_origin_product\": \"INTERACTIVE\"\n", + " },\n", + " # default_no_policy: policy_id is null\n", + " {\n", + " \"usage_date\": str(today),\n", + " \"workspace_id\": \"ws_004\",\n", + " \"identity_metadata\": {\"run_as\": \"user4\"},\n", + " \"usage_metadata\": {\"budget_policy_id\": None, \"cluster_id\": None},\n", + " \"custom_tags\": {\"division\": \"Legal\", \"department\": \"Compliance\", \"environment\": \"Test\", \"service_name\": \"Audit\"},\n", + " \"usage_quantity\": 8.0,\n", + " \"usage_unit\": \"DBU\",\n", + " \"billing_origin_product\": \"MODEL_SERVING\"\n", + " }\n", + "]\n", + "\n", + "spark_df = spark.createDataFrame(rows, schema=schema)\n", + 
"spark_df.write.format(\"delta\").mode(\"overwrite\").saveAsTable(f\"{UC_PREFIX}.usage_dummy\")\n", + "display(spark.table(f\"{UC_PREFIX}.usage_dummy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "906c24a8-9f75-4a6e-8878-464ea32cff10", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
classificationworkspace_iddivisiondepartmentenvironmentservice_nametotal_dbus
central_driftedws_002EngineeringAdminStagingDataPipeline20.0
central_approvedws_001FinanceAccountingProdBilling10.0
default_no_policyws_004LegalComplianceTestAudit8.0
workspace_createdws_003HRRecruitingDevOnboarding5.0
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "central_drifted", + "ws_002", + "Engineering", + "Admin", + "Staging", + "DataPipeline", + 20.0 + ], + [ + "central_approved", + "ws_001", + "Finance", + "Accounting", + "Prod", + "Billing", + 10.0 + ], + [ + "default_no_policy", + "ws_004", + "Legal", + "Compliance", + "Test", + "Audit", + 8.0 + ], + [ + "workspace_created", + "ws_003", + "HR", + "Recruiting", + "Dev", + "Onboarding", + 5.0 + ] + ], + "datasetInfos": [ + { + "name": "_sqldf", + "schema": { + "fields": [ + { + "metadata": {}, + "name": "classification", + "nullable": false, + "type": "string" + }, + { + "metadata": {}, + "name": "workspace_id", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "division", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "department", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "environment", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "service_name", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "total_dbus", + "nullable": true, + "type": "double" + } + ], + "type": "struct" + }, + "tableIdentifier": null, + "typeStr": "pyspark.sql.connect.dataframe.DataFrame" + } + ], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": { + "createTempViewForImplicitDf": true, + "dataframeName": "_sqldf", + "executionCount": 101 + }, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "classification", + "type": "\"string\"" + }, + 
{ + "metadata": "{}", + "name": "workspace_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "service_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "total_dbus", + "type": "\"double\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "\n", + "USE CATALOG IDENTIFIER(:uc_catalog);\n", + "USE SCHEMA IDENTIFIER(:uc_schema);\n", + " \n", + "WITH serverless AS (\n", + " SELECT\n", + " usage_date,\n", + " workspace_id,\n", + " identity_metadata.run_as AS run_as,\n", + " usage_metadata.budget_policy_id AS policy_id,\n", + " custom_tags,\n", + " usage_quantity,\n", + " usage_unit\n", + " FROM usage_dummy --Replace with system.billing.usage \n", + " WHERE billing_origin_product IN (\n", + " 'JOBS', 'SQL', 'INTERACTIVE', 'MODEL_SERVING',\n", + " 'VECTOR_SEARCH', 'LAKEHOUSE_MONITORING', 'PIPELINES'\n", + " )\n", + " AND usage_metadata.cluster_id IS NULL\n", + " AND usage_date >= current_date() - INTERVAL 30 DAY\n", + "),\n", + "classified AS (\n", + " SELECT\n", + " s.*,\n", + " CASE\n", + " WHEN s.policy_id IS NULL\n", + " THEN 'default_no_policy'\n", + " WHEN r.policy_name IS NOT NULL\n", + " AND s.custom_tags['division'] = r.division\n", + " AND s.custom_tags['department'] = r.department\n", + " AND s.custom_tags['environment'] = r.environment\n", + " AND s.custom_tags['service_name'] = r.service_name\n", + " THEN 'central_approved'\n", + " WHEN r.policy_name IS NOT NULL\n", + " THEN 'central_drifted'\n", + " ELSE 'workspace_created'\n", + " END AS classification,\n", + " r.policy_name\n", + " FROM serverless s\n", + " LEFT JOIN serverless_policies_registry r\n", + " ON r.policy_id = s.policy_id\n", + ")\n", + "SELECT\n", + " classification,\n", + " 
workspace_id,\n", + " custom_tags['division'] AS division,\n", + " custom_tags['department'] AS department,\n", + " custom_tags['environment'] AS environment,\n", + " custom_tags['service_name'] AS service_name,\n", + " SUM(usage_quantity) AS total_dbus\n", + "\n", + " FROM classified\n", + "GROUP BY ALL\n", + "ORDER BY total_dbus DESC;" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "5" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 8825077398391753, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 2 + }, + "notebookName": "Budget Policy Drift Tracker(with Test Data)", + "widgets": { + "uc_catalog": { + "currentValue": "archana", + "nuid": "1d9e446f-9cbf-46e5-bded-bd333243cf7f", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "main", + "label": null, + "name": "uc_catalog", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String", + "dynamic": false + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "main", + "label": null, + "name": "uc_catalog", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "uc_schema": { + "currentValue": "policy_schema", + "nuid": "b2de2c5d-8245-4be8-8a79-b896b732b540", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "uc_schema", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String", + "dynamic": false + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "uc_schema", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } + }, + 
"language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/core-platform/serverless/budget-policy-drift-tracker/Budget Policy Drift Tracker.ipynb b/core-platform/serverless/budget-policy-drift-tracker/Budget Policy Drift Tracker.ipynb new file mode 100644 index 0000000..4054f7b --- /dev/null +++ b/core-platform/serverless/budget-policy-drift-tracker/Budget Policy Drift Tracker.ipynb @@ -0,0 +1,1347 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "928b4868-9b3e-4933-9885-1fa7e397da39", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Notebook Overview\n", + "\n", + "This notebook demonstrates how to retrieve Databricks budget policies using the Account API, extract and normalize custom tags, and register the results in a Delta table for downstream analysis. \n", + "\n", + "**Key steps:**\n", + "- Authenticate with Azure AD using a service principal.\n", + "- Query the Databricks Account API for budget policies.\n", + "- Normalize custom tags into columns for easy querying.\n", + "- Store results in a Delta table using Unity Catalog.\n", + "- Compare expected vs. actual policy tags for validation.\n", + "\n", + "**Parameters:**\n", + "- `uc_catalog`: Unity Catalog catalog name (set via widget)\n", + "- `uc_schema`: Unity Catalog schema name (set via widget)\n", + "\n", + "**Requirements:**\n", + "- Service principal with Account Admin role.\n", + "- Access to Databricks Account API.\n", + "\n", + "**Outputs:**\n", + "- Delta table: `serverless_policies_registry` in the specified catalog and schema.\n", + "\n", + "**Usage:**\n", + "- Update widgets for catalog and schema as needed.\n", + "- Run cells sequentially for end-to-end workflow." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ab8182fc-b6dc-4981-baf8-c79e8182d616", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Step `1`: \n", + "Create this as a scheduled job to populate the **serverless_policies_registry_live** table." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "6f49095b-6500-49a5-b649-45ab3d737b74", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "\n", + "# Parameters required for Databricks Account API authentication\n", + "# - account_id: Databricks Account ID (can be found in the Databricks Account Console)\n", + "# - tenant_id: Azure AD Tenant ID (e.g. \"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\")\n", + "# - client_id: Azure AD Service Principal Client ID\n", + "# - client_secret: Azure AD Service Principal Client Secret\n", + "\n", + "account_id = \"\"\n", + "\n", + "# Azure AD Service Principal credentials (required for account-level APIs)\n", + "# The service principal must have Account Admin role\n", + "tenant_id = \"\" # e.g. 
\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\"\n", + "client_id = \"\" \n", + "client_secret = \"\"\n", + "\n", + "# Step 1: Get Azure AD token for Databricks resource\n", + "def get_azure_ad_token(tenant_id, client_id, client_secret):\n", + " url = f\"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token\"\n", + " payload = {\n", + " \"grant_type\": \"client_credentials\",\n", + " \"client_id\": client_id,\n", + " \"client_secret\": client_secret,\n", + " \"scope\": \"2ff814a6-3304-4ab8-85cb-cd0e6f879c1d/.default\" # Azure Databricks resource\n", + " }\n", + " response = requests.post(url, data=payload)\n", + " if response.status_code == 200:\n", + " return response.json()[\"access_token\"]\n", + " else:\n", + " raise Exception(f\"Failed to get token: {response.text}\")\n", + "\n", + "# Step 2: List budget policies\n", + "def get_budget_policies(account_id, token):\n", + " url = f\"https://accounts.azuredatabricks.net/api/2.1/accounts/{account_id}/budget-policies\"\n", + " headers = {\"Authorization\": f\"Bearer {token}\"}\n", + " all_policies = []\n", + " \n", + " while url:\n", + " response = requests.get(url, headers=headers)\n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " all_policies.extend(data.get('policies', []))\n", + " # Handle pagination\n", + " next_token = data.get('next_page_token')\n", + " url = f\"https://accounts.azuredatabricks.net/api/2.1/accounts/{account_id}/budget-policies?page_token={next_token}\" if next_token else None\n", + " else:\n", + " print(f\"Error {response.status_code}: {response.text}\")\n", + " break\n", + " \n", + " return all_policies\n", + "\n", + "# Execute\n", + "token = get_azure_ad_token(tenant_id, client_id, client_secret)\n", + "budget_policies = get_budget_policies(account_id, token)\n", + "\n", + "print(f\"Found {len(budget_policies)} budget policies\")\n", + "df = pd.DataFrame(budget_policies)\n", + "if not df.empty:\n", + " display(df)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "4cba46b3-e49d-4db3-b210-395dfa880ec3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# After creating the pandas DataFrame `df` from the budget policies\n", + "if not df.empty:\n", + " # Keep only the columns we need\n", + " tags_df = df[['policy_id','policy_name', 'custom_tags', 'binding_workspace_ids']].copy()\n", + "\n", + " # Convert the list of tag objects into a dict {key: value}\n", + " tags_dict_series = tags_df['custom_tags'].apply(\n", + " lambda tags: {t['key']: t['value'] for t in tags}\n", + " if isinstance(tags, list) else {}\n", + " )\n", + "\n", + " # Normalize the dicts into separate columns (one per tag key)\n", + " tags_expanded = pd.json_normalize(tags_dict_series)\n", + "\n", + " # Convert workspace_ids array to comma-separated string\n", + " workspace_ids_series = tags_df['binding_workspace_ids'].apply(\n", + " lambda ids: \",\".join(map(str, ids)) if isinstance(ids, list) else \"\"\n", + " ).rename('workspace_ids')\n", + "\n", + " # Combine policy_id, policy_name, workspace_ids with the expanded tag columns\n", + " result_df = pd.concat([\n", + " tags_df['policy_id'],\n", + " tags_df['policy_name'],\n", + " tags_expanded,\n", + " workspace_ids_series\n", + " ], axis=1)\n", + "\n", + " # Display the final table\n", + " display(result_df)\n", + "\n", + " # Write to Delta table serverless_policies_registry_live\n", + " spark_df = spark.createDataFrame(result_df)\n", + " spark_df.write.format(\"delta\") \\\n", + " .mode(\"overwrite\") \\\n", + " .option(\"overwriteSchema\", \"true\") \\\n", + " .saveAsTable(f\"{UC_PREFIX}.serverless_policies_registry_live\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + 
"nuid": "71d6792a-441d-4c4a-be71-69c5862fc9c6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "\n", + "## Step `2`:\n", + " Create sample dataset for `serverless_policies_registry`\n", + "\n", + "The sample dataset contains budget policy records, each with a `policy_id` and associated custom tags such as `division`, `department`, `environment`, and `service_name`. These tags represent metadata for each policy and are used to compare expected vs. actual values in downstream analysis. The dataset includes both matching and non-matching rows to validate tag normalization and policy registry accuracy.\n", + "\n", + "> **Note:** Update the custom tags in this dataset to match your organization's requirements and naming conventions for serverless policies." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f0f34153-36dd-4bce-bb5c-ec56e9aa6d57", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.databricks.empty-table+json": { + "directive_name": "CreateTable" + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "USE CATALOG IDENTIFIER(:uc_catalog);\n", + "USE SCHEMA IDENTIFIER(:uc_schema);\n", + "CREATE TABLE IF NOT EXISTS serverless_policies_registry (\n", + " policy_id STRING,\n", + " policy_name STRING,\n", + " division STRING,\n", + " department STRING,\n", + " environment STRING,\n", + " service_name STRING,\n", + " workspace_ids ARRAY,\n", + " managers ARRAY,\n", + " users ARRAY,\n", + " compliance_status STRING,\n", + " updated_at TIMESTAMP\n", + ") USING DELTA;\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { 
+ "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "64fb8ffe-6e8d-4477-afd2-ec55f1d39b8e", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
policy_idpolicy_namedivisiondepartmentenvironmentservice_nameworkspace_idsmanagersuserscompliance_statusupdated_at
policy_001Finance PolicyFinanceAccountingProdBillingList(1001, 1002)List(alice@example.com)List(bob@example.com, carol@example.com)approved2026-05-08T00:00:00.000Z
policy_002Engineering PolicyEngineeringPlatformStagingDataPipelineList(2001)List(dave@example.com)List(eve@example.com)pending2026-05-08T00:00:00.000Z
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "policy_001", + "Finance Policy", + "Finance", + "Accounting", + "Prod", + "Billing", + [ + 1001, + 1002 + ], + [ + "alice@example.com" + ], + [ + "bob@example.com", + "carol@example.com" + ], + "approved", + "2026-05-08T00:00:00.000Z" + ], + [ + "policy_002", + "Engineering Policy", + "Engineering", + "Platform", + "Staging", + "DataPipeline", + [ + 2001 + ], + [ + "dave@example.com" + ], + [ + "eve@example.com" + ], + "pending", + "2026-05-08T00:00:00.000Z" + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "policy_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "policy_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "service_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "workspace_ids", + "type": "{\"containsNull\":true,\"elementType\":\"long\",\"type\":\"array\"}" + }, + { + "metadata": "{}", + "name": "managers", + "type": "{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}" + }, + { + "metadata": "{}", + "name": "users", + "type": "{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}" + }, + { + "metadata": "{}", + "name": "compliance_status", + 
"type": "\"string\"" + }, + { + "metadata": "{}", + "name": "updated_at", + "type": "\"timestamp\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Sample budget policies with the required custom tags\n", + "sample_policies = [\n", + " {\n", + " \"policy_id\": \"policy_001\",\n", + " \"policy_name\": \"Finance Policy\",\n", + " \"custom_tags\": [\n", + " {\"key\": \"division\", \"value\": \"Finance\"},\n", + " {\"key\": \"department\", \"value\": \"Accounting\"},\n", + " {\"key\": \"environment\", \"value\": \"Prod\"},\n", + " {\"key\": \"service_name\", \"value\": \"Billing\"}\n", + " ],\n", + " \"workspace_ids\": [1001, 1002],\n", + " \"managers\": [\"alice@example.com\"],\n", + " \"users\": [\"bob@example.com\", \"carol@example.com\"],\n", + " \"compliance_status\": \"approved\",\n", + " \"updated_at\": pd.Timestamp(\"2026-05-08\")\n", + " },\n", + " {\n", + " \"policy_id\": \"policy_002\",\n", + " \"policy_name\": \"Engineering Policy\",\n", + " \"custom_tags\": [\n", + " {\"key\": \"division\", \"value\": \"Engineering\"},\n", + " {\"key\": \"department\", \"value\": \"Platform\"},\n", + " {\"key\": \"environment\", \"value\": \"Staging\"},\n", + " {\"key\": \"service_name\", \"value\": \"DataPipeline\"}\n", + " ],\n", + " \"workspace_ids\": [2001],\n", + " \"managers\": [\"dave@example.com\"],\n", + " \"users\": [\"eve@example.com\"],\n", + " \"compliance_status\": \"pending\",\n", + " \"updated_at\": pd.Timestamp(\"2026-05-08\")\n", + " }\n", + "]\n", + "\n", + "# Normalize custom_tags into columns\n", + "df = pd.DataFrame(sample_policies)\n", + "tags_dict_series = df['custom_tags'].apply(\n", + " lambda tags: {t['key']: t['value'] for t in tags} if isinstance(tags, list) else {}\n", + ")\n", + "tags_expanded = pd.json_normalize(tags_dict_series)\n", + "\n", + "# Combine with other columns as per schema\n", + "result_df = pd.concat([\n", + " df['policy_id'],\n", + " 
df['policy_name'],\n", + " tags_expanded,\n", + " df['workspace_ids'],\n", + " df['managers'],\n", + " df['users'],\n", + " df['compliance_status'],\n", + " df['updated_at']\n", + "], axis=1)\n", + "\n", + "# Convert to Spark DataFrame and write to Delta table\n", + "spark_df = spark.createDataFrame(result_df)\n", + "spark_df.write.format(\"delta\") \\\n", + " .mode(\"overwrite\") \\\n", + " .option(\"overwriteSchema\", \"true\") \\\n", + " .saveAsTable(f\"{UC_PREFIX}.serverless_policies_registry\")\n", + "\n", + "display(spark.table(f\"{UC_PREFIX}.serverless_policies_registry\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "8cfd91f8-1e8a-41af-bf0b-0b02bea877eb", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"uc_catalog\", spark.catalog.currentCatalog())\n", + "dbutils.widgets.text(\"uc_schema\", spark.catalog.currentDatabase())\n", + "\n", + "UC_CATALOG = dbutils.widgets.get(\"uc_catalog\")\n", + "UC_SCHEMA = dbutils.widgets.get(\"uc_schema\")\n", + "\n", + "UC_PREFIX = f\"{UC_CATALOG}.{UC_SCHEMA}\"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "0d94381f-d0fa-4d7f-a020-ebef2c536fe0", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Set A — Policy drift\n", + "Detects when central-defined policies (tags, workspace bindings, managers) change in production.\n", + "Sample SQL (drift on tags):\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + 
"nuid": "73948a4a-c848-46f3-a407-be2681974b08", + "showTitle": false, + "tableResultSettingsMap": { + "0": { + "dataGridStateBlob": "{\"version\":1,\"tableState\":{\"columnPinning\":{\"left\":[\"#row_number#\"],\"right\":[]},\"columnSizing\":{},\"columnVisibility\":{}},\"settings\":{\"columns\":{}},\"syncTimestamp\":1777567982835}", + "filterBlob": null, + "queryPlanFiltersBlob": null, + "tableResultIndex": 0 + } + }, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
policy_idpolicy_nameexpected_divisionactual_divisionexpected_departmentactual_departmentexpected_environmentactual_environmentexpected_service_nameactual_service_name
policy_002Engineering PolicyEngineeringEngineeringPlatformAdminStagingStagingDataPipelineDataPipeline
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "policy_002", + "Engineering Policy", + "Engineering", + "Engineering", + "Platform", + "Admin", + "Staging", + "Staging", + "DataPipeline", + "DataPipeline" + ] + ], + "datasetInfos": [ + { + "name": "_sqldf", + "schema": { + "fields": [ + { + "metadata": {}, + "name": "policy_id", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "policy_name", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_division", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_division", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_department", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_department", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_environment", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_environment", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "expected_service_name", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "actual_service_name", + "nullable": true, + "type": "string" + } + ], + "type": "struct" + }, + "tableIdentifier": null, + "typeStr": "pyspark.sql.connect.dataframe.DataFrame" + } + ], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": { + "createTempViewForImplicitDf": true, + "dataframeName": "_sqldf", + "executionCount": 99 + }, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + 
"schema": [ + { + "metadata": "{}", + "name": "policy_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "policy_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "expected_service_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "actual_service_name", + "type": "\"string\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "USE CATALOG IDENTIFIER(:uc_catalog);\n", + "USE SCHEMA IDENTIFIER(:uc_schema); \n", + "\n", + "WITH live AS (\n", + " SELECT\n", + " policy_id,\n", + " policy_name,\n", + " division,\n", + " department,\n", + " environment,\n", + " service_name\n", + " FROM serverless_policies_registry_live -- populated by a daily Job calling the Account API,replace with serverless_policies_registry_live\n", + ")\n", + "SELECT\n", + " r.policy_id,\n", + " r.policy_name,\n", + " r.division AS expected_division,\n", + " l.division AS actual_division,\n", + " r.department AS expected_department,\n", + " l.department AS actual_department,\n", + " r.environment AS expected_environment,\n", + " l.environment AS actual_environment,\n", + " r.service_name AS expected_service_name,\n", + " l.service_name AS actual_service_name\n", + "FROM serverless_policies_registry r\n", + "JOIN live l\n", + " ON l.policy_name = r.policy_name\n", + "WHERE r.division <> l.division\n", + " OR r.department <> l.department\n", + " OR r.environment <> l.environment\n", + " OR 
r.service_name <> l.service_name " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "44e1ba36-470d-49da-b73d-52f1cd808c8f", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Sample SQL (drift from audit logs)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d84c54df-dc54-4abb-9c6e-726a232d52e9", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
event_timeactoraction_namerequest_params
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [], + "datasetInfos": [ + { + "name": "_sqldf", + "schema": { + "fields": [ + { + "metadata": { + "comment": "Timestamp of the event" + }, + "name": "event_time", + "nullable": true, + "type": "timestamp" + }, + { + "metadata": {}, + "name": "actor", + "nullable": true, + "type": "string" + }, + { + "metadata": { + "comment": "The name of the action that has been performed as part of the audit event. Action names vary depending on the Databricks service (service_name). See [documentation of actions per service](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details." + }, + "name": "action_name", + "nullable": true, + "type": "string" + }, + { + "metadata": { + "comment": "A map of key/value pairs with the request parameters. Request parameters vary by request type. See [documentation of request parameters per action](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details." 
+ }, + "name": "request_params", + "nullable": true, + "type": { + "keyType": "string", + "type": "map", + "valueContainsNull": true, + "valueType": "string" + } + } + ], + "type": "struct" + }, + "tableIdentifier": null, + "typeStr": "pyspark.sql.connect.dataframe.DataFrame" + } + ], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": { + "createTempViewForImplicitDf": true, + "dataframeName": "_sqldf", + "executionCount": 19 + }, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{\"comment\": \"Timestamp of the event\"}", + "name": "event_time", + "type": "\"timestamp\"" + }, + { + "metadata": "{}", + "name": "actor", + "type": "\"string\"" + }, + { + "metadata": "{\"comment\": \"The name of the action that has been performed as part of the audit event. Action names vary depending on the Databricks service (service_name). See [documentation of actions per service](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details.\"}", + "name": "action_name", + "type": "\"string\"" + }, + { + "metadata": "{\"comment\": \"A map of key/value pairs with the request parameters. Request parameters vary by request type. 
See [documentation of request parameters per action](https://learn.microsoft.com/azure/databricks/admin/account-settings/audit-logs) for details.\"}", + "name": "request_params", + "type": "{\"keyType\":\"string\",\"type\":\"map\",\"valueContainsNull\":true,\"valueType\":\"string\"}" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "SELECT\n", + " event_time,\n", + " user_identity.email AS actor,\n", + " action_name,\n", + " request_params\n", + "FROM system.access.audit\n", + "WHERE service_name = 'accounts'\n", + " AND action_name IN ('updateBudgetPolicy', 'deleteBudgetPolicy', 'updateRuleSet')\n", + " AND event_date >= current_date() - INTERVAL 1 DAY\n", + "ORDER BY event_time DESC;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "816a6cf2-265c-4f5d-8c69-4c9d501933c5", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Set B — Usage classification on system.billing.usage\n", + "\n", + "\n", + "\n", + "Classifies every serverless usage row as `central_approved` / `central_drifted` / `workspace_created` / `default_no_policy` (usage with no budget policy ID)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "906c24a8-9f75-4a6e-8878-464ea32cff10", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
classificationworkspace_iddivisiondepartmentenvironmentservice_nametotal_dbus
central_driftedws_002EngineeringAdminStagingDataPipeline20.0
central_approvedws_001FinanceAccountingProdBilling10.0
default_no_policyws_004LegalComplianceTestAudit8.0
workspace_createdws_003HRRecruitingDevOnboarding5.0
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + "central_drifted", + "ws_002", + "Engineering", + "Admin", + "Staging", + "DataPipeline", + 20.0 + ], + [ + "central_approved", + "ws_001", + "Finance", + "Accounting", + "Prod", + "Billing", + 10.0 + ], + [ + "default_no_policy", + "ws_004", + "Legal", + "Compliance", + "Test", + "Audit", + 8.0 + ], + [ + "workspace_created", + "ws_003", + "HR", + "Recruiting", + "Dev", + "Onboarding", + 5.0 + ] + ], + "datasetInfos": [ + { + "name": "_sqldf", + "schema": { + "fields": [ + { + "metadata": {}, + "name": "classification", + "nullable": false, + "type": "string" + }, + { + "metadata": {}, + "name": "workspace_id", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "division", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "department", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "environment", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "service_name", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "total_dbus", + "nullable": true, + "type": "double" + } + ], + "type": "struct" + }, + "tableIdentifier": null, + "typeStr": "pyspark.sql.connect.dataframe.DataFrame" + } + ], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": { + "createTempViewForImplicitDf": true, + "dataframeName": "_sqldf", + "executionCount": 101 + }, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "classification", + "type": "\"string\"" + }, + 
{ + "metadata": "{}", + "name": "workspace_id", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "division", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "environment", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "service_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "total_dbus", + "type": "\"double\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sql\n", + "\n", + "USE CATALOG IDENTIFIER(:uc_catalog);\n", + "USE SCHEMA IDENTIFIER(:uc_schema);\n", + " \n", + "WITH serverless AS (\n", + " SELECT\n", + " usage_date,\n", + " workspace_id,\n", + " identity_metadata.run_as AS run_as,\n", + " usage_metadata.budget_policy_id AS policy_id,\n", + " custom_tags,\n", + " usage_quantity,\n", + " usage_unit\n", + " FROM system.billing.usage \n", + " WHERE billing_origin_product IN (\n", + " 'JOBS', 'SQL', 'INTERACTIVE', 'MODEL_SERVING',\n", + " 'VECTOR_SEARCH', 'LAKEHOUSE_MONITORING', 'PIPELINES'\n", + " )\n", + " AND usage_metadata.cluster_id IS NULL\n", + " AND usage_date >= current_date() - INTERVAL 30 DAY\n", + "),\n", + "classified AS (\n", + " SELECT\n", + " s.*,\n", + " CASE\n", + " WHEN s.policy_id IS NULL\n", + " THEN 'default_no_policy'\n", + " WHEN r.policy_name IS NOT NULL\n", + " AND s.custom_tags['division'] = r.division\n", + " AND s.custom_tags['department'] = r.department\n", + " AND s.custom_tags['environment'] = r.environment\n", + " AND s.custom_tags['service_name'] = r.service_name\n", + " THEN 'central_approved'\n", + " WHEN r.policy_name IS NOT NULL\n", + " THEN 'central_drifted'\n", + " ELSE 'workspace_created'\n", + " END AS classification,\n", + " r.policy_name\n", + " FROM serverless s\n", + " LEFT JOIN serverless_policies_registry r\n", + " ON r.policy_id = s.policy_id\n", + ")\n", + "SELECT\n", + " classification,\n", + " workspace_id,\n", + " 
custom_tags['division'] AS division,\n", + " custom_tags['department'] AS department,\n", + " custom_tags['environment'] AS environment,\n", + " custom_tags['service_name'] AS service_name,\n", + " SUM(usage_quantity) AS total_dbus\n", + "\n", + " FROM classified\n", + "GROUP BY ALL\n", + "ORDER BY total_dbus DESC;" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "5" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 8825077398391753, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 2 + }, + "notebookName": "Budget Policy Drift Tracker", + "widgets": { + "uc_catalog": { + "currentValue": "archana", + "nuid": "1d9e446f-9cbf-46e5-bded-bd333243cf7f", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "main", + "label": null, + "name": "uc_catalog", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String", + "dynamic": false + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "main", + "label": null, + "name": "uc_catalog", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "uc_schema": { + "currentValue": "policy_schema", + "nuid": "b2de2c5d-8245-4be8-8a79-b896b732b540", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "uc_schema", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String", + "dynamic": false + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "uc_schema", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + 
}, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/core-platform/serverless/budget-policy-drift-tracker/README.md b/core-platform/serverless/budget-policy-drift-tracker/README.md new file mode 100644 index 0000000..803ab79 --- /dev/null +++ b/core-platform/serverless/budget-policy-drift-tracker/README.md @@ -0,0 +1,116 @@ +# Budget Policy Drift Tracker + +This solution accelerator helps platform teams monitor serverless budget-policy compliance and detect policy drift over time. + +It focuses on: + +- Tracking whether serverless usage remains aligned with centrally approved policy tags. +- Classifying usage into compliance buckets (`central_approved`, `central_drifted`, `workspace_created`, `default_no_policy`). +- Surfacing drift signals for dashboards and alerting workflows. + +## Objective + +Enable serverless usage tracking while preserving a consistent tag model for cost allocation across workloads. + +Expected outcome: + +- Serverless usage can be validated against required policy tags (`division`, `department`, `environment`, `service_name`). +- Non-compliant or unmanaged usage patterns can be detected and escalated quickly. + +## Repository Contents + +- `Budget Policy Drift Tracker.ipynb` + - Production-oriented notebook. + - Uses `system.billing.usage` for usage classification. + - Expects a live policy registry table in Unity Catalog. + +- `Budget Policy Drift Tracker(with Test Data).ipynb` + - Demo/testing notebook. + - Creates dummy policy-live and usage tables for validation. + - Best option to validate logic before pointing to production system tables. + +## What This Accelerator Covers + +This repository intentionally focuses on **monitoring and drift detection**. + +It includes: + +1. Policy drift checks between expected policy tags and live policy tags. +2. Audit-log query patterns for policy changes. +3. Usage classification logic for serverless consumption. 
+ + + +## Prerequisites + +- Databricks workspace with Unity Catalog enabled. +- Permissions to create/read tables in a target catalog and schema. +- Access to `system.billing.usage` (for production notebook runs). +- Access to `system.access.audit` (optional but recommended for audit-based drift checks). +- Notebook runtime with Python + Spark SQL support. + +If you run the API-based policy extraction cells: + +- Account-level API access and a service principal with required account permissions. + +## Quick Start + +1. Import notebook into your Databricks workspace. +2. Set widget values for: + - `uc_catalog` + - `uc_schema` +3. Run cells sequentially. +4. Start with the test notebook first: + - `Budget Policy Drift Tracker(with Test Data).ipynb` +5. After validation, switch to: + - `Budget Policy Drift Tracker.ipynb` + - Update table references if needed for your environment. + +## Data Objects Created/Used + +Typical tables referenced in the workflow: + +- `serverless_policies_registry` (expected/approved policy metadata). +- `serverless_policies_registry_live` (live policy snapshot). +- `serverless_policies_registry_live_dummy` (test notebook only). +- `usage_dummy` (test notebook only). +- `system.billing.usage` (production usage source). + +## Core Monitoring Logic + +### A) Policy Drift + +Compares expected policy tags from `serverless_policies_registry` with live values and flags mismatches for: + +- `division` +- `department` +- `environment` +- `service_name` + +### B) Usage Classification + +Classifies usage rows as: + +- `central_approved`: policy exists and tags match. +- `central_drifted`: policy exists but tags do not match. +- `workspace_created`: policy ID not found in central registry. +- `default_no_policy`: usage with no policy ID. + +This output can be used directly in dashboards/alerts for governance and chargeback visibility. 
+ +## Operationalizing + +For production adoption: + +- Schedule refresh of live policy data (if using live policy extraction). +- Build dashboards on classification aggregates (workspace, policy, tag dimensions). +- Configure alerts for: + - any `default_no_policy` usage, + - any `workspace_created` usage, + - threshold breaches on `central_drifted`. + +## Notes + +- Replace placeholder credentials and identifiers before running. +- Keep naming conventions for required tags consistent across policy definitions and usage tagging. +- Test with the dummy-data notebook before enabling production monitoring.