From 12c9cb3d812f944a779b8126d7d79b4da9b3590a Mon Sep 17 00:00:00 2001 From: corticalstack Date: Wed, 27 May 2026 14:02:34 +0200 Subject: [PATCH 1/4] fix(12): drop az apim extension dependency in deploy-o3-backend 12-01-deploy-o3-backend.ipynb failed on Step 5 with "'subscription' is misspelled or not recognized" because `az apim subscription list-secrets` requires the `apim` Azure CLI extension that is not part of the base CLI. Rewrote Steps 3 and 5 to use `az rest` against the ARM management endpoint, matching the pattern already used by 10-01 and 11-01. Step 2 now also resolves SUB_ID and an APIM_BASE_URI helper used by the two ARM calls. No new dependencies; works with the base Azure CLI. --- .../12-01-deploy-o3-backend.ipynb | 67 ++----------------- 1 file changed, 4 insertions(+), 63 deletions(-) diff --git a/12-foundry-iq-deep-research/12-01-deploy-o3-backend.ipynb b/12-foundry-iq-deep-research/12-01-deploy-o3-backend.ipynb index 154be7f..6ff6185 100644 --- a/12-foundry-iq-deep-research/12-01-deploy-o3-backend.ipynb +++ b/12-foundry-iq-deep-research/12-01-deploy-o3-backend.ipynb @@ -85,25 +85,7 @@ "id": "12010001-0000-0000-0000-000000000005", "metadata": {}, "outputs": [], - "source": [ - "# Derive APIM name and suffix from GATEWAY_URL\n", - "# e.g. https://apim-foundry-{suffix}.azure-api.net/openai -> apim-foundry-{suffix}\n", - "APIM_NAME = GATEWAY_URL.split('//')[1].split('.')[0]\n", - "SUFFIX = APIM_NAME.split('-')[-1] # e.g. {suffix}\n", - "CORE_RG = f'rg-foundry-core-{SUFFIX}'\n", - "\n", - "# Get deployer principal ID from cached JWT\n", - "token = subprocess.run(\n", - " 'az account get-access-token --query accessToken -o tsv',\n", - " shell=True, capture_output=True, text=True\n", - ").stdout.strip()\n", - "padding = '=' * (4 - len(token.split('.')[1]) % 4)\n", - "PRINCIPAL_ID = json.loads(base64.b64decode(token.split('.')[1] + padding))['oid']\n", - "\n", - "print(f'APIM service : {APIM_NAME}')\n", - "print(f'Core RG : {CORE_RG}')\n", - "print(f'Principal ID : {PRINCIPAL_ID}')" - ] + "source": "# Derive APIM name and suffix from GATEWAY_URL\n# e.g. https://apim-foundry-{suffix}.azure-api.net/openai -> apim-foundry-{suffix}\nAPIM_NAME = GATEWAY_URL.split('//')[1].split('.')[0]\nSUFFIX = APIM_NAME.split('-')[-1] # e.g. {suffix}\nCORE_RG = f'rg-foundry-core-{SUFFIX}'\n\n# Subscription ID + ARM base URI used by Steps 3 and 5 to call APIM management\n# endpoints directly via `az rest`. This avoids requiring the `az apim` CLI\n# extension, which is not bundled with the base Azure CLI.\nSUB_ID = subprocess.run(\n 'az account show --query id -o tsv', shell=True, capture_output=True, text=True\n).stdout.strip()\nAPIM_BASE_URI = (\n f'https://management.azure.com/subscriptions/{SUB_ID}'\n f'/resourceGroups/{CORE_RG}/providers/Microsoft.ApiManagement/service/{APIM_NAME}'\n)\n\n# Get deployer principal ID from cached JWT\ntoken = subprocess.run(\n 'az account get-access-token --query accessToken -o tsv',\n shell=True, capture_output=True, text=True\n).stdout.strip()\npadding = '=' * (4 - len(token.split('.')[1]) % 4)\nPRINCIPAL_ID = json.loads(base64.b64decode(token.split('.')[1] + padding))['oid']\n\nprint(f'APIM service : {APIM_NAME}')\nprint(f'Core RG : {CORE_RG}')\nprint(f'Subscription : {SUB_ID}')\nprint(f'Principal ID : {PRINCIPAL_ID}')" }, { "cell_type": "markdown", @@ -119,19 +101,7 @@ "id": "12010001-0000-0000-0000-000000000007", "metadata": {}, "outputs": [], - "source": [ - "check = subprocess.run(\n", - " f'az apim backend show -g \"{CORE_RG}\" --service-name \"{APIM_NAME}\" --backend-id openai-research -o none',\n", - " shell=True, capture_output=True, text=True\n", - ")\n", - "BACKEND_EXISTS = check.returncode == 0\n", - "\n", - "if BACKEND_EXISTS:\n", - " print('✅ openai-research APIM backend already exists - skipping Bicep deployment.')\n", - " print(' Proceeding to read existing resources.')\n", - "else:\n", - " print('ℹ️ openai-research backend not found - will deploy main.bicep.')" - ] + "source": "check = subprocess.run(\n f'az rest --method GET'\n f' --uri \"{APIM_BASE_URI}/backends/openai-research?api-version=2024-06-01-preview\"'\n f' -o none',\n shell=True, capture_output=True, text=True\n)\nBACKEND_EXISTS = check.returncode == 0\n\nif BACKEND_EXISTS:\n print('✅ openai-research APIM backend already exists - skipping Bicep deployment.')\n print(' Proceeding to read existing resources.')\nelse:\n print('ℹ️ openai-research backend not found - will deploy main.bicep.')" }, { "cell_type": "markdown", @@ -186,36 +156,7 @@ "id": "12010001-0000-0000-0000-000000000011", "metadata": {}, "outputs": [], - "source": [ - "# Try the dedicated deep research subscription first;\n", - "# fall back to the alpha subscription if it doesn't exist yet.\n", - "keys_result = subprocess.run(\n", - " f'az apim subscription list-secrets -g \"{CORE_RG}\" --service-name \"{APIM_NAME}\" '\n", - " f'--subscription-id foundry-gateway-dr --query primaryKey -o tsv',\n", - " shell=True, capture_output=True, text=True\n", - ")\n", - "\n", - "if keys_result.returncode == 0 and keys_result.stdout.strip():\n", - " DR_GATEWAY_KEY = keys_result.stdout.strip()\n", - " print('✅ Using foundry-gateway-dr subscription key')\n", - "else:\n", - " # Fall back to alpha subscription (created by the core gateway deployment)\n", - " fallback = subprocess.run(\n", - " f'az apim subscription list-secrets -g \"{CORE_RG}\" --service-name \"{APIM_NAME}\" '\n", - " f'--subscription-id foundry-gateway-alpha --query primaryKey -o tsv',\n", - " shell=True, capture_output=True, text=True\n", - " )\n", - " if fallback.returncode == 0 and fallback.stdout.strip():\n", - " DR_GATEWAY_KEY = fallback.stdout.strip()\n", - " print('ℹ️ Using foundry-gateway-alpha subscription key (fallback)')\n", - " else:\n", - " raise RuntimeError('Could not retrieve APIM subscription key. Check az login and hub RG.')\n", - "\n", - "DR_MODEL = 'o3-deep-research'\n", - "\n", - "print(f'DR model : {DR_MODEL}')\n", - "print(f'DR key : {DR_GATEWAY_KEY[:4]}... (hidden)')" - ] + "source": "# Try the dedicated deep research subscription first; fall back to the alpha\n# subscription if it doesn't exist yet. Uses `az rest` against ARM directly,\n# matching the pattern in 10-01 and 11-01, so the `az apim` extension is not needed.\ndef _list_apim_subscription_key(sub_name: str) -> str | None:\n r = subprocess.run(\n f'az rest --method POST'\n f' --uri \"{APIM_BASE_URI}/subscriptions/{sub_name}/listSecrets?api-version=2024-06-01-preview\"'\n f' --query primaryKey -o tsv',\n shell=True, capture_output=True, text=True\n )\n return r.stdout.strip() if r.returncode == 0 and r.stdout.strip() else None\n\nDR_GATEWAY_KEY = _list_apim_subscription_key('foundry-gateway-dr')\nif DR_GATEWAY_KEY:\n print('✅ Using foundry-gateway-dr subscription key')\nelse:\n DR_GATEWAY_KEY = _list_apim_subscription_key('foundry-gateway-alpha')\n if DR_GATEWAY_KEY:\n print('ℹ️ Using foundry-gateway-alpha subscription key (fallback)')\n else:\n raise RuntimeError('Could not retrieve APIM subscription key. Check az login and core RG.')\n\nDR_MODEL = 'o3-deep-research'\n\nprint(f'DR model : {DR_MODEL}')\nprint(f'DR key : {DR_GATEWAY_KEY[:4]}... (hidden)')" }, { "cell_type": "markdown", @@ -284,4 +225,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From 8b21b807bc9ae69cf75bfa6a24d205518494d9b5 Mon Sep 17 00:00:00 2001 From: corticalstack Date: Wed, 27 May 2026 14:02:34 +0200 Subject: [PATCH 2/4] chore: bump version to 0.8.4 and add release notes --- CHANGELOG.md | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2cbc73..de7a854 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.8.4] - 2026-05-27 + +### Fixed + +- `12-foundry-iq-deep-research/12-01-deploy-o3-backend.ipynb` failed on Step 5 with `'subscription' is misspelled or not recognized` because it used `az apim subscription list-secrets`, which requires the `apim` Azure CLI extension. Same issue affected Step 3's `az apim backend show`. Both calls rewritten to use `az rest` against the ARM management endpoint, matching the pattern already used by `10-01-deploy-search-and-project.ipynb` and `11-01-deploy-setup.ipynb`. Step 2 now also resolves `SUB_ID` and an `APIM_BASE_URI` helper used by Steps 3 and 5. No new dependencies; works with the base Azure CLI. + ## [0.8.3] - 2026-05-27 Two leftovers from prior cleanup passes: residual "Lab N" pointers in repo-root files, and a repo-wide em/en dash sweep that had never been done. diff --git a/pyproject.toml b/pyproject.toml index b707a49..ad85cc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "awesome-foundry-nextgen" -version = "0.8.3" +version = "0.8.4" description = "Hands-on labs for Microsoft Foundry — Azure's unified PaaS for enterprise AI" requires-python = ">=3.11" dependencies = [ From 9792eb93131d97221a5125693a01c5339b59688d Mon Sep 17 00:00:00 2001 From: corticalstack Date: Wed, 27 May 2026 14:28:25 +0200 Subject: [PATCH 3/4] fix(05,12): raise o3-deep-research TPM capacity from 10 to 200 The original SKU capacity of 10 (= 10K TPM) throttled multi-step deep-research runs with 429 errors before completion. Raised to 200 (= 200K TPM) in both Bicep files that define the deployment: - 05-foundry-project-pattern-setup/05-02-deploy-foundry-core-gateway/main.bicep - 12-foundry-iq-deep-research/main.bicep The new value stays well under the Norway East o3-DeepResearch subscription quota (limit 3000). Existing live deployments must be updated separately, either by a fresh bicep apply or via: az cognitiveservices account deployment update \ -g rg-foundry-core-{suffix} -n aif-research-{suffix} \ --deployment-name o3-deep-research --sku-capacity 200 --- .../05-02-deploy-foundry-core-gateway/main.bicep | 5 ++++- 12-foundry-iq-deep-research/main.bicep | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/05-foundry-project-pattern-setup/05-02-deploy-foundry-core-gateway/main.bicep b/05-foundry-project-pattern-setup/05-02-deploy-foundry-core-gateway/main.bicep index bd7c372..714d99d 100644 --- a/05-foundry-project-pattern-setup/05-02-deploy-foundry-core-gateway/main.bicep +++ b/05-foundry-project-pattern-setup/05-02-deploy-foundry-core-gateway/main.bicep @@ -120,7 +120,10 @@ resource researchHub 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' = resource researchModel 'Microsoft.CognitiveServices/accounts/deployments@2025-04-01-preview' = { parent: researchHub name: 'o3-deep-research' - sku: { name: 'GlobalStandard', capacity: 10 } + // Capacity is K-TPM. 10 was too low - multi-step deep-research runs hit + // 429 throttling before completing. 200 gives realistic headroom while + // staying well under the Norway East o3-DeepResearch subscription quota. + sku: { name: 'GlobalStandard', capacity: 200 } properties: { model: { name: 'o3-deep-research' diff --git a/12-foundry-iq-deep-research/main.bicep b/12-foundry-iq-deep-research/main.bicep index eb4ae51..7444360 100644 --- a/12-foundry-iq-deep-research/main.bicep +++ b/12-foundry-iq-deep-research/main.bicep @@ -54,7 +54,10 @@ resource researchHub 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' = resource researchModel 'Microsoft.CognitiveServices/accounts/deployments@2025-04-01-preview' = { parent: researchHub name: 'o3-deep-research' - sku: { name: 'GlobalStandard', capacity: 10 } + // Capacity is K-TPM. 10 was too low - multi-step deep-research runs hit + // 429 throttling before completing. 200 gives realistic headroom while + // staying well under the Norway East o3-DeepResearch subscription quota. + sku: { name: 'GlobalStandard', capacity: 200 } properties: { model: { name: 'o3-deep-research' From df62cb33f42b836d720f8afef5acf13cac28cc56 Mon Sep 17 00:00:00 2001 From: corticalstack Date: Wed, 27 May 2026 14:28:25 +0200 Subject: [PATCH 4/4] chore: bump version to 0.8.5 and add release notes --- CHANGELOG.md | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de7a854..4a0b115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.8.5] - 2026-05-27 + +### Fixed + +- Raised the `o3-deep-research` model deployment capacity from 10 (10K TPM) to 200 (200K TPM) in both `05-foundry-project-pattern-setup/05-02-deploy-foundry-core-gateway/main.bicep` and `12-foundry-iq-deep-research/main.bicep`. The original 10K cap throttled multi-step deep-research runs with 429 errors before completion. 200K stays well under the Norway East `o3-DeepResearch` subscription quota (3000). Existing live deployments must be updated separately (`az cognitiveservices account deployment update --sku-capacity 200`) or via a fresh Bicep apply. + ## [0.8.4] - 2026-05-27 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index ad85cc2..03cd9d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "awesome-foundry-nextgen" -version = "0.8.4" +version = "0.8.5" description = "Hands-on labs for Microsoft Foundry — Azure's unified PaaS for enterprise AI" requires-python = ">=3.11" dependencies = [