diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..b290768
Binary files /dev/null and b/.DS_Store differ
diff --git a/.github/workflows/databricks-deployment.yml b/.github/workflows/databricks-deployment.yml
index fe3f508..490dbf8 100644
--- a/.github/workflows/databricks-deployment.yml
+++ b/.github/workflows/databricks-deployment.yml
@@ -7,7 +7,7 @@ on:
- master
env:
- DATABRICKS_HOST: https://adb-4181970831265458.18.azuredatabricks.net/
+ DATABRICKS_HOST: ${{ secrets.WORKSPACE_HOST_NAME }}
PYTHON_VERSION: '3.9'
jobs:
diff --git a/Notebooks/.DS_Store b/Notebooks/.DS_Store
new file mode 100644
index 0000000..e8b5dcb
Binary files /dev/null and b/Notebooks/.DS_Store differ
diff --git a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb
index 291cd4d..56544cd 100644
--- a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb
+++ b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb
@@ -18,7 +18,8 @@
},
"outputs": [],
"source": [
- "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC"
+ "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC\n",
+ "# Dummy change"
]
},
{
@@ -240,21 +241,21 @@
"label": null,
"name": "catalog_name",
"options": {
- "widgetDisplayType": "Text",
- "validationRegex": null
+ "validationRegex": null,
+ "widgetDisplayType": "Text"
},
"parameterDataType": "String"
},
"widgetInfo": {
- "widgetType": "text",
"defaultValue": "pedroz_e2edata_dev",
"label": null,
"name": "catalog_name",
"options": {
- "widgetType": "text",
"autoCreated": null,
- "validationRegex": null
- }
+ "validationRegex": null,
+ "widgetType": "text"
+ },
+ "widgetType": "text"
}
}
}
diff --git a/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb b/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb
index ad80913..1e6f1ce 100644
--- a/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb
+++ b/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb
@@ -322,21 +322,21 @@
"label": null,
"name": "catalog_name",
"options": {
- "widgetDisplayType": "Text",
- "validationRegex": null
+ "validationRegex": null,
+ "widgetDisplayType": "Text"
},
"parameterDataType": "String"
},
"widgetInfo": {
- "widgetType": "text",
"defaultValue": "pedroz_e2edata_dev",
"label": null,
"name": "catalog_name",
"options": {
- "widgetType": "text",
"autoCreated": null,
- "validationRegex": null
- }
+ "validationRegex": null,
+ "widgetType": "text"
+ },
+ "widgetType": "text"
}
}
}
diff --git a/Notebooks/3_Inference/batch-inference.ipynb b/Notebooks/3_Inference/batch-inference.ipynb
index f348524..a5e6264 100644
--- a/Notebooks/3_Inference/batch-inference.ipynb
+++ b/Notebooks/3_Inference/batch-inference.ipynb
@@ -128,98 +128,7 @@
"title": ""
}
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/spark-95fe552f-1bd8-4fee-8597-dd/.ipykernel/7148/command-8412231637893746-4118003963:4: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
- " df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')\n",
- "/home/spark-95fe552f-1bd8-4fee-8597-dd/.ipykernel/7148/command-8412231637893746-4118003963:4: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
- " df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " sepal_length_cm | \n",
- " sepal_width_cm | \n",
- " petal_length_cm | \n",
- " petal_width_cm | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5.1 | \n",
- " 3.5 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 4.9 | \n",
- " 3.0 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 4.7 | \n",
- " 3.2 | \n",
- " 1.3 | \n",
- " 0.2 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 4.6 | \n",
- " 3.1 | \n",
- " 1.5 | \n",
- " 0.2 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5.0 | \n",
- " 3.6 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm\n",
- "0 5.1 3.5 1.4 0.2\n",
- "1 4.9 3.0 1.4 0.2\n",
- "2 4.7 3.2 1.3 0.2\n",
- "3 4.6 3.1 1.5 0.2\n",
- "4 5.0 3.6 1.4 0.2"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Pull the dataset for running the inference\n",
"iris_samples = datasets.load_iris(as_frame=True)\n",
@@ -266,94 +175,7 @@
"title": ""
}
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " sepal_length_cm | \n",
- " sepal_width_cm | \n",
- " petal_length_cm | \n",
- " petal_width_cm | \n",
- " prediction | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5.1 | \n",
- " 3.5 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 4.9 | \n",
- " 3.0 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 4.7 | \n",
- " 3.2 | \n",
- " 1.3 | \n",
- " 0.2 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 4.6 | \n",
- " 3.1 | \n",
- " 1.5 | \n",
- " 0.2 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5.0 | \n",
- " 3.6 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm prediction\n",
- "0 5.1 3.5 1.4 0.2 0\n",
- "1 4.9 3.0 1.4 0.2 0\n",
- "2 4.7 3.2 1.3 0.2 0\n",
- "3 4.6 3.1 1.5 0.2 0\n",
- "4 5.0 3.6 1.4 0.2 0"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"predictions = model.predict(df_samples)\n",
"df_samples['prediction'] = predictions\n",
@@ -377,102 +199,7 @@
"title": ""
}
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " sepal_length_cm | \n",
- " sepal_width_cm | \n",
- " petal_length_cm | \n",
- " petal_width_cm | \n",
- " prediction | \n",
- " actual_label | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5.1 | \n",
- " 3.5 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 4.9 | \n",
- " 3.0 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 4.7 | \n",
- " 3.2 | \n",
- " 1.3 | \n",
- " 0.2 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 4.6 | \n",
- " 3.1 | \n",
- " 1.5 | \n",
- " 0.2 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5.0 | \n",
- " 3.6 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " sepal_length_cm sepal_width_cm ... prediction actual_label\n",
- "0 5.1 3.5 ... 0 0\n",
- "1 4.9 3.0 ... 0 0\n",
- "2 4.7 3.2 ... 0 0\n",
- "3 4.6 3.1 ... 0 0\n",
- "4 5.0 3.6 ... 0 0\n",
- "\n",
- "[5 rows x 6 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_samples['actual_label'] = iris_samples['target']\n",
"df_samples.head()"
@@ -544,136 +271,10 @@
"title": ""
}
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| sepal_length_cm | sepal_width_cm | petal_length_cm | petal_width_cm | species | id |
|---|
| 5.1 | 3.5 | 1.4 | 0.2 | 0 | 1 |
| 4.9 | 3.0 | 1.4 | 0.2 | 0 | 2 |
| 4.7 | 3.2 | 1.3 | 0.2 | 0 | 3 |
| 4.6 | 3.1 | 1.5 | 0.2 | 0 | 4 |
| 5.0 | 3.6 | 1.4 | 0.2 | 0 | 5 |
"
- ]
- },
- "metadata": {
- "application/vnd.databricks.v1+output": {
- "addedWidgets": {},
- "aggData": [],
- "aggError": "",
- "aggOverflow": false,
- "aggSchema": [],
- "aggSeriesLimitReached": false,
- "aggType": "",
- "arguments": {},
- "columnCustomDisplayInfos": {},
- "data": [
- [
- 5.1,
- 3.5,
- 1.4,
- 0.2,
- 0,
- 1
- ],
- [
- 4.9,
- 3,
- 1.4,
- 0.2,
- 0,
- 2
- ],
- [
- 4.7,
- 3.2,
- 1.3,
- 0.2,
- 0,
- 3
- ],
- [
- 4.6,
- 3.1,
- 1.5,
- 0.2,
- 0,
- 4
- ],
- [
- 5,
- 3.6,
- 1.4,
- 0.2,
- 0,
- 5
- ]
- ],
- "datasetInfos": [],
- "dbfsResultPath": null,
- "isJsonSchema": true,
- "metadata": {},
- "overflow": false,
- "plotOptions": {
- "customPlotOptions": {},
- "displayType": "table",
- "pivotAggregation": null,
- "pivotColumns": null,
- "xColumns": null,
- "yColumns": null
- },
- "removedWidgets": [],
- "schema": [
- {
- "metadata": "{}",
- "name": "sepal_length_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "sepal_width_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "petal_length_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "petal_width_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "species",
- "type": "\"long\""
- },
- {
- "metadata": "{}",
- "name": "id",
- "type": "\"long\""
- }
- ],
- "type": "table"
- }
- },
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"try:\n",
- " display(spark.table(f\"{catalog_name}.default.iris_data\").limit(5))\n",
+ " display(spark.table(f\"{catalog_name}.default.iris_inferences\").limit(5))\n",
" table_exists = True\n",
"except:\n",
" table_exists = False"
@@ -719,153 +320,7 @@
"title": ""
}
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| sepal_length_cm | sepal_width_cm | petal_length_cm | petal_width_cm | prediction | actual_label | prediction_timestamp | model_id |
|---|
| 5.1 | 3.5 | 1.4 | 0.2 | 0 | 0 | 2025-08-11 17:47:22 | 21 |
| 4.9 | 3.0 | 1.4 | 0.2 | 0 | 0 | 2025-08-11 17:47:22 | 21 |
| 4.7 | 3.2 | 1.3 | 0.2 | 0 | 0 | 2025-08-11 17:47:22 | 21 |
| 4.6 | 3.1 | 1.5 | 0.2 | 0 | 0 | 2025-08-11 17:47:22 | 21 |
| 5.0 | 3.6 | 1.4 | 0.2 | 0 | 0 | 2025-08-11 17:47:22 | 21 |
"
- ]
- },
- "metadata": {
- "application/vnd.databricks.v1+output": {
- "addedWidgets": {},
- "aggData": [],
- "aggError": "",
- "aggOverflow": false,
- "aggSchema": [],
- "aggSeriesLimitReached": false,
- "aggType": "",
- "arguments": {},
- "columnCustomDisplayInfos": {},
- "data": [
- [
- 5.1,
- 3.5,
- 1.4,
- 0.2,
- 0,
- 0,
- "2025-08-11 17:47:22",
- "21"
- ],
- [
- 4.9,
- 3,
- 1.4,
- 0.2,
- 0,
- 0,
- "2025-08-11 17:47:22",
- "21"
- ],
- [
- 4.7,
- 3.2,
- 1.3,
- 0.2,
- 0,
- 0,
- "2025-08-11 17:47:22",
- "21"
- ],
- [
- 4.6,
- 3.1,
- 1.5,
- 0.2,
- 0,
- 0,
- "2025-08-11 17:47:22",
- "21"
- ],
- [
- 5,
- 3.6,
- 1.4,
- 0.2,
- 0,
- 0,
- "2025-08-11 17:47:22",
- "21"
- ]
- ],
- "datasetInfos": [],
- "dbfsResultPath": null,
- "isJsonSchema": true,
- "metadata": {},
- "overflow": false,
- "plotOptions": {
- "customPlotOptions": {},
- "displayType": "table",
- "pivotAggregation": null,
- "pivotColumns": null,
- "xColumns": null,
- "yColumns": null
- },
- "removedWidgets": [],
- "schema": [
- {
- "metadata": "{}",
- "name": "sepal_length_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "sepal_width_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "petal_length_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "petal_width_cm",
- "type": "\"double\""
- },
- {
- "metadata": "{}",
- "name": "prediction",
- "type": "\"long\""
- },
- {
- "metadata": "{}",
- "name": "actual_label",
- "type": "\"long\""
- },
- {
- "metadata": "{}",
- "name": "prediction_timestamp",
- "type": "\"string\""
- },
- {
- "metadata": "{}",
- "name": "model_id",
- "type": "\"string\""
- }
- ],
- "type": "table"
- }
- },
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"display(spark.sql(f\"SELECT * FROM {catalog_name}.default.iris_inferences LIMIT 5\"))"
]
@@ -886,18 +341,7 @@
"title": ""
}
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "DataFrame[]"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Enabling the Change Data Feed is a recommended practice for Inference Monitoring using Lakehouse Monitoring\n",
"# When CDF is enabled, only newly appended data is processed. \n",
@@ -941,21 +385,21 @@
"label": null,
"name": "catalog_name",
"options": {
- "validationRegex": null,
- "widgetDisplayType": "Text"
+ "widgetDisplayType": "Text",
+ "validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
+ "widgetType": "text",
"defaultValue": "pedroz_e2edata_dev",
"label": null,
"name": "catalog_name",
"options": {
+ "widgetType": "text",
"autoCreated": null,
- "validationRegex": null,
- "widgetType": "text"
- },
- "widgetType": "text"
+ "validationRegex": null
+ }
}
}
}
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 0892d0b..001315f 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -6,7 +6,7 @@ trigger:
variables:
- name: databricksHost
- value: 'https://adb-4181970831265458.18.azuredatabricks.net/'
+ value: '$(WORKSPACE_HOST_NAME)'
- name: pythonVersion
value: '3.9'
diff --git a/databricks.yml b/databricks.yml
index 85206ec..86783fb 100644
--- a/databricks.yml
+++ b/databricks.yml
@@ -9,7 +9,7 @@ targets:
mode: development
default: true
workspace:
- host: https://adb-4181970831265458.18.azuredatabricks.net/
+ root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target}
variables:
catalog_name: pedroz_e2edata_dev
environment: dev
@@ -44,7 +44,7 @@ targets:
prod:
mode: production
workspace:
- host: https://adb-4181970831265458.18.azuredatabricks.net/
+ root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target}
variables:
catalog_name: pedroz_e2edata_prod
environment: prod