diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..b290768 Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/databricks-deployment.yml b/.github/workflows/databricks-deployment.yml index fe3f508..490dbf8 100644 --- a/.github/workflows/databricks-deployment.yml +++ b/.github/workflows/databricks-deployment.yml @@ -7,7 +7,7 @@ on: - master env: - DATABRICKS_HOST: https://adb-4181970831265458.18.azuredatabricks.net/ + DATABRICKS_HOST: ${{ secrets.WORKSPACE_HOST_NAME }} PYTHON_VERSION: '3.9' jobs: diff --git a/Notebooks/.DS_Store b/Notebooks/.DS_Store new file mode 100644 index 0000000..e8b5dcb Binary files /dev/null and b/Notebooks/.DS_Store differ diff --git a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb index 291cd4d..56544cd 100644 --- a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb +++ b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb @@ -18,7 +18,8 @@ }, "outputs": [], "source": [ - "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC" + "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC\n", + "# Dummy change" ] }, { @@ -240,21 +241,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "pedroz_e2edata_dev", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb b/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb index ad80913..1e6f1ce 100644 --- a/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb +++ b/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb @@ -322,21 +322,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "pedroz_e2edata_dev", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/Notebooks/3_Inference/batch-inference.ipynb b/Notebooks/3_Inference/batch-inference.ipynb index f348524..a5e6264 100644 --- a/Notebooks/3_Inference/batch-inference.ipynb +++ b/Notebooks/3_Inference/batch-inference.ipynb @@ -128,98 +128,7 @@ "title": "" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/spark-95fe552f-1bd8-4fee-8597-dd/.ipykernel/7148/command-8412231637893746-4118003963:4: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", - " df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')\n", - "/home/spark-95fe552f-1bd8-4fee-8597-dd/.ipykernel/7148/command-8412231637893746-4118003963:4: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", - " df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cm
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
\n", - "
" - ], - "text/plain": [ - " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm\n", - "0 5.1 3.5 1.4 0.2\n", - "1 4.9 3.0 1.4 0.2\n", - "2 4.7 3.2 1.3 0.2\n", - "3 4.6 3.1 1.5 0.2\n", - "4 5.0 3.6 1.4 0.2" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Pull the dataset for running the inference\n", "iris_samples = datasets.load_iris(as_frame=True)\n", @@ -266,94 +175,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmprediction
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", - "
" - ], - "text/plain": [ - " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm prediction\n", - "0 5.1 3.5 1.4 0.2 0\n", - "1 4.9 3.0 1.4 0.2 0\n", - "2 4.7 3.2 1.3 0.2 0\n", - "3 4.6 3.1 1.5 0.2 0\n", - "4 5.0 3.6 1.4 0.2 0" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "predictions = model.predict(df_samples)\n", "df_samples['prediction'] = predictions\n", @@ -377,102 +199,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmpredictionactual_label
05.13.51.40.200
14.93.01.40.200
24.73.21.30.200
34.63.11.50.200
45.03.61.40.200
\n", - "
" - ], - "text/plain": [ - " sepal_length_cm sepal_width_cm ... prediction actual_label\n", - "0 5.1 3.5 ... 0 0\n", - "1 4.9 3.0 ... 0 0\n", - "2 4.7 3.2 ... 0 0\n", - "3 4.6 3.1 ... 0 0\n", - "4 5.0 3.6 ... 0 0\n", - "\n", - "[5 rows x 6 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_samples['actual_label'] = iris_samples['target']\n", "df_samples.head()" @@ -544,136 +271,10 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmspeciesid
5.13.51.40.201
4.93.01.40.202
4.73.21.30.203
4.63.11.50.204
5.03.61.40.205
" - ] - }, - "metadata": { - "application/vnd.databricks.v1+output": { - "addedWidgets": {}, - "aggData": [], - "aggError": "", - "aggOverflow": false, - "aggSchema": [], - "aggSeriesLimitReached": false, - "aggType": "", - "arguments": {}, - "columnCustomDisplayInfos": {}, - "data": [ - [ - 5.1, - 3.5, - 1.4, - 0.2, - 0, - 1 - ], - [ - 4.9, - 3, - 1.4, - 0.2, - 0, - 2 - ], - [ - 4.7, - 3.2, - 1.3, - 0.2, - 0, - 3 - ], - [ - 4.6, - 3.1, - 1.5, - 0.2, - 0, - 4 - ], - [ - 5, - 3.6, - 1.4, - 0.2, - 0, - 5 - ] - ], - "datasetInfos": [], - "dbfsResultPath": null, - "isJsonSchema": true, - "metadata": {}, - "overflow": false, - "plotOptions": { - "customPlotOptions": {}, - "displayType": "table", - "pivotAggregation": null, - "pivotColumns": null, - "xColumns": null, - "yColumns": null - }, - "removedWidgets": [], - "schema": [ - { - "metadata": "{}", - "name": "sepal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "sepal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "species", - "type": "\"long\"" - }, - { - "metadata": "{}", - "name": "id", - "type": "\"long\"" - } - ], - "type": "table" - } - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "try:\n", - " display(spark.table(f\"{catalog_name}.default.iris_data\").limit(5))\n", + " display(spark.table(f\"{catalog_name}.default.iris_inferences\").limit(5))\n", " table_exists = True\n", "except:\n", " table_exists = False" @@ -719,153 +320,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmpredictionactual_labelprediction_timestampmodel_id
5.13.51.40.2002025-08-11 17:47:2221
4.93.01.40.2002025-08-11 17:47:2221
4.73.21.30.2002025-08-11 17:47:2221
4.63.11.50.2002025-08-11 17:47:2221
5.03.61.40.2002025-08-11 17:47:2221
" - ] - }, - "metadata": { - "application/vnd.databricks.v1+output": { - "addedWidgets": {}, - "aggData": [], - "aggError": "", - "aggOverflow": false, - "aggSchema": [], - "aggSeriesLimitReached": false, - "aggType": "", - "arguments": {}, - "columnCustomDisplayInfos": {}, - "data": [ - [ - 5.1, - 3.5, - 1.4, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 4.9, - 3, - 1.4, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 4.7, - 3.2, - 1.3, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 4.6, - 3.1, - 1.5, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 5, - 3.6, - 1.4, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ] - ], - "datasetInfos": [], - "dbfsResultPath": null, - "isJsonSchema": true, - "metadata": {}, - "overflow": false, - "plotOptions": { - "customPlotOptions": {}, - "displayType": "table", - "pivotAggregation": null, - "pivotColumns": null, - "xColumns": null, - "yColumns": null - }, - "removedWidgets": [], - "schema": [ - { - "metadata": "{}", - "name": "sepal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "sepal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "prediction", - "type": "\"long\"" - }, - { - "metadata": "{}", - "name": "actual_label", - "type": "\"long\"" - }, - { - "metadata": "{}", - "name": "prediction_timestamp", - "type": "\"string\"" - }, - { - "metadata": "{}", - "name": "model_id", - "type": "\"string\"" - } - ], - "type": "table" - } - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(spark.sql(f\"SELECT * FROM {catalog_name}.default.iris_inferences LIMIT 5\"))" ] @@ -886,18 +341,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "DataFrame[]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Enabling the Change Data Feed is a recommended practice for Inference Monitoring using Lakehouse Monitoring\n", "# When CDF is enabled, only newly appended data is processed. \n", @@ -941,21 +385,21 @@ "label": null, "name": "catalog_name", "options": { - "validationRegex": null, - "widgetDisplayType": "Text" + "widgetDisplayType": "Text", + "validationRegex": null }, "parameterDataType": "String" }, "widgetInfo": { + "widgetType": "text", "defaultValue": "pedroz_e2edata_dev", "label": null, "name": "catalog_name", "options": { + "widgetType": "text", "autoCreated": null, - "validationRegex": null, - "widgetType": "text" - }, - "widgetType": "text" + "validationRegex": null + } } } } diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0892d0b..001315f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,7 +6,7 @@ trigger: variables: - name: databricksHost - value: 'https://adb-4181970831265458.18.azuredatabricks.net/' + value: '$(WORKSPACE_HOST_NAME)' - name: pythonVersion value: '3.9' diff --git a/databricks.yml b/databricks.yml index 85206ec..86783fb 100644 --- a/databricks.yml +++ b/databricks.yml @@ -9,7 +9,7 @@ targets: mode: development default: true workspace: - host: https://adb-4181970831265458.18.azuredatabricks.net/ + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} variables: catalog_name: pedroz_e2edata_dev environment: dev @@ -44,7 +44,7 @@ targets: prod: mode: production workspace: - host: https://adb-4181970831265458.18.azuredatabricks.net/ + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} variables: catalog_name: pedroz_e2edata_prod environment: prod