From 8427fa3260453d163a48f1cbd9589bf3db2893f4 Mon Sep 17 00:00:00 2001 From: pedrozanlorensi Date: Wed, 3 Dec 2025 14:35:33 +0000 Subject: [PATCH 1/7] dummy commit --- Notebooks/1_DataPreprocessing/data-ingestion.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb index 291cd4d..449df11 100644 --- a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb +++ b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb @@ -18,7 +18,8 @@ }, "outputs": [], "source": [ - "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC" + "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC\n", + "# Dummy change" ] }, { From 11c0a4c23bfd13745d075a2508a6974db4374993 Mon Sep 17 00:00:00 2001 From: pedrozanlorensi Date: Thu, 11 Dec 2025 20:30:07 +0000 Subject: [PATCH 2/7] remove dummy change --- Notebooks/1_DataPreprocessing/data-ingestion.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb index 449df11..291cd4d 100644 --- a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb +++ b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb @@ -18,8 +18,7 @@ }, "outputs": [], "source": [ - "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC\n", - "# Dummy change" + "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC" ] }, { From 879232dcb9bbf7bc8445620a52e1ff9ec992f97c Mon Sep 17 00:00:00 2001 From: pedrozanlorensi Date: Wed, 7 Jan 2026 13:27:24 +0000 Subject: [PATCH 3/7] uploading change to ingestion --- Notebooks/1_DataPreprocessing/data-ingestion.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb index 291cd4d..449df11 100644 --- a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb +++ b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb @@ -18,7 +18,8 @@ }, "outputs": [], "source": [ - "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC" + "# This notebook is meant to extract the data from sklearn.datasets and ingest it into a table in the UC\n", + "# Dummy change" ] }, { From b40bc7fff07d23d5f5284807b7cc87cb70cbd32b Mon Sep 17 00:00:00 2001 From: Pedro Zanlorensi Date: Fri, 9 Jan 2026 16:38:53 -0300 Subject: [PATCH 4/7] update env variables --- .github/workflows/databricks-deployment.yml | 2 +- azure-pipelines.yml | 2 +- databricks.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/databricks-deployment.yml b/.github/workflows/databricks-deployment.yml index fe3f508..490dbf8 100644 --- a/.github/workflows/databricks-deployment.yml +++ b/.github/workflows/databricks-deployment.yml @@ -7,7 +7,7 @@ on: - master env: - DATABRICKS_HOST: https://adb-4181970831265458.18.azuredatabricks.net/ + DATABRICKS_HOST: ${{ secrets.WORKSPACE_HOST_NAME }} PYTHON_VERSION: '3.9' jobs: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0892d0b..001315f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,7 +6,7 @@ trigger: variables: - name: databricksHost - value: 'https://adb-4181970831265458.18.azuredatabricks.net/' + value: '$(WORKSPACE_HOST_NAME)' - name: pythonVersion value: '3.9' diff --git a/databricks.yml b/databricks.yml index 85206ec..d79469e 100644 --- a/databricks.yml +++ b/databricks.yml @@ -9,7 +9,7 @@ targets: mode: development default: true workspace: - host: https://adb-4181970831265458.18.azuredatabricks.net/ + host: ${WORKSPACE_HOST_NAME} variables: catalog_name: pedroz_e2edata_dev environment: dev @@ -44,7 +44,7 @@ targets: prod: mode: production workspace: - host: https://adb-4181970831265458.18.azuredatabricks.net/ + host: ${WORKSPACE_HOST_NAME} variables: catalog_name: pedroz_e2edata_prod environment: prod From 921c935e0be4ea8dc3c7c1b2c67101d7be8b41b5 Mon Sep 17 00:00:00 2001 From: Pedro Zanlorensi Date: Fri, 9 Jan 2026 16:42:16 -0300 Subject: [PATCH 5/7] remove env variable --- databricks.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/databricks.yml b/databricks.yml index d79469e..5205401 100644 --- a/databricks.yml +++ b/databricks.yml @@ -8,8 +8,6 @@ targets: dev: mode: development default: true - workspace: - host: ${WORKSPACE_HOST_NAME} variables: catalog_name: pedroz_e2edata_dev environment: dev @@ -43,8 +41,6 @@ targets: prod: mode: production - workspace: - host: ${WORKSPACE_HOST_NAME} variables: catalog_name: pedroz_e2edata_prod environment: prod From e7efa4c4beba84fb976239ba32db0263818df94c Mon Sep 17 00:00:00 2001 From: pedrozanlorensi Date: Fri, 9 Jan 2026 19:47:31 +0000 Subject: [PATCH 6/7] fix table name --- Notebooks/3_Inference/batch-inference.ipynb | 582 +------------------- 1 file changed, 13 insertions(+), 569 deletions(-) diff --git a/Notebooks/3_Inference/batch-inference.ipynb b/Notebooks/3_Inference/batch-inference.ipynb index f348524..a5e6264 100644 --- a/Notebooks/3_Inference/batch-inference.ipynb +++ b/Notebooks/3_Inference/batch-inference.ipynb @@ -128,98 +128,7 @@ "title": "" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/spark-95fe552f-1bd8-4fee-8597-dd/.ipykernel/7148/command-8412231637893746-4118003963:4: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", - " df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')\n", - "/home/spark-95fe552f-1bd8-4fee-8597-dd/.ipykernel/7148/command-8412231637893746-4118003963:4: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", - " df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cm
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
\n", - "
" - ], - "text/plain": [ - " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm\n", - "0 5.1 3.5 1.4 0.2\n", - "1 4.9 3.0 1.4 0.2\n", - "2 4.7 3.2 1.3 0.2\n", - "3 4.6 3.1 1.5 0.2\n", - "4 5.0 3.6 1.4 0.2" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Pull the dataset for running the inference\n", "iris_samples = datasets.load_iris(as_frame=True)\n", @@ -266,94 +175,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmprediction
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", - "
" - ], - "text/plain": [ - " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm prediction\n", - "0 5.1 3.5 1.4 0.2 0\n", - "1 4.9 3.0 1.4 0.2 0\n", - "2 4.7 3.2 1.3 0.2 0\n", - "3 4.6 3.1 1.5 0.2 0\n", - "4 5.0 3.6 1.4 0.2 0" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "predictions = model.predict(df_samples)\n", "df_samples['prediction'] = predictions\n", @@ -377,102 +199,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmpredictionactual_label
05.13.51.40.200
14.93.01.40.200
24.73.21.30.200
34.63.11.50.200
45.03.61.40.200
\n", - "
" - ], - "text/plain": [ - " sepal_length_cm sepal_width_cm ... prediction actual_label\n", - "0 5.1 3.5 ... 0 0\n", - "1 4.9 3.0 ... 0 0\n", - "2 4.7 3.2 ... 0 0\n", - "3 4.6 3.1 ... 0 0\n", - "4 5.0 3.6 ... 0 0\n", - "\n", - "[5 rows x 6 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_samples['actual_label'] = iris_samples['target']\n", "df_samples.head()" @@ -544,136 +271,10 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmspeciesid
5.13.51.40.201
4.93.01.40.202
4.73.21.30.203
4.63.11.50.204
5.03.61.40.205
" - ] - }, - "metadata": { - "application/vnd.databricks.v1+output": { - "addedWidgets": {}, - "aggData": [], - "aggError": "", - "aggOverflow": false, - "aggSchema": [], - "aggSeriesLimitReached": false, - "aggType": "", - "arguments": {}, - "columnCustomDisplayInfos": {}, - "data": [ - [ - 5.1, - 3.5, - 1.4, - 0.2, - 0, - 1 - ], - [ - 4.9, - 3, - 1.4, - 0.2, - 0, - 2 - ], - [ - 4.7, - 3.2, - 1.3, - 0.2, - 0, - 3 - ], - [ - 4.6, - 3.1, - 1.5, - 0.2, - 0, - 4 - ], - [ - 5, - 3.6, - 1.4, - 0.2, - 0, - 5 - ] - ], - "datasetInfos": [], - "dbfsResultPath": null, - "isJsonSchema": true, - "metadata": {}, - "overflow": false, - "plotOptions": { - "customPlotOptions": {}, - "displayType": "table", - "pivotAggregation": null, - "pivotColumns": null, - "xColumns": null, - "yColumns": null - }, - "removedWidgets": [], - "schema": [ - { - "metadata": "{}", - "name": "sepal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "sepal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "species", - "type": "\"long\"" - }, - { - "metadata": "{}", - "name": "id", - "type": "\"long\"" - } - ], - "type": "table" - } - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "try:\n", - " display(spark.table(f\"{catalog_name}.default.iris_data\").limit(5))\n", + " display(spark.table(f\"{catalog_name}.default.iris_inferences\").limit(5))\n", " table_exists = True\n", "except:\n", " table_exists = False" @@ -719,153 +320,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmpredictionactual_labelprediction_timestampmodel_id
5.13.51.40.2002025-08-11 17:47:2221
4.93.01.40.2002025-08-11 17:47:2221
4.73.21.30.2002025-08-11 17:47:2221
4.63.11.50.2002025-08-11 17:47:2221
5.03.61.40.2002025-08-11 17:47:2221
" - ] - }, - "metadata": { - "application/vnd.databricks.v1+output": { - "addedWidgets": {}, - "aggData": [], - "aggError": "", - "aggOverflow": false, - "aggSchema": [], - "aggSeriesLimitReached": false, - "aggType": "", - "arguments": {}, - "columnCustomDisplayInfos": {}, - "data": [ - [ - 5.1, - 3.5, - 1.4, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 4.9, - 3, - 1.4, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 4.7, - 3.2, - 1.3, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 4.6, - 3.1, - 1.5, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ], - [ - 5, - 3.6, - 1.4, - 0.2, - 0, - 0, - "2025-08-11 17:47:22", - "21" - ] - ], - "datasetInfos": [], - "dbfsResultPath": null, - "isJsonSchema": true, - "metadata": {}, - "overflow": false, - "plotOptions": { - "customPlotOptions": {}, - "displayType": "table", - "pivotAggregation": null, - "pivotColumns": null, - "xColumns": null, - "yColumns": null - }, - "removedWidgets": [], - "schema": [ - { - "metadata": "{}", - "name": "sepal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "sepal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_length_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "petal_width_cm", - "type": "\"double\"" - }, - { - "metadata": "{}", - "name": "prediction", - "type": "\"long\"" - }, - { - "metadata": "{}", - "name": "actual_label", - "type": "\"long\"" - }, - { - "metadata": "{}", - "name": "prediction_timestamp", - "type": "\"string\"" - }, - { - "metadata": "{}", - "name": "model_id", - "type": "\"string\"" - } - ], - "type": "table" - } - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(spark.sql(f\"SELECT * FROM {catalog_name}.default.iris_inferences LIMIT 5\"))" ] @@ -886,18 +341,7 @@ "title": "" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "DataFrame[]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Enabling the Change Data Feed is a recommended practice for Inference Monitoring using Lakehouse Monitoring\n", "# When CDF is enabled, only newly appended data is processed. \n", @@ -941,21 +385,21 @@ "label": null, "name": "catalog_name", "options": { - "validationRegex": null, - "widgetDisplayType": "Text" + "widgetDisplayType": "Text", + "validationRegex": null }, "parameterDataType": "String" }, "widgetInfo": { + "widgetType": "text", "defaultValue": "pedroz_e2edata_dev", "label": null, "name": "catalog_name", "options": { + "widgetType": "text", "autoCreated": null, - "validationRegex": null, - "widgetType": "text" - }, - "widgetType": "text" + "validationRegex": null + } } } } From 0a7578977d97a82bec8a23c0ed2aa4389dd7f238 Mon Sep 17 00:00:00 2001 From: Pedro Zanlorensi Date: Thu, 22 Jan 2026 17:46:51 -0300 Subject: [PATCH 7/7] re-add workspace root_path --- .DS_Store | Bin 0 -> 6148 bytes Notebooks/.DS_Store | Bin 0 -> 6148 bytes .../1_DataPreprocessing/data-ingestion.ipynb | 12 ++++++------ .../model-training.ipynb | 12 ++++++------ databricks.yml | 4 ++++ 5 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 .DS_Store create mode 100644 Notebooks/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b290768c264e21ebee3c6deb127fd82541983688 GIT binary patch literal 6148 zcmeHK%SyvQ6g^WbwAw{+<*H=sM(_iKSmIU+{eVgzR)JmM#DDM) z^ea4fW@r-=18zk0UYI$P$+?r06Ox+&VA_k}9?%3(V-u{@*%gGGmzIl)pJ@<{U89E+ zoMDUwE^=(0_>B(8vuj|87z50aetsj&C*!^DAfC>w@ap?R8Fv~-QGXKm$*Ju(?(c3} zF9+rP;kWp+)0CEVtEi46>b}GXePWFBPm0Y(mAnF0T5D(b?e%fXYAs~0RF*R{`bPBtelCQ0~o0D1_upP5W2rqHCOyR(mVsfPvZ?oCpd>{j2=rM7~8HWA{SR1rc I2Y%Fn5AvaYRR910 literal 0 HcmV?d00001 diff --git a/Notebooks/.DS_Store b/Notebooks/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e8b5dcbaf6a5ab0bb35b952e82fdfd4db8f2797c GIT binary patch literal 6148 zcmeH~zfJ-{5XNVff(QkPjWHBg8oSfR@*rnHVPb-{3LK)z9Y+uojm?#ow!VtJ`Ut*` z-|SBCE{Ojy#+Vth`z?Djdpp0dyDSlz=0)Ta)rhE!!dhBE^@j0tE;F_yJyoc&V@#++ z`!uFLjb^;HLkTE>zea%9?kf3oL?PC0e*KOqr5;xDIX-7Jq(MCJ+r2cAmAzj7d}NY_ zVK+{@h$y$7tJe=VPmQlcM+-!cPV=+&@LHR)c$L47Bqz1B$z=L1$$fy4=T@Gd{(Ur3S4 zcyqpIKIfgsY0h)T&1dwncX7TM&sXhz0ywkT$`z+tD*+{-1f~RdfACOP1JTT>-8xX& zD*&{FZfj`sU4mmg(LgkFiYGARLZL3y$Q8r5aP)iP7l>v~T{s!Jd>DDNksFG!Z^!ta zbte-z)mjNCfg*ur_qE37|L*+ze=$hEDFG$$uLzh@*bbXWNzT@pxa9*e`Qx kLr}?aY#V$Ouc27OGmjh4Ks0lT9+>_Suo$#b0>4V&1!D)Na{vGU literal 0 HcmV?d00001 diff --git a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb index 449df11..56544cd 100644 --- a/Notebooks/1_DataPreprocessing/data-ingestion.ipynb +++ b/Notebooks/1_DataPreprocessing/data-ingestion.ipynb @@ -241,21 +241,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "pedroz_e2edata_dev", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb b/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb index ad80913..1e6f1ce 100644 --- a/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb +++ b/Notebooks/2_ModelTrainingAndDeployment/model-training.ipynb @@ -322,21 +322,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "pedroz_e2edata_dev", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/databricks.yml b/databricks.yml index 5205401..86783fb 100644 --- a/databricks.yml +++ b/databricks.yml @@ -8,6 +8,8 @@ targets: dev: mode: development default: true + workspace: + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} variables: catalog_name: pedroz_e2edata_dev environment: dev @@ -41,6 +43,8 @@ targets: prod: mode: production + workspace: + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} variables: catalog_name: pedroz_e2edata_prod environment: prod