From 6b5133404eda3b6e7dad2b91b2a0b47f1d22cdf0 Mon Sep 17 00:00:00 2001 From: Kacper Muda Date: Wed, 6 May 2026 15:20:34 +0200 Subject: [PATCH] feat: Add uri sanitizers and asset factories for new schemes --- providers/amazon/provider.yaml | 8 ++ .../providers/amazon/aws/assets/redshift.py | 55 ++++++++ .../providers/amazon/get_provider_info.py | 16 ++- .../unit/amazon/aws/assets/test_redshift.py | 90 +++++++++++++ providers/apache/hdfs/provider.yaml | 14 ++ .../providers/apache/hdfs/assets/__init__.py | 16 +++ .../providers/apache/hdfs/assets/hdfs.py | 50 +++++++ .../apache/hdfs/get_provider_info.py | 16 +++ .../tests/unit/apache/hdfs/assets/__init__.py | 16 +++ .../unit/apache/hdfs/assets/test_hdfs.py | 92 +++++++++++++ providers/apache/hive/provider.yaml | 14 ++ .../providers/apache/hive/assets/__init__.py | 16 +++ .../providers/apache/hive/assets/hive.py | 52 ++++++++ .../apache/hive/get_provider_info.py | 16 +++ .../tests/unit/apache/hive/assets/__init__.py | 16 +++ .../unit/apache/hive/assets/test_hive.py | 84 ++++++++++++ providers/apache/kafka/provider.yaml | 14 ++ .../providers/apache/kafka/assets/__init__.py | 16 +++ .../providers/apache/kafka/assets/kafka.py | 49 +++++++ .../apache/kafka/get_provider_info.py | 16 +++ .../unit/apache/kafka/assets/__init__.py | 16 +++ .../unit/apache/kafka/assets/test_kafka.py | 77 +++++++++++ providers/databricks/provider.yaml | 14 ++ .../providers/databricks/assets/__init__.py | 16 +++ .../providers/databricks/assets/databricks.py | 53 ++++++++ .../providers/databricks/get_provider_info.py | 16 +++ .../tests/unit/databricks/assets/__init__.py | 16 +++ .../unit/databricks/assets/test_databricks.py | 77 +++++++++++ providers/ftp/provider.yaml | 14 ++ .../airflow/providers/ftp/assets/__init__.py | 16 +++ .../src/airflow/providers/ftp/assets/ftp.py | 53 ++++++++ .../providers/ftp/get_provider_info.py | 16 +++ .../ftp/tests/unit/ftp/assets/__init__.py | 16 +++ .../ftp/tests/unit/ftp/assets/test_ftp.py | 88 ++++++++++++ 
providers/microsoft/mssql/docs/index.rst | 15 ++- providers/microsoft/mssql/provider.yaml | 14 ++ providers/microsoft/mssql/pyproject.toml | 4 + .../microsoft/mssql/assets/__init__.py | 16 +++ .../providers/microsoft/mssql/assets/mssql.py | 55 ++++++++ .../microsoft/mssql/get_provider_info.py | 16 +++ .../unit/microsoft/mssql/assets/__init__.py | 16 +++ .../unit/microsoft/mssql/assets/test_mssql.py | 125 ++++++++++++++++++ providers/mongo/provider.yaml | 14 ++ .../providers/mongo/assets/__init__.py | 16 +++ .../airflow/providers/mongo/assets/mongo.py | 55 ++++++++ .../providers/mongo/get_provider_info.py | 16 +++ .../mongo/tests/unit/mongo/assets/__init__.py | 16 +++ .../tests/unit/mongo/assets/test_mongo.py | 97 ++++++++++++++ providers/mysql/provider.yaml | 4 + .../airflow/providers/mysql/assets/mysql.py | 21 +++ .../providers/mysql/get_provider_info.py | 14 +- .../tests/unit/mysql/assets/test_mysql.py | 37 +++++- providers/oracle/provider.yaml | 14 ++ .../providers/oracle/assets/__init__.py | 16 +++ .../airflow/providers/oracle/assets/oracle.py | 63 +++++++++ .../providers/oracle/get_provider_info.py | 16 +++ .../tests/unit/oracle/assets/__init__.py | 16 +++ .../tests/unit/oracle/assets/test_oracle.py | 125 ++++++++++++++++++ providers/postgres/provider.yaml | 4 + .../providers/postgres/assets/postgres.py | 21 +++ .../providers/postgres/get_provider_info.py | 4 + .../unit/postgres/assets/test_postgres.py | 61 ++++++++- providers/presto/provider.yaml | 14 ++ .../providers/presto/assets/__init__.py | 16 +++ .../airflow/providers/presto/assets/presto.py | 55 ++++++++ .../providers/presto/get_provider_info.py | 16 +++ .../tests/unit/presto/assets/__init__.py | 16 +++ .../tests/unit/presto/assets/test_presto.py | 96 ++++++++++++++ providers/sftp/provider.yaml | 14 ++ .../airflow/providers/sftp/assets/__init__.py | 16 +++ .../src/airflow/providers/sftp/assets/sftp.py | 53 ++++++++ .../providers/sftp/get_provider_info.py | 16 +++ 
.../sftp/tests/unit/sftp/assets/__init__.py | 16 +++ .../sftp/tests/unit/sftp/assets/test_sftp.py | 88 ++++++++++++ providers/snowflake/provider.yaml | 14 ++ .../providers/snowflake/assets/__init__.py | 16 +++ .../providers/snowflake/assets/snowflake.py | 50 +++++++ .../providers/snowflake/get_provider_info.py | 16 +++ .../tests/unit/snowflake/assets/__init__.py | 16 +++ .../unit/snowflake/assets/test_snowflake.py | 72 ++++++++++ providers/teradata/provider.yaml | 14 ++ .../providers/teradata/assets/__init__.py | 16 +++ .../providers/teradata/assets/teradata.py | 55 ++++++++ .../providers/teradata/get_provider_info.py | 16 +++ .../tests/unit/teradata/assets/__init__.py | 16 +++ .../unit/teradata/assets/test_teradata.py | 60 +++++++++ providers/trino/provider.yaml | 4 + .../airflow/providers/trino/assets/trino.py | 21 +++ .../providers/trino/get_provider_info.py | 14 +- .../tests/unit/trino/assets/test_trino.py | 20 ++- providers/vertica/provider.yaml | 14 ++ .../providers/vertica/assets/__init__.py | 16 +++ .../providers/vertica/assets/vertica.py | 55 ++++++++ .../providers/vertica/get_provider_info.py | 16 +++ .../tests/unit/vertica/assets/__init__.py | 16 +++ .../tests/unit/vertica/assets/test_vertica.py | 98 ++++++++++++++ .../tests/task_sdk/definitions/test_asset.py | 4 +- uv.lock | 44 +++--- 98 files changed, 3107 insertions(+), 37 deletions(-) create mode 100644 providers/amazon/src/airflow/providers/amazon/aws/assets/redshift.py create mode 100644 providers/amazon/tests/unit/amazon/aws/assets/test_redshift.py create mode 100644 providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/__init__.py create mode 100644 providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/hdfs.py create mode 100644 providers/apache/hdfs/tests/unit/apache/hdfs/assets/__init__.py create mode 100644 providers/apache/hdfs/tests/unit/apache/hdfs/assets/test_hdfs.py create mode 100644 providers/apache/hive/src/airflow/providers/apache/hive/assets/__init__.py create mode 
100644 providers/apache/hive/src/airflow/providers/apache/hive/assets/hive.py create mode 100644 providers/apache/hive/tests/unit/apache/hive/assets/__init__.py create mode 100644 providers/apache/hive/tests/unit/apache/hive/assets/test_hive.py create mode 100644 providers/apache/kafka/src/airflow/providers/apache/kafka/assets/__init__.py create mode 100644 providers/apache/kafka/src/airflow/providers/apache/kafka/assets/kafka.py create mode 100644 providers/apache/kafka/tests/unit/apache/kafka/assets/__init__.py create mode 100644 providers/apache/kafka/tests/unit/apache/kafka/assets/test_kafka.py create mode 100644 providers/databricks/src/airflow/providers/databricks/assets/__init__.py create mode 100644 providers/databricks/src/airflow/providers/databricks/assets/databricks.py create mode 100644 providers/databricks/tests/unit/databricks/assets/__init__.py create mode 100644 providers/databricks/tests/unit/databricks/assets/test_databricks.py create mode 100644 providers/ftp/src/airflow/providers/ftp/assets/__init__.py create mode 100644 providers/ftp/src/airflow/providers/ftp/assets/ftp.py create mode 100644 providers/ftp/tests/unit/ftp/assets/__init__.py create mode 100644 providers/ftp/tests/unit/ftp/assets/test_ftp.py create mode 100644 providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/__init__.py create mode 100644 providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py create mode 100644 providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/__init__.py create mode 100644 providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py create mode 100644 providers/mongo/src/airflow/providers/mongo/assets/__init__.py create mode 100644 providers/mongo/src/airflow/providers/mongo/assets/mongo.py create mode 100644 providers/mongo/tests/unit/mongo/assets/__init__.py create mode 100644 providers/mongo/tests/unit/mongo/assets/test_mongo.py create mode 100644 
providers/oracle/src/airflow/providers/oracle/assets/__init__.py create mode 100644 providers/oracle/src/airflow/providers/oracle/assets/oracle.py create mode 100644 providers/oracle/tests/unit/oracle/assets/__init__.py create mode 100644 providers/oracle/tests/unit/oracle/assets/test_oracle.py create mode 100644 providers/presto/src/airflow/providers/presto/assets/__init__.py create mode 100644 providers/presto/src/airflow/providers/presto/assets/presto.py create mode 100644 providers/presto/tests/unit/presto/assets/__init__.py create mode 100644 providers/presto/tests/unit/presto/assets/test_presto.py create mode 100644 providers/sftp/src/airflow/providers/sftp/assets/__init__.py create mode 100644 providers/sftp/src/airflow/providers/sftp/assets/sftp.py create mode 100644 providers/sftp/tests/unit/sftp/assets/__init__.py create mode 100644 providers/sftp/tests/unit/sftp/assets/test_sftp.py create mode 100644 providers/snowflake/src/airflow/providers/snowflake/assets/__init__.py create mode 100644 providers/snowflake/src/airflow/providers/snowflake/assets/snowflake.py create mode 100644 providers/snowflake/tests/unit/snowflake/assets/__init__.py create mode 100644 providers/snowflake/tests/unit/snowflake/assets/test_snowflake.py create mode 100644 providers/teradata/src/airflow/providers/teradata/assets/__init__.py create mode 100644 providers/teradata/src/airflow/providers/teradata/assets/teradata.py create mode 100644 providers/teradata/tests/unit/teradata/assets/__init__.py create mode 100644 providers/teradata/tests/unit/teradata/assets/test_teradata.py create mode 100644 providers/vertica/src/airflow/providers/vertica/assets/__init__.py create mode 100644 providers/vertica/src/airflow/providers/vertica/assets/vertica.py create mode 100644 providers/vertica/tests/unit/vertica/assets/__init__.py create mode 100644 providers/vertica/tests/unit/vertica/assets/test_vertica.py diff --git a/providers/amazon/provider.yaml b/providers/amazon/provider.yaml index 
df73ba8c7cadd..e8560c29946de 100644 --- a/providers/amazon/provider.yaml +++ b/providers/amazon/provider.yaml @@ -620,6 +620,10 @@ asset-uris: handler: airflow.providers.amazon.aws.assets.s3.sanitize_uri to_openlineage_converter: airflow.providers.amazon.aws.assets.s3.convert_asset_to_openlineage factory: airflow.providers.amazon.aws.assets.s3.create_asset + - schemes: [redshift] + handler: airflow.providers.amazon.aws.assets.redshift.sanitize_uri + factory: airflow.providers.amazon.aws.assets.redshift.create_asset + to_openlineage_converter: airflow.providers.amazon.aws.assets.redshift.convert_asset_to_openlineage # dataset has been renamed to asset in Airflow 3.0 # This is kept for backward compatibility. @@ -628,6 +632,10 @@ dataset-uris: handler: airflow.providers.amazon.aws.assets.s3.sanitize_uri to_openlineage_converter: airflow.providers.amazon.aws.assets.s3.convert_asset_to_openlineage factory: airflow.providers.amazon.aws.assets.s3.create_asset + - schemes: [redshift] + handler: airflow.providers.amazon.aws.assets.redshift.sanitize_uri + factory: airflow.providers.amazon.aws.assets.redshift.create_asset + to_openlineage_converter: airflow.providers.amazon.aws.assets.redshift.convert_asset_to_openlineage filesystems: - airflow.providers.amazon.aws.fs.s3 diff --git a/providers/amazon/src/airflow/providers/amazon/aws/assets/redshift.py b/providers/amazon/src/airflow/providers/amazon/aws/assets/redshift.py new file mode 100644 index 0000000000000..2747cca54e54b --- /dev/null +++ b/providers/amazon/src/airflow/providers/amazon/aws/assets/redshift.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format redshift:// must contain a host") + if uri.port is None: + host = uri.netloc.rstrip(":") + uri = uri._replace(netloc=f"{host}:5439") + if len(uri.path.split("/")) != 4: # Leading slash, database, schema, and table names. + raise ValueError("URI format redshift:// must contain database, schema, and table names") + return uri + + +def create_asset( + *, host: str, database: str, schema: str, table: str, port: int = 5439, extra: dict | None = None +) -> Asset: + return Asset(uri=f"redshift://{host}:{port}/{database}/{schema}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, database, schema, table = parsed.path.split("/") # Leading slash, database, schema, and table names. 
+ return OpenLineageDataset(namespace=f"redshift://{parsed.netloc}", name=f"{database}.{schema}.{table}") diff --git a/providers/amazon/src/airflow/providers/amazon/get_provider_info.py b/providers/amazon/src/airflow/providers/amazon/get_provider_info.py index 1363bd84d2ab2..99721f3c481b9 100644 --- a/providers/amazon/src/airflow/providers/amazon/get_provider_info.py +++ b/providers/amazon/src/airflow/providers/amazon/get_provider_info.py @@ -667,7 +667,13 @@ def get_provider_info(): "handler": "airflow.providers.amazon.aws.assets.s3.sanitize_uri", "to_openlineage_converter": "airflow.providers.amazon.aws.assets.s3.convert_asset_to_openlineage", "factory": "airflow.providers.amazon.aws.assets.s3.create_asset", - } + }, + { + "schemes": ["redshift"], + "handler": "airflow.providers.amazon.aws.assets.redshift.sanitize_uri", + "factory": "airflow.providers.amazon.aws.assets.redshift.create_asset", + "to_openlineage_converter": "airflow.providers.amazon.aws.assets.redshift.convert_asset_to_openlineage", + }, ], "dataset-uris": [ { @@ -675,7 +681,13 @@ def get_provider_info(): "handler": "airflow.providers.amazon.aws.assets.s3.sanitize_uri", "to_openlineage_converter": "airflow.providers.amazon.aws.assets.s3.convert_asset_to_openlineage", "factory": "airflow.providers.amazon.aws.assets.s3.create_asset", - } + }, + { + "schemes": ["redshift"], + "handler": "airflow.providers.amazon.aws.assets.redshift.sanitize_uri", + "factory": "airflow.providers.amazon.aws.assets.redshift.create_asset", + "to_openlineage_converter": "airflow.providers.amazon.aws.assets.redshift.convert_asset_to_openlineage", + }, ], "filesystems": ["airflow.providers.amazon.aws.fs.s3"], "hooks": [ diff --git a/providers/amazon/tests/unit/amazon/aws/assets/test_redshift.py b/providers/amazon/tests/unit/amazon/aws/assets/test_redshift.py new file mode 100644 index 0000000000000..30429e257435d --- /dev/null +++ b/providers/amazon/tests/unit/amazon/aws/assets/test_redshift.py @@ -0,0 +1,90 @@ +# Licensed 
to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.amazon.aws.assets.redshift import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) +from airflow.providers.common.compat.assets import Asset + + +@pytest.mark.parametrize( + ("original", "normalized"), + [ + pytest.param( + "redshift://cluster.us-east-1:5439/database/schema/table", + "redshift://cluster.us-east-1:5439/database/schema/table", + id="normalized", + ), + pytest.param( + "redshift://cluster.us-east-1/database/schema/table", + "redshift://cluster.us-east-1:5439/database/schema/table", + id="default-port", + ), + ], +) +def test_sanitize_uri_pass(original: str, normalized: str) -> None: + uri_i = urllib.parse.urlsplit(original) + uri_o = sanitize_uri(uri_i) + assert urllib.parse.urlunsplit(uri_o) == normalized + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("redshift://", id="blank"), + pytest.param("redshift:///database/schema/table", id="no-host"), + pytest.param("redshift://host/database/table", id="missing-component"), + pytest.param("redshift://host/database/schema/table/column", id="extra-component"), + ], +) +def test_sanitize_uri_fail(value: 
str) -> None: + uri_i = urllib.parse.urlsplit(value) + with pytest.raises(ValueError, match="URI format redshift:// must contain"): + sanitize_uri(uri_i) + + +def test_sanitize_uri_fail_non_port() -> None: + uri_i = urllib.parse.urlsplit("redshift://cluster.us-east-1:abcd/database/schema/table") + with pytest.raises(ValueError, match="Port could not be cast to integer value as 'abcd'"): + sanitize_uri(uri_i) + + +def test_create_asset() -> None: + result = create_asset(host="cluster.us-east-1", database="mydb", schema="public", table="users") + assert result == Asset(uri="redshift://cluster.us-east-1:5439/mydb/public/users") + + +def test_create_asset_custom_port() -> None: + result = create_asset( + host="cluster.us-east-1", port=5440, database="mydb", schema="public", table="users" + ) + assert result == Asset(uri="redshift://cluster.us-east-1:5440/mydb/public/users") + + +def test_convert_asset_to_openlineage() -> None: + asset = Asset(uri="redshift://cluster.us-east-1:5439/mydb/public/users") + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == "redshift://cluster.us-east-1:5439" + assert ol_dataset.name == "mydb.public.users" diff --git a/providers/apache/hdfs/provider.yaml b/providers/apache/hdfs/provider.yaml index 1f9cc0e6d9f48..ca6ae70bfe07c 100644 --- a/providers/apache/hdfs/provider.yaml +++ b/providers/apache/hdfs/provider.yaml @@ -92,6 +92,20 @@ sensors: python-modules: - airflow.providers.apache.hdfs.sensors.web_hdfs +asset-uris: + - schemes: [hdfs] + handler: airflow.providers.apache.hdfs.assets.hdfs.sanitize_uri + factory: airflow.providers.apache.hdfs.assets.hdfs.create_asset + to_openlineage_converter: airflow.providers.apache.hdfs.assets.hdfs.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. 
+dataset-uris: + - schemes: [hdfs] + handler: airflow.providers.apache.hdfs.assets.hdfs.sanitize_uri + factory: airflow.providers.apache.hdfs.assets.hdfs.create_asset + to_openlineage_converter: airflow.providers.apache.hdfs.assets.hdfs.convert_asset_to_openlineage + hooks: - integration-name: WebHDFS python-modules: diff --git a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/__init__.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/hdfs.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/hdfs.py new file mode 100644 index 0000000000000..5f3b716f880b5 --- /dev/null +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/assets/hdfs.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format hdfs:// must contain a namenode host") + if not uri.path: + raise ValueError("URI format hdfs:// must contain a path") + return uri + + +def create_asset(*, host: str, path: str, port: int = 8020, extra: dict | None = None) -> Asset: + return Asset(uri=f"hdfs://{host}:{port}/{path}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path + return OpenLineageDataset(namespace=f"hdfs://{parsed.netloc}", name=path or "/") diff --git a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py index 3eaa922320657..381cc777b4547 100644 --- 
a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py @@ -46,6 +46,22 @@ def get_provider_info(): "python-modules": ["airflow.providers.apache.hdfs.sensors.web_hdfs"], } ], + "asset-uris": [ + { + "schemes": ["hdfs"], + "handler": "airflow.providers.apache.hdfs.assets.hdfs.sanitize_uri", + "factory": "airflow.providers.apache.hdfs.assets.hdfs.create_asset", + "to_openlineage_converter": "airflow.providers.apache.hdfs.assets.hdfs.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["hdfs"], + "handler": "airflow.providers.apache.hdfs.assets.hdfs.sanitize_uri", + "factory": "airflow.providers.apache.hdfs.assets.hdfs.create_asset", + "to_openlineage_converter": "airflow.providers.apache.hdfs.assets.hdfs.convert_asset_to_openlineage", + } + ], "hooks": [ {"integration-name": "WebHDFS", "python-modules": ["airflow.providers.apache.hdfs.hooks.webhdfs"]} ], diff --git a/providers/apache/hdfs/tests/unit/apache/hdfs/assets/__init__.py b/providers/apache/hdfs/tests/unit/apache/hdfs/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/hdfs/tests/unit/apache/hdfs/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/hdfs/tests/unit/apache/hdfs/assets/test_hdfs.py b/providers/apache/hdfs/tests/unit/apache/hdfs/assets/test_hdfs.py new file mode 100644 index 0000000000000..1c7973b38ce4a --- /dev/null +++ b/providers/apache/hdfs/tests/unit/apache/hdfs/assets/test_hdfs.py @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.apache.hdfs.assets.hdfs import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) +from airflow.providers.common.compat.assets import Asset + + +@pytest.mark.parametrize( + ("original", "normalized"), + [ + pytest.param( + "hdfs://namenode:8020/data/file.csv", + "hdfs://namenode:8020/data/file.csv", + id="normalized", + ), + pytest.param( + "hdfs://namenode/data/file.csv", + "hdfs://namenode/data/file.csv", + id="no-explicit-port", + ), + ], +) +def test_sanitize_uri_pass(original: str, normalized: str) -> None: + uri_i = urllib.parse.urlsplit(original) + uri_o = sanitize_uri(uri_i) + assert urllib.parse.urlunsplit(uri_o) == normalized + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("hdfs://", id="blank"), + pytest.param("hdfs:///path/to/file", id="no-host"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + uri_i = urllib.parse.urlsplit(value) + with pytest.raises(ValueError, match="URI format hdfs:// must contain"): + sanitize_uri(uri_i) + + +@pytest.mark.parametrize( + ("path", "expected_uri"), + [ + pytest.param("/data/file.csv", "hdfs://namenode:8020//data/file.csv", id="root"), + pytest.param("data/file.csv", "hdfs://namenode:8020/data/file.csv", id="no-leading-slash"), + ], +) +def test_create_asset(path: str, expected_uri: str) -> None: + result = create_asset(host="namenode", path=path) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("expected_name", "uri"), + [ + pytest.param("/", "hdfs://namenode:8020", id="no-path"), + pytest.param("/", "hdfs://namenode:8020/", id="path-slash-only"), + pytest.param("/data/file.csv", "hdfs://namenode:8020//data/file.csv", id="root"), + pytest.param("data/file.csv", "hdfs://namenode:8020/data/file.csv", id="no-leading-slash"), + pytest.param("//data/file.csv", "hdfs://namenode:8020///data/file.csv", id="two-slashes"), + ], +) +def 
test_convert_asset_to_openlineage(expected_name, uri) -> None: + asset = Asset(uri=uri) + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == "hdfs://namenode:8020" + assert ol_dataset.name == expected_name diff --git a/providers/apache/hive/provider.yaml b/providers/apache/hive/provider.yaml index 0bb4498ebafaa..ef7f4e91aac3d 100644 --- a/providers/apache/hive/provider.yaml +++ b/providers/apache/hive/provider.yaml @@ -118,6 +118,20 @@ sensors: - airflow.providers.apache.hive.sensors.metastore_partition - airflow.providers.apache.hive.sensors.named_hive_partition +asset-uris: + - schemes: [hive] + handler: airflow.providers.apache.hive.assets.hive.sanitize_uri + factory: airflow.providers.apache.hive.assets.hive.create_asset + to_openlineage_converter: airflow.providers.apache.hive.assets.hive.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [hive] + handler: airflow.providers.apache.hive.assets.hive.sanitize_uri + factory: airflow.providers.apache.hive.assets.hive.create_asset + to_openlineage_converter: airflow.providers.apache.hive.assets.hive.convert_asset_to_openlineage + hooks: - integration-name: Apache Hive python-modules: diff --git a/providers/apache/hive/src/airflow/providers/apache/hive/assets/__init__.py b/providers/apache/hive/src/airflow/providers/apache/hive/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/hive/src/airflow/providers/apache/hive/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/hive/src/airflow/providers/apache/hive/assets/hive.py b/providers/apache/hive/src/airflow/providers/apache/hive/assets/hive.py new file mode 100644 index 0000000000000..950c7a77457b6 --- /dev/null +++ b/providers/apache/hive/src/airflow/providers/apache/hive/assets/hive.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format hive:// must contain a host") + if len(uri.path.split("/")) != 3: # Leading slash, database and table names. + raise ValueError("URI format hive:// must contain database, schema, and table names") + return uri + + +def create_asset( + *, host: str, database: str, table: str, port: int = 10000, extra: dict | None = None +) -> Asset: + return Asset(uri=f"hive://{host}:{port}/{database}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, database, table = parsed.path.split("/") # Leading slash, database and table names. 
+ return OpenLineageDataset(namespace=f"hive://{parsed.netloc}", name=f"{database}.{table}") diff --git a/providers/apache/hive/src/airflow/providers/apache/hive/get_provider_info.py b/providers/apache/hive/src/airflow/providers/apache/hive/get_provider_info.py index 7fe5d92770496..f9d7cd5b0d7ce 100644 --- a/providers/apache/hive/src/airflow/providers/apache/hive/get_provider_info.py +++ b/providers/apache/hive/src/airflow/providers/apache/hive/get_provider_info.py @@ -54,6 +54,22 @@ def get_provider_info(): ], } ], + "asset-uris": [ + { + "schemes": ["hive"], + "handler": "airflow.providers.apache.hive.assets.hive.sanitize_uri", + "factory": "airflow.providers.apache.hive.assets.hive.create_asset", + "to_openlineage_converter": "airflow.providers.apache.hive.assets.hive.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["hive"], + "handler": "airflow.providers.apache.hive.assets.hive.sanitize_uri", + "factory": "airflow.providers.apache.hive.assets.hive.create_asset", + "to_openlineage_converter": "airflow.providers.apache.hive.assets.hive.convert_asset_to_openlineage", + } + ], "hooks": [ { "integration-name": "Apache Hive", diff --git a/providers/apache/hive/tests/unit/apache/hive/assets/__init__.py b/providers/apache/hive/tests/unit/apache/hive/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/hive/tests/unit/apache/hive/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/hive/tests/unit/apache/hive/assets/test_hive.py b/providers/apache/hive/tests/unit/apache/hive/assets/test_hive.py new file mode 100644 index 0000000000000..91fa5e54cd3a1 --- /dev/null +++ b/providers/apache/hive/tests/unit/apache/hive/assets/test_hive.py @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.apache.hive.assets.hive import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) +from airflow.providers.common.compat.assets import Asset + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("hive://host:10000/default/mytable", id="valid"), + ], +) +def test_sanitize_uri_pass(value: str) -> None: + result = sanitize_uri(urllib.parse.urlsplit(value)) + assert result.scheme == "hive" + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("hive:///db/table", id="missing-host"), + pytest.param("hive://host:10000", id="missing-path"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + with pytest.raises(ValueError, match="must contain"): + sanitize_uri(urllib.parse.urlsplit(value)) + + +@pytest.mark.parametrize( + ("host", "database", "table", "port", "expected_uri"), + [ + pytest.param( + "myhost", "default", "mytable", 10000, "hive://myhost:10000/default/mytable", id="default-port" + ), + pytest.param("myhost", "db", "t", 10001, "hive://myhost:10001/db/t", id="custom-port"), + ], +) +def test_create_asset(host: str, database: str, table: str, port: int, expected_uri: str) -> None: + result = create_asset(host=host, database=database, table=table, port=port) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param( + "hive://myhost:10000/default/mytable", "hive://myhost:10000", "default.mytable", id="default-port" + ), + pytest.param( + "hive://otherhost:10001/mydb/users", "hive://otherhost:10001", "mydb.users", id="custom-port" + ), + ], +) +def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + result = convert_asset_to_openlineage(asset, None) + assert result.namespace == expected_namespace + assert result.name == expected_name diff --git 
a/providers/apache/kafka/provider.yaml b/providers/apache/kafka/provider.yaml index e482a487383f5..54a8ed9ebb846 100644 --- a/providers/apache/kafka/provider.yaml +++ b/providers/apache/kafka/provider.yaml @@ -77,6 +77,20 @@ operators: - airflow.providers.apache.kafka.operators.consume - airflow.providers.apache.kafka.operators.produce +asset-uris: + - schemes: [kafka] + handler: airflow.providers.apache.kafka.assets.kafka.sanitize_uri + factory: airflow.providers.apache.kafka.assets.kafka.create_asset + to_openlineage_converter: airflow.providers.apache.kafka.assets.kafka.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [kafka] + handler: airflow.providers.apache.kafka.assets.kafka.sanitize_uri + factory: airflow.providers.apache.kafka.assets.kafka.create_asset + to_openlineage_converter: airflow.providers.apache.kafka.assets.kafka.convert_asset_to_openlineage + hooks: - integration-name: Apache Kafka python-modules: diff --git a/providers/apache/kafka/src/airflow/providers/apache/kafka/assets/__init__.py b/providers/apache/kafka/src/airflow/providers/apache/kafka/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/kafka/src/airflow/providers/apache/kafka/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/kafka/src/airflow/providers/apache/kafka/assets/kafka.py b/providers/apache/kafka/src/airflow/providers/apache/kafka/assets/kafka.py new file mode 100644 index 0000000000000..52db61c071d55 --- /dev/null +++ b/providers/apache/kafka/src/airflow/providers/apache/kafka/assets/kafka.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format kafka:// must contain a bootstrap server host") + if not uri.path or uri.path == "/": + raise ValueError("URI format kafka:// must contain a topic name") + return uri + + +def create_asset(*, server: str, topic: str, extra: dict | None = None) -> Asset: + return Asset(uri=f"kafka://{server}/{topic}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + return OpenLineageDataset(namespace=f"kafka://{parsed.netloc}", name=parsed.path.lstrip("/") or "/") diff --git a/providers/apache/kafka/src/airflow/providers/apache/kafka/get_provider_info.py b/providers/apache/kafka/src/airflow/providers/apache/kafka/get_provider_info.py index 11d8b678304e4..1a68a7b916744 100644 --- a/providers/apache/kafka/src/airflow/providers/apache/kafka/get_provider_info.py +++ b/providers/apache/kafka/src/airflow/providers/apache/kafka/get_provider_info.py @@ -44,6 +44,22 @@ def get_provider_info(): ], } ], + "asset-uris": [ + { + "schemes": ["kafka"], + "handler": "airflow.providers.apache.kafka.assets.kafka.sanitize_uri", + "factory": "airflow.providers.apache.kafka.assets.kafka.create_asset", + "to_openlineage_converter": "airflow.providers.apache.kafka.assets.kafka.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["kafka"], + "handler": 
"airflow.providers.apache.kafka.assets.kafka.sanitize_uri", + "factory": "airflow.providers.apache.kafka.assets.kafka.create_asset", + "to_openlineage_converter": "airflow.providers.apache.kafka.assets.kafka.convert_asset_to_openlineage", + } + ], "hooks": [ { "integration-name": "Apache Kafka", diff --git a/providers/apache/kafka/tests/unit/apache/kafka/assets/__init__.py b/providers/apache/kafka/tests/unit/apache/kafka/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/kafka/tests/unit/apache/kafka/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/kafka/tests/unit/apache/kafka/assets/test_kafka.py b/providers/apache/kafka/tests/unit/apache/kafka/assets/test_kafka.py new file mode 100644 index 0000000000000..5067382207518 --- /dev/null +++ b/providers/apache/kafka/tests/unit/apache/kafka/assets/test_kafka.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.apache.kafka.assets.kafka import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) +from airflow.providers.common.compat.assets import Asset + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("kafka://broker1:9092/my-topic", id="valid"), + ], +) +def test_sanitize_uri_pass(value: str) -> None: + result = sanitize_uri(urllib.parse.urlsplit(value)) + assert result.scheme == "kafka" + assert result.netloc == "broker1:9092" + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("kafka:///my-topic", id="missing-host"), + pytest.param("kafka://broker1:9092", id="missing-topic"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + with pytest.raises(ValueError, match="must contain"): + sanitize_uri(urllib.parse.urlsplit(value)) + + +def test_create_asset() -> None: + result = create_asset(server="broker1:9092", topic="my-topic") + assert result == Asset(uri="kafka://broker1:9092/my-topic") + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param("kafka://broker1:9092/my-topic", "kafka://broker1:9092", "my-topic", id="basic"), + pytest.param( + "kafka://broker1:9092,broker2:9092/events-topic", + "kafka://broker1:9092,broker2:9092", + "events-topic", + id="multiple-brokers", + ), + ], +) +def 
test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + result = convert_asset_to_openlineage(asset, None) + assert result.namespace == expected_namespace + assert result.name == expected_name diff --git a/providers/databricks/provider.yaml b/providers/databricks/provider.yaml index 9671f063b8ff9..2972036602efa 100644 --- a/providers/databricks/provider.yaml +++ b/providers/databricks/provider.yaml @@ -148,6 +148,20 @@ operators: python-modules: - airflow.providers.databricks.operators.databricks_workflow +asset-uris: + - schemes: [databricks] + handler: airflow.providers.databricks.assets.databricks.sanitize_uri + factory: airflow.providers.databricks.assets.databricks.create_asset + to_openlineage_converter: airflow.providers.databricks.assets.databricks.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [databricks] + handler: airflow.providers.databricks.assets.databricks.sanitize_uri + factory: airflow.providers.databricks.assets.databricks.create_asset + to_openlineage_converter: airflow.providers.databricks.assets.databricks.convert_asset_to_openlineage + hooks: - integration-name: Databricks python-modules: diff --git a/providers/databricks/src/airflow/providers/databricks/assets/__init__.py b/providers/databricks/src/airflow/providers/databricks/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/databricks/src/airflow/providers/databricks/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/databricks/src/airflow/providers/databricks/assets/databricks.py b/providers/databricks/src/airflow/providers/databricks/assets/databricks.py new file mode 100644 index 0000000000000..6424aff878c2e --- /dev/null +++ b/providers/databricks/src/airflow/providers/databricks/assets/databricks.py @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format databricks:// must contain a host") + if len(uri.path.split("/")) != 4: # Leading slash, catalog, schema, and table names. + raise ValueError("URI format databricks:// must contain catalog, schema, and table names") + return uri + + +def create_asset( + *, host: str, catalog: str, schema: str, table: str, port: str | None = None, extra: dict | None = None +) -> Asset: + port = f":{port}" if port else "" + return Asset(uri=f"databricks://{host}{port}/{catalog}/{schema}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, catalog, schema, table = parsed.path.split("/") + return OpenLineageDataset(namespace=f"databricks://{parsed.netloc}", name=f"{catalog}.{schema}.{table}") diff --git a/providers/databricks/src/airflow/providers/databricks/get_provider_info.py b/providers/databricks/src/airflow/providers/databricks/get_provider_info.py index 5f12cb02ddbe8..f80d54a130d81 100644 --- a/providers/databricks/src/airflow/providers/databricks/get_provider_info.py +++ b/providers/databricks/src/airflow/providers/databricks/get_provider_info.py @@ -85,6 +85,22 @@ def get_provider_info(): "python-modules": ["airflow.providers.databricks.operators.databricks_workflow"], }, ], + "asset-uris": [ + { + "schemes": ["databricks"], + "handler": 
"airflow.providers.databricks.assets.databricks.sanitize_uri", + "factory": "airflow.providers.databricks.assets.databricks.create_asset", + "to_openlineage_converter": "airflow.providers.databricks.assets.databricks.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["databricks"], + "handler": "airflow.providers.databricks.assets.databricks.sanitize_uri", + "factory": "airflow.providers.databricks.assets.databricks.create_asset", + "to_openlineage_converter": "airflow.providers.databricks.assets.databricks.convert_asset_to_openlineage", + } + ], "hooks": [ { "integration-name": "Databricks", diff --git a/providers/databricks/tests/unit/databricks/assets/__init__.py b/providers/databricks/tests/unit/databricks/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/databricks/tests/unit/databricks/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/providers/databricks/tests/unit/databricks/assets/test_databricks.py b/providers/databricks/tests/unit/databricks/assets/test_databricks.py new file mode 100644 index 0000000000000..7b072ce1ceba0 --- /dev/null +++ b/providers/databricks/tests/unit/databricks/assets/test_databricks.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.common.compat.assets import Asset +from airflow.providers.databricks.assets.databricks import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) + + +@pytest.mark.parametrize( + ("original", "normalized"), + [ + pytest.param( + "databricks://my-workspace.cloud.databricks.com/catalog/schema/table", + "databricks://my-workspace.cloud.databricks.com/catalog/schema/table", + id="normalized", + ), + ], +) +def test_sanitize_uri_pass(original: str, normalized: str) -> None: + uri_i = urllib.parse.urlsplit(original) + uri_o = sanitize_uri(uri_i) + assert urllib.parse.urlunsplit(uri_o) == normalized + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("databricks://", id="blank"), + pytest.param("databricks:///catalog/schema/table", id="no-host"), + pytest.param("databricks://host/catalog/table", id="missing-component"), + pytest.param("databricks://host/catalog/schema/table/column", id="extra-component"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + uri_i = urllib.parse.urlsplit(value) + with pytest.raises(ValueError, match="URI format databricks:// must contain"): + sanitize_uri(uri_i) + + +def test_create_asset() -> None: + result = create_asset( + host="my-workspace.cloud.databricks.com", + catalog="main", + schema="default", + table="users", + ) + assert result == Asset(uri="databricks://my-workspace.cloud.databricks.com/main/default/users") + + +def test_convert_asset_to_openlineage() -> None: + asset = Asset(uri="databricks://my-workspace.cloud.databricks.com/main/default/users") + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == "databricks://my-workspace.cloud.databricks.com" + assert ol_dataset.name == "main.default.users" diff --git a/providers/ftp/provider.yaml b/providers/ftp/provider.yaml index 7982ad34e368e..30fc58d9344b5 100644 --- 
a/providers/ftp/provider.yaml +++ b/providers/ftp/provider.yaml @@ -82,6 +82,20 @@ sensors: python-modules: - airflow.providers.ftp.sensors.ftp +asset-uris: + - schemes: [ftp] + handler: airflow.providers.ftp.assets.ftp.sanitize_uri + factory: airflow.providers.ftp.assets.ftp.create_asset + to_openlineage_converter: airflow.providers.ftp.assets.ftp.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [ftp] + handler: airflow.providers.ftp.assets.ftp.sanitize_uri + factory: airflow.providers.ftp.assets.ftp.create_asset + to_openlineage_converter: airflow.providers.ftp.assets.ftp.convert_asset_to_openlineage + hooks: - integration-name: File Transfer Protocol (FTP) python-modules: diff --git a/providers/ftp/src/airflow/providers/ftp/assets/__init__.py b/providers/ftp/src/airflow/providers/ftp/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ftp/src/airflow/providers/ftp/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/providers/ftp/src/airflow/providers/ftp/assets/ftp.py b/providers/ftp/src/airflow/providers/ftp/assets/ftp.py new file mode 100644 index 0000000000000..9c6f97e512554 --- /dev/null +++ b/providers/ftp/src/airflow/providers/ftp/assets/ftp.py @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format ftp:// must contain a host") + if uri.port is None: + host = uri.netloc.rstrip(":") + uri = uri._replace(netloc=f"{host}:21") + if not uri.path: + raise ValueError("URI format ftp:// must contain a path") + return uri + + +def create_asset(*, host: str, path: str, port: int = 21, extra: dict | None = None) -> Asset: + return Asset(uri=f"ftp://{host}:{port}/{path}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path + return OpenLineageDataset(namespace=f"file://{parsed.netloc}", name=path or "/") diff --git a/providers/ftp/src/airflow/providers/ftp/get_provider_info.py b/providers/ftp/src/airflow/providers/ftp/get_provider_info.py index 8658c11981f08..39297a9b86475 100644 --- a/providers/ftp/src/airflow/providers/ftp/get_provider_info.py +++ b/providers/ftp/src/airflow/providers/ftp/get_provider_info.py @@ -46,6 +46,22 @@ def get_provider_info(): "python-modules": ["airflow.providers.ftp.sensors.ftp"], } ], + "asset-uris": [ + { + "schemes": ["ftp"], + "handler": "airflow.providers.ftp.assets.ftp.sanitize_uri", + "factory": "airflow.providers.ftp.assets.ftp.create_asset", + "to_openlineage_converter": "airflow.providers.ftp.assets.ftp.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { 
+ "schemes": ["ftp"], + "handler": "airflow.providers.ftp.assets.ftp.sanitize_uri", + "factory": "airflow.providers.ftp.assets.ftp.create_asset", + "to_openlineage_converter": "airflow.providers.ftp.assets.ftp.convert_asset_to_openlineage", + } + ], "hooks": [ { "integration-name": "File Transfer Protocol (FTP)", diff --git a/providers/ftp/tests/unit/ftp/assets/__init__.py b/providers/ftp/tests/unit/ftp/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ftp/tests/unit/ftp/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/ftp/tests/unit/ftp/assets/test_ftp.py b/providers/ftp/tests/unit/ftp/assets/test_ftp.py new file mode 100644 index 0000000000000..a85b3dc24fa17 --- /dev/null +++ b/providers/ftp/tests/unit/ftp/assets/test_ftp.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
from __future__ import annotations

import urllib.parse

import pytest

from airflow.providers.common.compat.assets import Asset
from airflow.providers.ftp.assets.ftp import convert_asset_to_openlineage, create_asset, sanitize_uri


@pytest.mark.parametrize(
    ("original", "normalized"),
    [
        pytest.param(
            "ftp://example.com:2121/data/file.csv",
            "ftp://example.com:2121/data/file.csv",
            id="normalized",
        ),
        pytest.param(
            "ftp://example.com/data/file.csv",
            "ftp://example.com:21/data/file.csv",
            id="default-port",
        ),
    ],
)
def test_sanitize_uri_pass(original: str, normalized: str) -> None:
    """Valid URIs pass through; a missing port is replaced with the default 21."""
    uri_i = urllib.parse.urlsplit(original)
    uri_o = sanitize_uri(uri_i)
    assert urllib.parse.urlunsplit(uri_o) == normalized


@pytest.mark.parametrize(
    "value",
    [
        pytest.param("ftp://", id="blank"),
        pytest.param("ftp:///path/to/file", id="no-host"),
    ],
)
def test_sanitize_uri_fail(value: str) -> None:
    """URIs without a host (or entirely blank) are rejected with ValueError."""
    uri_i = urllib.parse.urlsplit(value)
    with pytest.raises(ValueError, match="URI format ftp:// must contain"):
        sanitize_uri(uri_i)


@pytest.mark.parametrize(
    ("path", "expected_uri"),
    [
        # NOTE(review): a leading "/" in *path* yields a double slash in the
        # generated URI — confirm this is the intended canonical form.
        pytest.param("/data/file.csv", "ftp://example.com:21//data/file.csv", id="root"),
        pytest.param("data/file.csv", "ftp://example.com:21/data/file.csv", id="no-leading-slash"),
    ],
)
def test_create_asset(path: str, expected_uri: str) -> None:
    """create_asset concatenates host, default port, and the path verbatim."""
    result = create_asset(host="example.com", path=path)
    assert result == Asset(uri=expected_uri)


@pytest.mark.parametrize(
    ("expected_name", "uri"),
    [
        pytest.param("/", "ftp://example.com:21", id="no-path"),
        pytest.param("/", "ftp://example.com:21/", id="path-slash-only"),
        pytest.param("/data/file.csv", "ftp://example.com:21//data/file.csv", id="root"),
        pytest.param("data/file.csv", "ftp://example.com:21/data/file.csv", id="no-leading-slash"),
        pytest.param("//data/file.csv", "ftp://example.com:21///data/file.csv", id="two-slashes"),
    ],
)
def test_convert_asset_to_openlineage(expected_name, uri) -> None:
    """Only one leading slash is stripped for the name; namespace uses file:// scheme."""
    asset = Asset(uri=uri)
    ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None)
    assert ol_dataset.namespace == "file://example.com:21"
    assert ol_dataset.name == expected_name
+`apache-airflow-providers-common-compat `_ ``common.compat`` +`apache-airflow-providers-common-sql `_ ``common.sql`` +`apache-airflow-providers-openlineage `_ ``openlineage`` +================================================================================================================== ================= Downloading official packages ----------------------------- diff --git a/providers/microsoft/mssql/provider.yaml b/providers/microsoft/mssql/provider.yaml index 6c2a0cb842640..4672b4ce4a73a 100644 --- a/providers/microsoft/mssql/provider.yaml +++ b/providers/microsoft/mssql/provider.yaml @@ -84,6 +84,20 @@ dialects: - dialect-type: mssql dialect-class-name: airflow.providers.microsoft.mssql.dialects.mssql.MsSqlDialect +asset-uris: + - schemes: [mssql] + handler: airflow.providers.microsoft.mssql.assets.mssql.sanitize_uri + factory: airflow.providers.microsoft.mssql.assets.mssql.create_asset + to_openlineage_converter: airflow.providers.microsoft.mssql.assets.mssql.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. 
+dataset-uris: + - schemes: [mssql] + handler: airflow.providers.microsoft.mssql.assets.mssql.sanitize_uri + factory: airflow.providers.microsoft.mssql.assets.mssql.create_asset + to_openlineage_converter: airflow.providers.microsoft.mssql.assets.mssql.convert_asset_to_openlineage + hooks: - integration-name: Microsoft SQL Server (MSSQL) python-modules: diff --git a/providers/microsoft/mssql/pyproject.toml b/providers/microsoft/mssql/pyproject.toml index adbac810c00e5..ba08bbb1186b9 100644 --- a/providers/microsoft/mssql/pyproject.toml +++ b/providers/microsoft/mssql/pyproject.toml @@ -72,12 +72,16 @@ dependencies = [ "openlineage" = [ "apache-airflow-providers-openlineage" ] +"common.compat" = [ + "apache-airflow-providers-common-compat" +] [dependency-groups] dev = [ "apache-airflow", "apache-airflow-task-sdk", "apache-airflow-devel-common", + "apache-airflow-providers-common-compat", "apache-airflow-providers-common-sql", "apache-airflow-providers-openlineage", # Additional devel dependencies (do not remove this line and add extra development dependencies) diff --git a/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/__init__.py b/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py b/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py new file mode 100644 index 0000000000000..b7251048a4826 --- /dev/null +++ b/providers/microsoft/mssql/src/airflow/providers/microsoft/mssql/assets/mssql.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.providers.common.compat.assets import Asset

if TYPE_CHECKING:
    from urllib.parse import SplitResult

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset


def sanitize_uri(uri: SplitResult) -> SplitResult:
    """Validate and normalize an AIP-60 ``mssql://`` URI.

    A host is mandatory; a missing port is replaced with the MSSQL default
    (1433).  The path must consist of exactly database, schema, and table
    segments.

    :param uri: pre-split URI to validate and normalize.
    :raises ValueError: when the host is missing or the path does not have
        exactly three components.
    """
    if not uri.netloc:
        raise ValueError("URI format mssql:// must contain a host")
    if uri.port is None:
        # Strip a dangling ":" (e.g. "host:") before appending the default port.
        hostname = uri.netloc.rstrip(":")
        uri = uri._replace(netloc=f"{hostname}:1433")
    # Exactly three "/" separators == leading slash + database + schema + table.
    if uri.path.count("/") != 3:
        raise ValueError("URI format mssql:// must contain database, schema, and table names")
    return uri


def create_asset(
    *, host: str, database: str, schema: str, table: str, port: int = 1433, extra: dict | None = None
) -> Asset:
    """Build an Asset with a canonical ``mssql://host:port/database/schema/table`` URI."""
    return Asset(uri=f"mssql://{host}:{port}/{database}/{schema}/{table}", extra=extra)


def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
    """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
    from urllib.parse import urlsplit

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset

    split = urlsplit(asset.uri)
    # path == "/<database>/<schema>/<table>"; the first split element is the
    # empty string in front of the leading slash.
    _, db_name, schema_name, table_name = split.path.split("/")
    return OpenLineageDataset(
        namespace=f"mssql://{split.netloc}", name=f"{db_name}.{schema_name}.{table_name}"
    )
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py b/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py new file mode 100644 index 0000000000000..d44b2205b43a5 --- /dev/null +++ b/providers/microsoft/mssql/tests/unit/microsoft/mssql/assets/test_mssql.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from __future__ import annotations

import urllib.parse

import pytest

from airflow.providers.common.compat.assets import Asset
from airflow.providers.microsoft.mssql.assets.mssql import (
    convert_asset_to_openlineage,
    create_asset,
    sanitize_uri,
)


@pytest.mark.parametrize(
    ("original", "normalized"),
    [
        pytest.param(
            "mssql://example.com:1234/database/schema/table",
            "mssql://example.com:1234/database/schema/table",
            id="normalized",
        ),
        pytest.param(
            "mssql://example.com/database/schema/table",
            "mssql://example.com:1433/database/schema/table",
            id="default-port",
        ),
    ],
)
def test_sanitize_uri_pass(original: str, normalized: str) -> None:
    """Valid URIs pass through; a missing port is replaced with the default 1433."""
    uri_i = urllib.parse.urlsplit(original)
    uri_o = sanitize_uri(uri_i)
    assert urllib.parse.urlunsplit(uri_o) == normalized


@pytest.mark.parametrize(
    "value",
    [
        pytest.param("mssql://", id="blank"),
        pytest.param("mssql:///database/schema/table", id="no-host"),
        pytest.param("mssql://example.com/database/table", id="missing-component"),
        pytest.param("mssql://example.com/database/schema/table/column", id="extra-component"),
    ],
)
def test_sanitize_uri_fail(value: str) -> None:
    """Missing host or a path without exactly db/schema/table raises ValueError."""
    uri_i = urllib.parse.urlsplit(value)
    with pytest.raises(ValueError, match="URI format mssql:// must contain"):
        sanitize_uri(uri_i)


def test_sanitize_uri_fail_non_port() -> None:
    """A non-numeric port raises from SplitResult.port itself (stdlib message)."""
    uri_i = urllib.parse.urlsplit("mssql://example.com:abcd/database/schema/table")
    with pytest.raises(ValueError, match="Port could not be cast to integer value as 'abcd'"):
        sanitize_uri(uri_i)


@pytest.mark.parametrize(
    ("host", "database", "schema", "table", "port", "expected_uri"),
    [
        pytest.param(
            "example.com",
            "mydb",
            "dbo",
            "users",
            1433,
            "mssql://example.com:1433/mydb/dbo/users",
            id="default-port",
        ),
        pytest.param(
            "example.com",
            "mydb",
            "dbo",
            "users",
            1434,
            "mssql://example.com:1434/mydb/dbo/users",
            id="custom-port",
        ),
    ],
)
def test_create_asset(
    host: str, database: str, schema: str, table: str, port: int, expected_uri: str
) -> None:
    """create_asset composes the canonical mssql://host:port/db/schema/table URI."""
    result = create_asset(host=host, database=database, schema=schema, table=table, port=port)
    assert result == Asset(uri=expected_uri)


@pytest.mark.parametrize(
    ("uri", "expected_namespace", "expected_name"),
    [
        pytest.param(
            "mssql://example.com:1433/mydb/dbo/users",
            "mssql://example.com:1433",
            "mydb.dbo.users",
            id="default-port",
        ),
        pytest.param(
            "mssql://db-host:1434/testdb/schema1/events",
            "mssql://db-host:1434",
            "testdb.schema1.events",
            id="custom-port",
        ),
    ],
)
def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None:
    """Namespace keeps scheme+netloc; name is the dotted db.schema.table triple."""
    asset = Asset(uri=uri)
    ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None)
    assert ol_dataset.namespace == expected_namespace
    assert ol_dataset.name == expected_name
+dataset-uris: + - schemes: [mongodb] + handler: airflow.providers.mongo.assets.mongo.sanitize_uri + factory: airflow.providers.mongo.assets.mongo.create_asset + to_openlineage_converter: airflow.providers.mongo.assets.mongo.convert_asset_to_openlineage + connection-types: - hook-class-name: airflow.providers.mongo.hooks.mongo.MongoHook hook-name: "MongoDB" diff --git a/providers/mongo/src/airflow/providers/mongo/assets/__init__.py b/providers/mongo/src/airflow/providers/mongo/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/mongo/src/airflow/providers/mongo/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/mongo/src/airflow/providers/mongo/assets/mongo.py b/providers/mongo/src/airflow/providers/mongo/assets/mongo.py new file mode 100644 index 0000000000000..7bf0768090bd7 --- /dev/null +++ b/providers/mongo/src/airflow/providers/mongo/assets/mongo.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.providers.common.compat.assets import Asset

if TYPE_CHECKING:
    from urllib.parse import SplitResult

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset


def sanitize_uri(uri: SplitResult) -> SplitResult:
    """Validate and normalize an AIP-60 ``mongodb://`` URI.

    A host is mandatory; a missing port is replaced with the MongoDB default
    (27017).  The path must consist of exactly database and collection
    segments.

    :param uri: pre-split URI to validate and normalize.
    :raises ValueError: when the host is missing or the path does not have
        exactly two components.
    """
    if not uri.netloc:
        raise ValueError("URI format mongodb:// must contain a host")
    if uri.port is None:
        # Strip a dangling ":" (e.g. "host:") before appending the default port.
        hostname = uri.netloc.rstrip(":")
        uri = uri._replace(netloc=f"{hostname}:27017")
    # Exactly two "/" separators == leading slash + database + collection.
    if uri.path.count("/") != 2:
        raise ValueError("URI format mongodb:// must contain a database and collection")
    return uri


def create_asset(
    *, host: str, database: str, collection: str, port: int = 27017, extra: dict | None = None
) -> Asset:
    """Build an Asset with a canonical ``mongodb://host:port/database/collection`` URI."""
    return Asset(uri=f"mongodb://{host}:{port}/{database}/{collection}", extra=extra)


def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
    """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
    from urllib.parse import urlsplit

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset

    split = urlsplit(asset.uri)
    # path == "/<database>/<collection>"; the first split element is the empty
    # string in front of the leading slash.
    _, db_name, collection_name = split.path.split("/")
    return OpenLineageDataset(namespace=f"mongodb://{split.netloc}", name=f"{db_name}.{collection_name}")
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/mongo/tests/unit/mongo/assets/test_mongo.py b/providers/mongo/tests/unit/mongo/assets/test_mongo.py new file mode 100644 index 0000000000000..15072fce3c5cf --- /dev/null +++ b/providers/mongo/tests/unit/mongo/assets/test_mongo.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from __future__ import annotations

import urllib.parse

import pytest

from airflow.providers.common.compat.assets import Asset
from airflow.providers.mongo.assets.mongo import convert_asset_to_openlineage, create_asset, sanitize_uri


@pytest.mark.parametrize(
    "value",
    [
        pytest.param("mongodb://host:27017/mydb/mycollection", id="valid"),
    ],
)
def test_sanitize_uri_pass(value: str) -> None:
    """A well-formed mongodb URI survives sanitization with its scheme intact."""
    result = sanitize_uri(urllib.parse.urlsplit(value))
    assert result.scheme == "mongodb"


@pytest.mark.parametrize(
    "value",
    [
        pytest.param("mongodb:///db/collection", id="missing-host"),
        pytest.param("mongodb://host:27017", id="missing-path"),
    ],
)
def test_sanitize_uri_fail(value: str) -> None:
    """Missing host or missing database/collection path raises ValueError."""
    with pytest.raises(ValueError, match="must contain"):
        sanitize_uri(urllib.parse.urlsplit(value))


@pytest.mark.parametrize(
    ("host", "database", "collection", "port", "expected_uri"),
    [
        pytest.param(
            "myhost",
            "mydb",
            "mycollection",
            27017,
            "mongodb://myhost:27017/mydb/mycollection",
            id="default-port",
        ),
        pytest.param("myhost", "db", "col", 27018, "mongodb://myhost:27018/db/col", id="custom-port"),
    ],
)
def test_create_asset(host: str, database: str, collection: str, port: int, expected_uri: str) -> None:
    """create_asset composes the canonical mongodb://host:port/db/collection URI."""
    result = create_asset(host=host, database=database, collection=collection, port=port)
    assert result == Asset(uri=expected_uri)


@pytest.mark.parametrize(
    ("uri", "expected_namespace", "expected_name"),
    [
        pytest.param(
            "mongodb://myhost:27017/mydb/mycollection",
            "mongodb://myhost:27017",
            "mydb.mycollection",
            id="default-port",
        ),
        pytest.param(
            "mongodb://otherhost:27018/testdb/users",
            "mongodb://otherhost:27018",
            "testdb.users",
            id="custom-port",
        ),
        pytest.param(
            "mongodb://cluster:27017/admin/system.indexes",
            "mongodb://cluster:27017",
            "admin.system.indexes",
            id="system-collection",
        ),
    ],
)
def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None:
    """Namespace keeps scheme+netloc; name is the dotted database.collection pair."""
    asset = Asset(uri=uri)
    result = convert_asset_to_openlineage(asset, None)
    assert result.namespace == expected_namespace
    assert result.name == expected_name
raise ValueError("URI format mysql:// must contain database and table names") return uri._replace(scheme="mysql") + + +def create_asset( + *, host: str, database: str, table: str, port: int = 3306, extra: dict | None = None +) -> Asset: + return Asset(uri=f"mysql://{host}:{port}/{database}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, database, table = parsed.path.split("/") # Leading slash, database name, and table name. + return OpenLineageDataset(namespace=f"mysql://{parsed.netloc}", name=f"{database}.{table}") diff --git a/providers/mysql/src/airflow/providers/mysql/get_provider_info.py b/providers/mysql/src/airflow/providers/mysql/get_provider_info.py index f0503d851df1f..22f172b451599 100644 --- a/providers/mysql/src/airflow/providers/mysql/get_provider_info.py +++ b/providers/mysql/src/airflow/providers/mysql/get_provider_info.py @@ -66,9 +66,19 @@ def get_provider_info(): } ], "asset-uris": [ - {"schemes": ["mysql", "mariadb"], "handler": "airflow.providers.mysql.assets.mysql.sanitize_uri"} + { + "schemes": ["mysql"], + "handler": "airflow.providers.mysql.assets.mysql.sanitize_uri", + "factory": "airflow.providers.mysql.assets.mysql.create_asset", + "to_openlineage_converter": "airflow.providers.mysql.assets.mysql.convert_asset_to_openlineage", + } ], "dataset-uris": [ - {"schemes": ["mysql", "mariadb"], "handler": "airflow.providers.mysql.assets.mysql.sanitize_uri"} + { + "schemes": ["mysql"], + "handler": "airflow.providers.mysql.assets.mysql.sanitize_uri", + "factory": "airflow.providers.mysql.assets.mysql.create_asset", + "to_openlineage_converter": "airflow.providers.mysql.assets.mysql.convert_asset_to_openlineage", + } ], } 
diff --git a/providers/mysql/tests/unit/mysql/assets/test_mysql.py b/providers/mysql/tests/unit/mysql/assets/test_mysql.py index 2381f6aabc251..74092384bea35 100644 --- a/providers/mysql/tests/unit/mysql/assets/test_mysql.py +++ b/providers/mysql/tests/unit/mysql/assets/test_mysql.py @@ -21,7 +21,8 @@ import pytest -from airflow.providers.mysql.assets.mysql import sanitize_uri +from airflow.providers.common.compat.assets import Asset +from airflow.providers.mysql.assets.mysql import convert_asset_to_openlineage, create_asset, sanitize_uri @pytest.mark.parametrize( @@ -69,3 +70,37 @@ def test_sanitize_uri_fail_non_port() -> None: uri_i = urllib.parse.urlsplit("mysql://example.com:abcd/database/table") with pytest.raises(ValueError, match="Port could not be cast to integer value as 'abcd'"): sanitize_uri(uri_i) + + +@pytest.mark.parametrize( + ("host", "database", "table", "port", "expected_uri"), + [ + pytest.param( + "example.com", "mydb", "users", 3306, "mysql://example.com:3306/mydb/users", id="default-port" + ), + pytest.param( + "example.com", "mydb", "users", 3307, "mysql://example.com:3307/mydb/users", id="custom-port" + ), + ], +) +def test_create_asset(host: str, database: str, table: str, port: int, expected_uri: str) -> None: + result = create_asset(host=host, database=database, table=table, port=port) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param( + "mysql://example.com:3306/mydb/users", "mysql://example.com:3306", "mydb.users", id="default-port" + ), + pytest.param( + "mysql://db-host:3307/testdb/events", "mysql://db-host:3307", "testdb.events", id="custom-port" + ), + ], +) +def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == expected_namespace + assert ol_dataset.name == 
expected_name diff --git a/providers/oracle/provider.yaml b/providers/oracle/provider.yaml index fc0226a35f4ae..7aa98e7ee8c3e 100644 --- a/providers/oracle/provider.yaml +++ b/providers/oracle/provider.yaml @@ -94,6 +94,20 @@ operators: python-modules: - airflow.providers.oracle.operators.oracle +asset-uris: + - schemes: [oracle] + handler: airflow.providers.oracle.assets.oracle.sanitize_uri + factory: airflow.providers.oracle.assets.oracle.create_asset + to_openlineage_converter: airflow.providers.oracle.assets.oracle.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [oracle] + handler: airflow.providers.oracle.assets.oracle.sanitize_uri + factory: airflow.providers.oracle.assets.oracle.create_asset + to_openlineage_converter: airflow.providers.oracle.assets.oracle.convert_asset_to_openlineage + hooks: - integration-name: Oracle python-modules: diff --git a/providers/oracle/src/airflow/providers/oracle/assets/__init__.py b/providers/oracle/src/airflow/providers/oracle/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/oracle/src/airflow/providers/oracle/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/oracle/src/airflow/providers/oracle/assets/oracle.py b/providers/oracle/src/airflow/providers/oracle/assets/oracle.py new file mode 100644 index 0000000000000..19942df7ab10b --- /dev/null +++ b/providers/oracle/src/airflow/providers/oracle/assets/oracle.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format oracle:// must contain a host") + if uri.port is None: + host = uri.netloc.rstrip(":") + uri = uri._replace(netloc=f"{host}:1521") + if len(uri.path.split("/")) != 4: # Leading slash, service name, schema, and table names. 
+ raise ValueError("URI format oracle:// must contain service name, schema, and table names") + return uri + + +def create_asset( + *, + host: str, + port: int = 1521, + service_name: str, + schema: str, + table: str, + extra: dict | None = None, +) -> Asset: + return Asset(uri=f"oracle://{host}:{port}/{service_name}/{schema}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, service_name, schema, table = parsed.path.split( + "/" + ) # Leading slash, service_name, schema, and table names. + return OpenLineageDataset(namespace=f"oracle://{parsed.netloc}", name=f"{service_name}.{schema}.{table}") diff --git a/providers/oracle/src/airflow/providers/oracle/get_provider_info.py b/providers/oracle/src/airflow/providers/oracle/get_provider_info.py index 91b3f870147c6..d9cf0004700ac 100644 --- a/providers/oracle/src/airflow/providers/oracle/get_provider_info.py +++ b/providers/oracle/src/airflow/providers/oracle/get_provider_info.py @@ -38,6 +38,22 @@ def get_provider_info(): "operators": [ {"integration-name": "Oracle", "python-modules": ["airflow.providers.oracle.operators.oracle"]} ], + "asset-uris": [ + { + "schemes": ["oracle"], + "handler": "airflow.providers.oracle.assets.oracle.sanitize_uri", + "factory": "airflow.providers.oracle.assets.oracle.create_asset", + "to_openlineage_converter": "airflow.providers.oracle.assets.oracle.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["oracle"], + "handler": "airflow.providers.oracle.assets.oracle.sanitize_uri", + "factory": "airflow.providers.oracle.assets.oracle.create_asset", + "to_openlineage_converter": 
"airflow.providers.oracle.assets.oracle.convert_asset_to_openlineage", + } + ], "hooks": [ { "integration-name": "Oracle", diff --git a/providers/oracle/tests/unit/oracle/assets/__init__.py b/providers/oracle/tests/unit/oracle/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/oracle/tests/unit/oracle/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/oracle/tests/unit/oracle/assets/test_oracle.py b/providers/oracle/tests/unit/oracle/assets/test_oracle.py new file mode 100644 index 0000000000000..82d44fdaf1b34 --- /dev/null +++ b/providers/oracle/tests/unit/oracle/assets/test_oracle.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.common.compat.assets import Asset +from airflow.providers.oracle.assets.oracle import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) + + +@pytest.mark.parametrize( + ("original", "normalized"), + [ + pytest.param( + "oracle://example.com:1234/orcl/HR/employees", + "oracle://example.com:1234/orcl/HR/employees", + id="normalized", + ), + pytest.param( + "oracle://example.com/orcl/HR/employees", + "oracle://example.com:1521/orcl/HR/employees", + id="default-port", + ), + ], +) +def test_sanitize_uri_pass(original: str, normalized: str) -> None: + uri_i = urllib.parse.urlsplit(original) + uri_o = sanitize_uri(uri_i) + assert urllib.parse.urlunsplit(uri_o) == normalized + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("oracle://", id="blank"), + pytest.param("oracle:///orcl/HR/employees", id="no-host"), + pytest.param("oracle://example.com/orcl/employees", id="missing-component"), + pytest.param("oracle://example.com/orcl/HR/employees/column", id="extra-component"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + uri_i = urllib.parse.urlsplit(value) + with pytest.raises(ValueError, match="URI format oracle:// must contain"): + sanitize_uri(uri_i) + + +def test_sanitize_uri_fail_non_port() -> None: + uri_i = urllib.parse.urlsplit("oracle://example.com:abcd/orcl/HR/employees") + with pytest.raises(ValueError, match="Port could not be cast to integer value as 'abcd'"): + sanitize_uri(uri_i) + + 
+@pytest.mark.parametrize( + ("host", "service_name", "schema", "table", "port", "expected_uri"), + [ + pytest.param( + "example.com", + "orcl", + "HR", + "employees", + 1521, + "oracle://example.com:1521/orcl/HR/employees", + id="default-port", + ), + pytest.param( + "example.com", + "orcl", + "HR", + "employees", + 1522, + "oracle://example.com:1522/orcl/HR/employees", + id="custom-port", + ), + ], +) +def test_create_asset( + host: str, service_name: str, schema: str, table: str, port: int, expected_uri: str +) -> None: + result = create_asset(host=host, service_name=service_name, schema=schema, table=table, port=port) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param( + "oracle://example.com:1521/orcl/HR/employees", + "oracle://example.com:1521", + "orcl.HR.employees", + id="default-port", + ), + pytest.param( + "oracle://db-host:1522/prod/SCHEMA/users", + "oracle://db-host:1522", + "prod.SCHEMA.users", + id="custom-port", + ), + ], +) +def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == expected_namespace + assert ol_dataset.name == expected_name diff --git a/providers/postgres/provider.yaml b/providers/postgres/provider.yaml index 030a02069d3b8..f4611d764e991 100644 --- a/providers/postgres/provider.yaml +++ b/providers/postgres/provider.yaml @@ -119,12 +119,16 @@ connection-types: asset-uris: - schemes: [postgres, postgresql] handler: airflow.providers.postgres.assets.postgres.sanitize_uri + factory: airflow.providers.postgres.assets.postgres.create_asset + to_openlineage_converter: airflow.providers.postgres.assets.postgres.convert_asset_to_openlineage # dataset has been renamed to asset in Airflow 3.0 # This is kept for backward compatibility. 
dataset-uris: - schemes: [postgres, postgresql] handler: airflow.providers.postgres.assets.postgres.sanitize_uri + factory: airflow.providers.postgres.assets.postgres.create_asset + to_openlineage_converter: airflow.providers.postgres.assets.postgres.convert_asset_to_openlineage config: postgres: diff --git a/providers/postgres/src/airflow/providers/postgres/assets/postgres.py b/providers/postgres/src/airflow/providers/postgres/assets/postgres.py index b3cee7234cd4a..311532a0b3a65 100644 --- a/providers/postgres/src/airflow/providers/postgres/assets/postgres.py +++ b/providers/postgres/src/airflow/providers/postgres/assets/postgres.py @@ -19,9 +19,13 @@ from typing import TYPE_CHECKING +from airflow.providers.common.compat.assets import Asset + if TYPE_CHECKING: from urllib.parse import SplitResult + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + def sanitize_uri(uri: SplitResult) -> SplitResult: if not uri.netloc: @@ -35,3 +39,20 @@ def sanitize_uri(uri: SplitResult) -> SplitResult: if not path_parts[2]: path_parts[2] = "default" return uri._replace(scheme="postgres", path="/".join(path_parts)) + + +def create_asset( + *, host: str, database: str, schema: str, table: str, port: int = 5432, extra: dict | None = None +) -> Asset: + return Asset(uri=f"postgres://{host}:{port}/{database}/{schema}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, database, schema, table = parsed.path.split("/") # Leading slash, database, schema, and table names. 
+ return OpenLineageDataset(namespace=f"postgres://{parsed.netloc}", name=f"{database}.{schema}.{table}") diff --git a/providers/postgres/src/airflow/providers/postgres/get_provider_info.py b/providers/postgres/src/airflow/providers/postgres/get_provider_info.py index 7919f57d4b977..45fe17c1a495a 100644 --- a/providers/postgres/src/airflow/providers/postgres/get_provider_info.py +++ b/providers/postgres/src/airflow/providers/postgres/get_provider_info.py @@ -59,12 +59,16 @@ def get_provider_info(): { "schemes": ["postgres", "postgresql"], "handler": "airflow.providers.postgres.assets.postgres.sanitize_uri", + "factory": "airflow.providers.postgres.assets.postgres.create_asset", + "to_openlineage_converter": "airflow.providers.postgres.assets.postgres.convert_asset_to_openlineage", } ], "dataset-uris": [ { "schemes": ["postgres", "postgresql"], "handler": "airflow.providers.postgres.assets.postgres.sanitize_uri", + "factory": "airflow.providers.postgres.assets.postgres.create_asset", + "to_openlineage_converter": "airflow.providers.postgres.assets.postgres.convert_asset_to_openlineage", } ], "config": { diff --git a/providers/postgres/tests/unit/postgres/assets/test_postgres.py b/providers/postgres/tests/unit/postgres/assets/test_postgres.py index a06beea747c07..b213d48fef465 100644 --- a/providers/postgres/tests/unit/postgres/assets/test_postgres.py +++ b/providers/postgres/tests/unit/postgres/assets/test_postgres.py @@ -21,7 +21,12 @@ import pytest -from airflow.providers.postgres.assets.postgres import sanitize_uri +from airflow.providers.common.compat.assets import Asset +from airflow.providers.postgres.assets.postgres import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) @pytest.mark.parametrize( @@ -69,3 +74,57 @@ def test_sanitize_uri_fail_non_port() -> None: uri_i = urllib.parse.urlsplit("postgres://example.com:abcd/database/schema/table") with pytest.raises(ValueError, match="Port could not be cast to integer value as 'abcd'"): 
sanitize_uri(uri_i) + + +@pytest.mark.parametrize( + ("host", "database", "schema", "table", "port", "expected_uri"), + [ + pytest.param( + "example.com", + "mydb", + "public", + "users", + 5432, + "postgres://example.com:5432/mydb/public/users", + id="default-port", + ), + pytest.param( + "example.com", + "mydb", + "public", + "users", + 5433, + "postgres://example.com:5433/mydb/public/users", + id="custom-port", + ), + ], +) +def test_create_asset( + host: str, database: str, schema: str, table: str, port: int, expected_uri: str +) -> None: + result = create_asset(host=host, database=database, schema=schema, table=table, port=port) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param( + "postgres://example.com:5432/mydb/public/users", + "postgres://example.com:5432", + "mydb.public.users", + id="default-port", + ), + pytest.param( + "postgres://db-host:5433/testdb/schema1/events", + "postgres://db-host:5433", + "testdb.schema1.events", + id="custom-port", + ), + ], +) +def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == expected_namespace + assert ol_dataset.name == expected_name diff --git a/providers/presto/provider.yaml b/providers/presto/provider.yaml index e695cd532549e..59241909b29d8 100644 --- a/providers/presto/provider.yaml +++ b/providers/presto/provider.yaml @@ -102,6 +102,20 @@ transfers: python-module: airflow.providers.presto.transfers.gcs_to_presto +asset-uris: + - schemes: [presto] + handler: airflow.providers.presto.assets.presto.sanitize_uri + factory: airflow.providers.presto.assets.presto.create_asset + to_openlineage_converter: airflow.providers.presto.assets.presto.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for 
backward compatibility. +dataset-uris: + - schemes: [presto] + handler: airflow.providers.presto.assets.presto.sanitize_uri + factory: airflow.providers.presto.assets.presto.create_asset + to_openlineage_converter: airflow.providers.presto.assets.presto.convert_asset_to_openlineage + connection-types: - hook-class-name: airflow.providers.presto.hooks.presto.PrestoHook hook-name: "Presto" diff --git a/providers/presto/src/airflow/providers/presto/assets/__init__.py b/providers/presto/src/airflow/providers/presto/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/presto/src/airflow/providers/presto/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/presto/src/airflow/providers/presto/assets/presto.py b/providers/presto/src/airflow/providers/presto/assets/presto.py new file mode 100644 index 0000000000000..bd24fc6cc3bf4 --- /dev/null +++ b/providers/presto/src/airflow/providers/presto/assets/presto.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format presto:// must contain a host") + if uri.port is None: + host = uri.netloc.rstrip(":") + uri = uri._replace(netloc=f"{host}:8080") + if len(uri.path.split("/")) != 4: # Leading slash, catalog, schema, and table names. 
+ raise ValueError("URI format presto:// must contain a catalog, schema, and table") + return uri + + +def create_asset( + *, host: str, catalog: str, schema: str, table: str, port: int = 8080, extra: dict | None = None +) -> Asset: + return Asset(uri=f"presto://{host}:{port}/{catalog}/{schema}/{table}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + _, catalog, schema, table = parsed.path.split("/") # Leading slash, catalog, schema, and table names. + return OpenLineageDataset(namespace=f"presto://{parsed.netloc}", name=f"{catalog}.{schema}.{table}") diff --git a/providers/presto/src/airflow/providers/presto/get_provider_info.py b/providers/presto/src/airflow/providers/presto/get_provider_info.py index bc9975b7cba71..eff265fb10bc9 100644 --- a/providers/presto/src/airflow/providers/presto/get_provider_info.py +++ b/providers/presto/src/airflow/providers/presto/get_provider_info.py @@ -46,6 +46,22 @@ def get_provider_info(): "python-module": "airflow.providers.presto.transfers.gcs_to_presto", } ], + "asset-uris": [ + { + "schemes": ["presto"], + "handler": "airflow.providers.presto.assets.presto.sanitize_uri", + "factory": "airflow.providers.presto.assets.presto.create_asset", + "to_openlineage_converter": "airflow.providers.presto.assets.presto.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["presto"], + "handler": "airflow.providers.presto.assets.presto.sanitize_uri", + "factory": "airflow.providers.presto.assets.presto.create_asset", + "to_openlineage_converter": "airflow.providers.presto.assets.presto.convert_asset_to_openlineage", + } + ], "connection-types": [ { "hook-class-name": 
"airflow.providers.presto.hooks.presto.PrestoHook", diff --git a/providers/presto/tests/unit/presto/assets/__init__.py b/providers/presto/tests/unit/presto/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/presto/tests/unit/presto/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/presto/tests/unit/presto/assets/test_presto.py b/providers/presto/tests/unit/presto/assets/test_presto.py new file mode 100644 index 0000000000000..078cd5511a478 --- /dev/null +++ b/providers/presto/tests/unit/presto/assets/test_presto.py @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.common.compat.assets import Asset +from airflow.providers.presto.assets.presto import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("presto://host:8080/hive/default/mytable", id="valid"), + ], +) +def test_sanitize_uri_pass(value: str) -> None: + result = sanitize_uri(urllib.parse.urlsplit(value)) + assert result.scheme == "presto" + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("presto:///catalog/schema/table", id="missing-host"), + pytest.param("presto://host:8080", id="missing-path"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + with pytest.raises(ValueError, match="must contain"): + sanitize_uri(urllib.parse.urlsplit(value)) + + +@pytest.mark.parametrize( + ("host", "catalog", "schema", "table", "port", "expected_uri"), + [ + pytest.param( + "myhost", + "hive", + "default", + "mytable", + 8080, + "presto://myhost:8080/hive/default/mytable", + id="default-port", + ), + pytest.param("myhost", "c", "s", "t", 9090, "presto://myhost:9090/c/s/t", id="custom-port"), + ], +) +def test_create_asset(host: str, catalog: str, schema: str, table: str, port: int, expected_uri: str) -> None: + result = create_asset(host=host, catalog=catalog, schema=schema, table=table, port=port) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param( + 
"presto://myhost:8080/hive/default/mytable", + "presto://myhost:8080", + "hive.default.mytable", + id="default-port", + ), + pytest.param( + "presto://otherhost:9090/postgres/public/users", + "presto://otherhost:9090", + "postgres.public.users", + id="custom-port", + ), + ], +) +def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + result = convert_asset_to_openlineage(asset, None) + assert result.namespace == expected_namespace + assert result.name == expected_name diff --git a/providers/sftp/provider.yaml b/providers/sftp/provider.yaml index 0c4ed51ee2ef6..e9fc6712a8bf4 100644 --- a/providers/sftp/provider.yaml +++ b/providers/sftp/provider.yaml @@ -111,6 +111,20 @@ sensors: - airflow.providers.sftp.sensors.sftp - airflow.providers.sftp.decorators.sensors.sftp +asset-uris: + - schemes: [sftp] + handler: airflow.providers.sftp.assets.sftp.sanitize_uri + factory: airflow.providers.sftp.assets.sftp.create_asset + to_openlineage_converter: airflow.providers.sftp.assets.sftp.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [sftp] + handler: airflow.providers.sftp.assets.sftp.sanitize_uri + factory: airflow.providers.sftp.assets.sftp.create_asset + to_openlineage_converter: airflow.providers.sftp.assets.sftp.convert_asset_to_openlineage + hooks: - integration-name: SSH File Transfer Protocol (SFTP) python-modules: diff --git a/providers/sftp/src/airflow/providers/sftp/assets/__init__.py b/providers/sftp/src/airflow/providers/sftp/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/sftp/src/airflow/providers/sftp/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/sftp/src/airflow/providers/sftp/assets/sftp.py b/providers/sftp/src/airflow/providers/sftp/assets/sftp.py new file mode 100644 index 0000000000000..3eba167ebea31 --- /dev/null +++ b/providers/sftp/src/airflow/providers/sftp/assets/sftp.py @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from airflow.providers.common.compat.assets import Asset + +if TYPE_CHECKING: + from urllib.parse import SplitResult + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + +def sanitize_uri(uri: SplitResult) -> SplitResult: + if not uri.netloc: + raise ValueError("URI format sftp:// must contain a host") + if uri.port is None: + host = uri.netloc.rstrip(":") + uri = uri._replace(netloc=f"{host}:22") + if not uri.path: + raise ValueError("URI format sftp:// must contain a path") + return uri + + +def create_asset(*, host: str, path: str, port: int = 22, extra: dict | None = None) -> Asset: + return Asset(uri=f"sftp://{host}:{port}/{path}", extra=extra) + + +def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset: + """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook.""" + from urllib.parse import urlsplit + + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + + parsed = urlsplit(asset.uri) + path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path + return OpenLineageDataset(namespace=f"file://{parsed.netloc}", name=path or "/") diff --git a/providers/sftp/src/airflow/providers/sftp/get_provider_info.py b/providers/sftp/src/airflow/providers/sftp/get_provider_info.py index f55230a5a499c..456a74c3db7d6 100644 --- a/providers/sftp/src/airflow/providers/sftp/get_provider_info.py +++ b/providers/sftp/src/airflow/providers/sftp/get_provider_info.py @@ -50,6 +50,22 @@ def get_provider_info(): ], } ], + "asset-uris": [ + { + "schemes": ["sftp"], + "handler": "airflow.providers.sftp.assets.sftp.sanitize_uri", + "factory": "airflow.providers.sftp.assets.sftp.create_asset", + "to_openlineage_converter": "airflow.providers.sftp.assets.sftp.convert_asset_to_openlineage", + } + ], + "dataset-uris": [ + { + "schemes": ["sftp"], + "handler": 
"airflow.providers.sftp.assets.sftp.sanitize_uri", + "factory": "airflow.providers.sftp.assets.sftp.create_asset", + "to_openlineage_converter": "airflow.providers.sftp.assets.sftp.convert_asset_to_openlineage", + } + ], "hooks": [ { "integration-name": "SSH File Transfer Protocol (SFTP)", diff --git a/providers/sftp/tests/unit/sftp/assets/__init__.py b/providers/sftp/tests/unit/sftp/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/sftp/tests/unit/sftp/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/sftp/tests/unit/sftp/assets/test_sftp.py b/providers/sftp/tests/unit/sftp/assets/test_sftp.py new file mode 100644 index 0000000000000..6dd34fde568b4 --- /dev/null +++ b/providers/sftp/tests/unit/sftp/assets/test_sftp.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.common.compat.assets import Asset +from airflow.providers.sftp.assets.sftp import convert_asset_to_openlineage, create_asset, sanitize_uri + + +@pytest.mark.parametrize( + ("original", "normalized"), + [ + pytest.param( + "sftp://example.com:2222/data/file.csv", + "sftp://example.com:2222/data/file.csv", + id="normalized", + ), + pytest.param( + "sftp://example.com/data/file.csv", + "sftp://example.com:22/data/file.csv", + id="default-port", + ), + ], +) +def test_sanitize_uri_pass(original: str, normalized: str) -> None: + uri_i = urllib.parse.urlsplit(original) + uri_o = sanitize_uri(uri_i) + assert urllib.parse.urlunsplit(uri_o) == normalized + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("sftp://", id="blank"), + pytest.param("sftp:///path/to/file", id="no-host"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + uri_i = urllib.parse.urlsplit(value) + with pytest.raises(ValueError, match="URI format sftp:// must contain"): + sanitize_uri(uri_i) + + +@pytest.mark.parametrize( + ("path", "expected_uri"), + [ + pytest.param("/data/file.csv", "sftp://example.com:22//data/file.csv", id="root"), + pytest.param("data/file.csv", "sftp://example.com:22/data/file.csv", id="no-leading-slash"), + ], +) +def test_create_asset(path: str, expected_uri: str) -> None: + result = create_asset(host="example.com", path=path) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("expected_name", "uri"), + [ + pytest.param("/", 
"sftp://example.com:22", id="no-path"), + pytest.param("/", "sftp://example.com:22/", id="path-slash-only"), + pytest.param("/data/file.csv", "sftp://example.com:22//data/file.csv", id="root"), + pytest.param("data/file.csv", "sftp://example.com:22/data/file.csv", id="no-leading-slash"), + pytest.param("//data/file.csv", "sftp://example.com:22///data/file.csv", id="two-slashes"), + ], +) +def test_convert_asset_to_openlineage(expected_name, uri) -> None: + asset = Asset(uri=uri) + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == "file://example.com:22" + assert ol_dataset.name == expected_name diff --git a/providers/snowflake/provider.yaml b/providers/snowflake/provider.yaml index c064339348786..0b174b418f069 100644 --- a/providers/snowflake/provider.yaml +++ b/providers/snowflake/provider.yaml @@ -129,6 +129,20 @@ task-decorators: - class-name: airflow.providers.snowflake.decorators.snowpark.snowpark_task name: snowpark +asset-uris: + - schemes: [snowflake] + handler: airflow.providers.snowflake.assets.snowflake.sanitize_uri + factory: airflow.providers.snowflake.assets.snowflake.create_asset + to_openlineage_converter: airflow.providers.snowflake.assets.snowflake.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. 
+dataset-uris: + - schemes: [snowflake] + handler: airflow.providers.snowflake.assets.snowflake.sanitize_uri + factory: airflow.providers.snowflake.assets.snowflake.create_asset + to_openlineage_converter: airflow.providers.snowflake.assets.snowflake.convert_asset_to_openlineage + hooks: - integration-name: Snowflake python-modules: diff --git a/providers/snowflake/src/airflow/providers/snowflake/assets/__init__.py b/providers/snowflake/src/airflow/providers/snowflake/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/snowflake/src/airflow/providers/snowflake/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/snowflake/src/airflow/providers/snowflake/assets/snowflake.py b/providers/snowflake/src/airflow/providers/snowflake/assets/snowflake.py new file mode 100644 index 0000000000000..ffeac3cbf0266 --- /dev/null +++ b/providers/snowflake/src/airflow/providers/snowflake/assets/snowflake.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.providers.common.compat.assets import Asset

if TYPE_CHECKING:
    from urllib.parse import SplitResult

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset


def sanitize_uri(uri: SplitResult) -> SplitResult:
    """Validate an AIP-60 ``snowflake://`` URI.

    Requires an account identifier in the netloc and a path of exactly
    ``/database/schema/table``.
    """
    if not uri.netloc:
        raise ValueError("URI format snowflake:// must contain an account identifier")
    segments = uri.path.split("/")
    if len(segments) != 4:  # [""] from the leading slash, then database, schema, table.
        raise ValueError("URI format snowflake:// must contain database, schema, and table names")
    return uri


def create_asset(*, account: str, database: str, schema: str, table: str, extra: dict | None = None) -> Asset:
    """Build an Asset with URI ``snowflake://account/database/schema/table``."""
    return Asset(uri=f"snowflake://{account}/{database}/{schema}/{table}", extra=extra)


def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
    """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
    from urllib.parse import urlsplit

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset

    parsed = urlsplit(asset.uri)
    # path == "/db/schema/table": discard the empty leading component.
    database, schema, table = parsed.path.split("/")[1:]
    return OpenLineageDataset(namespace=f"snowflake://{parsed.netloc}", name=f"{database}.{schema}.{table}")
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/snowflake/tests/unit/snowflake/assets/test_snowflake.py b/providers/snowflake/tests/unit/snowflake/assets/test_snowflake.py new file mode 100644 index 0000000000000..92c8e6117acda --- /dev/null +++ b/providers/snowflake/tests/unit/snowflake/assets/test_snowflake.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.common.compat.assets import Asset +from airflow.providers.snowflake.assets.snowflake import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) + + +@pytest.mark.parametrize( + ("original", "normalized"), + [ + pytest.param( + "snowflake://xy12345.us-east-1/mydb/public/table", + "snowflake://xy12345.us-east-1/mydb/public/table", + id="normalized", + ), + ], +) +def test_sanitize_uri_pass(original: str, normalized: str) -> None: + uri_i = urllib.parse.urlsplit(original) + uri_o = sanitize_uri(uri_i) + assert urllib.parse.urlunsplit(uri_o) == normalized + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("snowflake://", id="blank"), + pytest.param("snowflake:///mydb/public/table", id="no-account"), + pytest.param("snowflake://account/mydb/table", id="missing-component"), + pytest.param("snowflake://account/mydb/public/table/column", id="extra-component"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + uri_i = urllib.parse.urlsplit(value) + with pytest.raises(ValueError, match="URI format snowflake:// must contain"): + sanitize_uri(uri_i) + + +def test_create_asset() -> None: + result = create_asset(account="xy12345.us-east-1", database="mydb", schema="public", table="users") + assert result == Asset(uri="snowflake://xy12345.us-east-1/mydb/public/users") + + +def test_convert_asset_to_openlineage() -> None: + asset = Asset(uri="snowflake://xy12345.us-east-1/mydb/public/users") + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == "snowflake://xy12345.us-east-1" + assert ol_dataset.name == "mydb.public.users" diff --git a/providers/teradata/provider.yaml b/providers/teradata/provider.yaml index fb8d670648d47..1ae79dfd125c1 100644 --- a/providers/teradata/provider.yaml +++ b/providers/teradata/provider.yaml @@ -125,6 +125,20 @@ transfers: python-module: 
airflow.providers.teradata.transfers.s3_to_teradata how-to-guide: /docs/apache-airflow-providers-teradata/operators/s3_to_teradata.rst +asset-uris: + - schemes: [teradata] + handler: airflow.providers.teradata.assets.teradata.sanitize_uri + factory: airflow.providers.teradata.assets.teradata.create_asset + to_openlineage_converter: airflow.providers.teradata.assets.teradata.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [teradata] + handler: airflow.providers.teradata.assets.teradata.sanitize_uri + factory: airflow.providers.teradata.assets.teradata.create_asset + to_openlineage_converter: airflow.providers.teradata.assets.teradata.convert_asset_to_openlineage + connection-types: - hook-class-name: airflow.providers.teradata.hooks.teradata.TeradataHook hook-name: "Teradata" diff --git a/providers/teradata/src/airflow/providers/teradata/assets/__init__.py b/providers/teradata/src/airflow/providers/teradata/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/teradata/src/airflow/providers/teradata/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.providers.common.compat.assets import Asset

if TYPE_CHECKING:
    from urllib.parse import SplitResult

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset


def sanitize_uri(uri: SplitResult) -> SplitResult:
    """Validate and normalize an AIP-60 ``teradata://`` URI.

    A host is mandatory, a missing port is normalized to the Teradata default
    (1025), and the path must be exactly ``/database/table``.
    """
    if not uri.netloc:
        raise ValueError("URI format teradata:// must contain a host")
    if uri.port is None:
        # No explicit port: drop any dangling colon before appending the default.
        uri = uri._replace(netloc=f"{uri.netloc.rstrip(':')}:1025")
    segments = uri.path.split("/")
    if len(segments) != 3:  # [""] from the leading slash, then database, table.
        raise ValueError("URI format teradata:// must contain a database and table")
    return uri


def create_asset(
    *, host: str, database: str, table: str, port: int = 1025, extra: dict | None = None
) -> Asset:
    """Build an Asset with URI ``teradata://host:port/database/table``."""
    return Asset(uri=f"teradata://{host}:{port}/{database}/{table}", extra=extra)


def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
    """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
    from urllib.parse import urlsplit

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset

    parsed = urlsplit(asset.uri)
    # path == "/database/table": discard the empty leading component.
    database, table = parsed.path.split("/")[1:]
    return OpenLineageDataset(namespace=f"teradata://{parsed.netloc}", name=f"{database}.{table}")
"airflow.providers.teradata.hooks.teradata.TeradataHook", diff --git a/providers/teradata/tests/unit/teradata/assets/__init__.py b/providers/teradata/tests/unit/teradata/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/teradata/tests/unit/teradata/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/teradata/tests/unit/teradata/assets/test_teradata.py b/providers/teradata/tests/unit/teradata/assets/test_teradata.py new file mode 100644 index 0000000000000..cfe55055f27a0 --- /dev/null +++ b/providers/teradata/tests/unit/teradata/assets/test_teradata.py @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from urllib.parse import urlsplit + +import pytest + +from airflow.providers.teradata.assets.teradata import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) + + +class TestSanitizeUri: + def test_valid_uri(self): + result = sanitize_uri(urlsplit("teradata://host:1025/mydb/mytable")) + assert result.scheme == "teradata" + + def test_missing_host(self): + with pytest.raises(ValueError, match="must contain a host"): + sanitize_uri(urlsplit("teradata:///db/table")) + + def test_missing_path(self): + with pytest.raises(ValueError, match="must contain a database and table"): + sanitize_uri(urlsplit("teradata://host:1025")) + + +class TestCreateAsset: + def test_basic(self): + asset = create_asset(host="myhost", database="mydb", table="mytable") + assert asset.uri == "teradata://myhost:1025/mydb/mytable" + + def test_custom_port(self): + asset = create_asset(host="myhost", port=2025, database="db", table="t") + assert asset.uri == "teradata://myhost:2025/db/t" + + +class TestConvertAssetToOpenlineage: + def test_basic(self): + asset = create_asset(host="myhost", database="mydb", table="mytable") + result = convert_asset_to_openlineage(asset, None) + assert result.namespace == "teradata://myhost:1025" + assert result.name == "mydb.mytable" diff --git a/providers/trino/provider.yaml b/providers/trino/provider.yaml index b61c072e24b45..e3b218a6f7bf4 100644 --- a/providers/trino/provider.yaml +++ b/providers/trino/provider.yaml @@ -97,12 +97,16 @@ integrations: asset-uris: - schemes: [trino] handler: 
airflow.providers.trino.assets.trino.sanitize_uri + factory: airflow.providers.trino.assets.trino.create_asset + to_openlineage_converter: airflow.providers.trino.assets.trino.convert_asset_to_openlineage # dataset has been renamed to asset in Airflow 3.0 # This is kept for backward compatibility. dataset-uris: - schemes: [trino] handler: airflow.providers.trino.assets.trino.sanitize_uri + factory: airflow.providers.trino.assets.trino.create_asset + to_openlineage_converter: airflow.providers.trino.assets.trino.convert_asset_to_openlineage hooks: - integration-name: Trino diff --git a/providers/trino/src/airflow/providers/trino/assets/trino.py b/providers/trino/src/airflow/providers/trino/assets/trino.py index d5f3f669fe396..74d7b3ba3289a 100644 --- a/providers/trino/src/airflow/providers/trino/assets/trino.py +++ b/providers/trino/src/airflow/providers/trino/assets/trino.py @@ -19,9 +19,13 @@ from typing import TYPE_CHECKING +from airflow.providers.common.compat.assets import Asset + if TYPE_CHECKING: from urllib.parse import SplitResult + from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset + def sanitize_uri(uri: SplitResult) -> SplitResult: if not uri.netloc: @@ -32,3 +36,20 @@ def sanitize_uri(uri: SplitResult) -> SplitResult: if len(uri.path.split("/")) != 4: # Leading slash, catalog, schema, and table names. 
def create_asset(
    *, host: str, catalog: str, schema: str, table: str, port: int = 8080, extra: dict | None = None
) -> Asset:
    """Build an Asset with URI ``trino://host:port/catalog/schema/table``."""
    return Asset(uri=f"trino://{host}:{port}/{catalog}/{schema}/{table}", extra=extra)


def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
    """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
    from urllib.parse import urlsplit

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset

    parsed = urlsplit(asset.uri)
    # path == "/catalog/schema/table": discard the empty leading component.
    catalog, schema, table = parsed.path.split("/")[1:]
    return OpenLineageDataset(namespace=f"trino://{parsed.netloc}", name=f"{catalog}.{schema}.{table}")
"hooks": [{"integration-name": "Trino", "python-modules": ["airflow.providers.trino.hooks.trino"]}], "transfers": [ diff --git a/providers/trino/tests/unit/trino/assets/test_trino.py b/providers/trino/tests/unit/trino/assets/test_trino.py index a4210f7123cbe..2f0d68a94358d 100644 --- a/providers/trino/tests/unit/trino/assets/test_trino.py +++ b/providers/trino/tests/unit/trino/assets/test_trino.py @@ -21,7 +21,8 @@ import pytest -from airflow.providers.trino.assets.trino import sanitize_uri +from airflow.providers.common.compat.assets import Asset +from airflow.providers.trino.assets.trino import convert_asset_to_openlineage, create_asset, sanitize_uri @pytest.mark.parametrize( @@ -64,3 +65,20 @@ def test_sanitize_uri_fail_non_port() -> None: uri_i = urllib.parse.urlsplit("trino://example.com:abcd/catalog/schema/table") with pytest.raises(ValueError, match="Port could not be cast to integer value as 'abcd'"): sanitize_uri(uri_i) + + +def test_create_asset() -> None: + result = create_asset(host="example.com", catalog="hive", schema="default", table="users") + assert result == Asset(uri="trino://example.com:8080/hive/default/users") + + +def test_create_asset_custom_port() -> None: + result = create_asset(host="example.com", port=9090, catalog="hive", schema="default", table="users") + assert result == Asset(uri="trino://example.com:9090/hive/default/users") + + +def test_convert_asset_to_openlineage() -> None: + asset = Asset(uri="trino://example.com:8080/hive/default/users") + ol_dataset = convert_asset_to_openlineage(asset=asset, lineage_context=None) + assert ol_dataset.namespace == "trino://example.com:8080" + assert ol_dataset.name == "hive.default.users" diff --git a/providers/vertica/provider.yaml b/providers/vertica/provider.yaml index b48006e9f60b8..9cb255db4bbcd 100644 --- a/providers/vertica/provider.yaml +++ b/providers/vertica/provider.yaml @@ -82,6 +82,20 @@ hooks: python-modules: - airflow.providers.vertica.hooks.vertica +asset-uris: + - schemes: 
[vertica] + handler: airflow.providers.vertica.assets.vertica.sanitize_uri + factory: airflow.providers.vertica.assets.vertica.create_asset + to_openlineage_converter: airflow.providers.vertica.assets.vertica.convert_asset_to_openlineage + +# dataset has been renamed to asset in Airflow 3.0 +# This is kept for backward compatibility. +dataset-uris: + - schemes: [vertica] + handler: airflow.providers.vertica.assets.vertica.sanitize_uri + factory: airflow.providers.vertica.assets.vertica.create_asset + to_openlineage_converter: airflow.providers.vertica.assets.vertica.convert_asset_to_openlineage + connection-types: - hook-class-name: airflow.providers.vertica.hooks.vertica.VerticaHook hook-name: "Vertica" diff --git a/providers/vertica/src/airflow/providers/vertica/assets/__init__.py b/providers/vertica/src/airflow/providers/vertica/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/vertica/src/airflow/providers/vertica/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.providers.common.compat.assets import Asset

if TYPE_CHECKING:
    from urllib.parse import SplitResult

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset


def sanitize_uri(uri: SplitResult) -> SplitResult:
    """Validate and normalize an AIP-60 ``vertica://`` URI.

    A host is mandatory, a missing port is normalized to the Vertica default
    (5433), and the path must be exactly ``/database/schema/table``.
    """
    if not uri.netloc:
        raise ValueError("URI format vertica:// must contain a host")
    if uri.port is None:
        # No explicit port: drop any dangling colon before appending the default.
        uri = uri._replace(netloc=f"{uri.netloc.rstrip(':')}:5433")
    segments = uri.path.split("/")
    if len(segments) != 4:  # [""] from the leading slash, then database, schema, table.
        raise ValueError("URI format vertica:// must contain database, schema, and table names")
    return uri


def create_asset(
    *, host: str, database: str, schema: str, table: str, port: int = 5433, extra: dict | None = None
) -> Asset:
    """Build an Asset with URI ``vertica://host:port/database/schema/table``."""
    return Asset(uri=f"vertica://{host}:{port}/{database}/{schema}/{table}", extra=extra)


def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
    """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
    from urllib.parse import urlsplit

    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset

    parsed = urlsplit(asset.uri)
    # path == "/database/schema/table": discard the empty leading component.
    database, schema, table = parsed.path.split("/")[1:]
    return OpenLineageDataset(namespace=f"vertica://{parsed.netloc}", name=f"{database}.{schema}.{table}")
"connection-types": [ { "hook-class-name": "airflow.providers.vertica.hooks.vertica.VerticaHook", diff --git a/providers/vertica/tests/unit/vertica/assets/__init__.py b/providers/vertica/tests/unit/vertica/assets/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/vertica/tests/unit/vertica/assets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/vertica/tests/unit/vertica/assets/test_vertica.py b/providers/vertica/tests/unit/vertica/assets/test_vertica.py new file mode 100644 index 0000000000000..0e1d8dc6c196a --- /dev/null +++ b/providers/vertica/tests/unit/vertica/assets/test_vertica.py @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import urllib.parse + +import pytest + +from airflow.providers.common.compat.assets import Asset +from airflow.providers.vertica.assets.vertica import ( + convert_asset_to_openlineage, + create_asset, + sanitize_uri, +) + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("vertica://host:5433/mydb/public/mytable", id="valid"), + ], +) +def test_sanitize_uri_pass(value: str) -> None: + result = sanitize_uri(urllib.parse.urlsplit(value)) + assert result.scheme == "vertica" + + +@pytest.mark.parametrize( + "value", + [ + pytest.param("vertica:///db/schema/table", id="missing-host"), + pytest.param("vertica://host:5433", id="missing-path"), + ], +) +def test_sanitize_uri_fail(value: str) -> None: + with pytest.raises(ValueError, match="must contain"): + sanitize_uri(urllib.parse.urlsplit(value)) + + +@pytest.mark.parametrize( + ("host", "database", "schema", "table", "port", "expected_uri"), + [ + pytest.param( + "myhost", + "mydb", + "public", + "mytable", + 5433, + "vertica://myhost:5433/mydb/public/mytable", + id="default-port", + ), + pytest.param("myhost", "db", "s", "t", 5444, "vertica://myhost:5444/db/s/t", id="custom-port"), + ], +) +def test_create_asset( + host: str, database: str, schema: str, table: str, port: int, expected_uri: str +) -> None: + result = create_asset(host=host, database=database, schema=schema, table=table, port=port) + assert result == Asset(uri=expected_uri) + + +@pytest.mark.parametrize( + ("uri", "expected_namespace", "expected_name"), + [ + pytest.param( + 
"vertica://myhost:5433/mydb/public/mytable", + "vertica://myhost:5433", + "mydb.public.mytable", + id="default-port", + ), + pytest.param( + "vertica://otherhost:5444/testdb/schema1/data", + "vertica://otherhost:5444", + "testdb.schema1.data", + id="custom-port", + ), + ], +) +def test_convert_asset_to_openlineage(uri: str, expected_namespace: str, expected_name: str) -> None: + asset = Asset(uri=uri) + result = convert_asset_to_openlineage(asset, None) + assert result.namespace == expected_namespace + assert result.name == expected_name diff --git a/task-sdk/tests/task_sdk/definitions/test_asset.py b/task-sdk/tests/task_sdk/definitions/test_asset.py index 28356eb6960eb..fb6a61954c927 100644 --- a/task-sdk/tests/task_sdk/definitions/test_asset.py +++ b/task-sdk/tests/task_sdk/definitions/test_asset.py @@ -152,8 +152,8 @@ def test_uri_with_password() -> None: "An Asset URI should not contain a password. User info has been automatically dropped." ) EmptyOperator(task_id="task1", outlets=[asset]) - assert asset.uri == "ftp://localhost/foo.txt" - assert os.fspath(asset) == "ftp://localhost/foo.txt" + assert asset.uri == "ftp://localhost:21/foo.txt" + assert os.fspath(asset) == "ftp://localhost:21/foo.txt" def test_uri_without_password() -> None: diff --git a/uv.lock b/uv.lock index e0084ec1cb511..bdadcacdf57b7 100644 --- a/uv.lock +++ b/uv.lock @@ -2735,7 +2735,7 @@ packages = [] [[package]] name = "apache-airflow-providers-airbyte" -version = "5.4.1" +version = "5.4.2" source = { editable = "providers/airbyte" } dependencies = [ { name = "airbyte-api" }, @@ -2862,7 +2862,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-amazon" -version = "9.27.0" +version = "9.28.0" source = { editable = "providers/amazon" } dependencies = [ { name = "apache-airflow" }, @@ -4025,7 +4025,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = 
"apache-airflow-providers-cncf-kubernetes" -version = "10.16.1" +version = "10.17.0" source = { editable = "providers/cncf/kubernetes" } dependencies = [ { name = "aiofiles" }, @@ -4113,7 +4113,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-common-ai" -version = "0.1.1" +version = "0.2.0" source = { editable = "providers/common/ai" } dependencies = [ { name = "apache-airflow" }, @@ -4341,7 +4341,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-common-sql" -version = "1.35.0" +version = "1.36.0" source = { editable = "providers/common/sql" } dependencies = [ { name = "apache-airflow" }, @@ -4442,7 +4442,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-databricks" -version = "7.13.0" +version = "7.14.0" source = { editable = "providers/databricks" } dependencies = [ { name = "aiohttp" }, @@ -4584,7 +4584,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-dbt-cloud" -version = "4.8.1" +version = "4.8.2" source = { editable = "providers/dbt/cloud" } dependencies = [ { name = "aiohttp" }, @@ -4678,7 +4678,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-discord" -version = "3.12.2" +version = "3.12.3" source = { editable = "providers/discord" } dependencies = [ { name = "apache-airflow" }, @@ -4756,7 +4756,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-edge3" -version = "3.5.0" +version = "3.6.0" source = { editable = "providers/edge3" } dependencies = [ { name = "aiofiles" }, @@ -4895,7 +4895,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d 
[[package]] name = "apache-airflow-providers-fab" -version = "3.6.2" +version = "3.6.3" source = { editable = "providers/fab" } dependencies = [ { name = "apache-airflow" }, @@ -5059,7 +5059,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-git" -version = "0.3.1" +version = "0.3.2" source = { editable = "providers/git" } dependencies = [ { name = "apache-airflow" }, @@ -5133,7 +5133,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-google" -version = "21.2.0" +version = "21.3.0" source = { editable = "providers/google" } dependencies = [ { name = "apache-airflow" }, @@ -5815,7 +5815,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-microsoft-azure" -version = "13.1.2" +version = "13.2.0" source = { editable = "providers/microsoft/azure" } dependencies = [ { name = "adlfs" }, @@ -5891,7 +5891,7 @@ requires-dist = [ { name = "apache-airflow-providers-common-messaging", marker = "extra == 'common-messaging'", editable = "providers/common/messaging" }, { name = "apache-airflow-providers-oracle", marker = "extra == 'oracle'", editable = "providers/oracle" }, { name = "apache-airflow-providers-sftp", marker = "extra == 'sftp'", editable = "providers/sftp" }, - { name = "azure-batch", specifier = ">=8.0.0" }, + { name = "azure-batch", specifier = ">=8.0.0,<15.0.0" }, { name = "azure-cosmos", specifier = ">=4.6.0" }, { name = "azure-datalake-store", specifier = ">=0.0.45" }, { name = "azure-identity", specifier = ">=1.3.1" }, @@ -5949,6 +5949,9 @@ dependencies = [ ] [package.optional-dependencies] +common-compat = [ + { name = "apache-airflow-providers-common-compat" }, +] openlineage = [ { name = "apache-airflow-providers-openlineage" }, ] @@ -5957,6 +5960,7 @@ openlineage = [ dev = [ { name = "apache-airflow" }, { name = 
"apache-airflow-devel-common" }, + { name = "apache-airflow-providers-common-compat" }, { name = "apache-airflow-providers-common-sql" }, { name = "apache-airflow-providers-openlineage" }, { name = "apache-airflow-task-sdk" }, @@ -5968,18 +5972,20 @@ docs = [ [package.metadata] requires-dist = [ { name = "apache-airflow", editable = "." }, + { name = "apache-airflow-providers-common-compat", marker = "extra == 'common-compat'", editable = "providers/common/compat" }, { name = "apache-airflow-providers-common-sql", editable = "providers/common/sql" }, { name = "apache-airflow-providers-openlineage", marker = "extra == 'openlineage'", editable = "providers/openlineage" }, { name = "methodtools", specifier = ">=0.4.7" }, { name = "pymssql", marker = "python_full_version < '3.14'", specifier = ">=2.3.5" }, { name = "pymssql", marker = "python_full_version >= '3.14'", specifier = ">=2.3.13" }, ] -provides-extras = ["openlineage"] +provides-extras = ["openlineage", "common-compat"] [package.metadata.requires-dev] dev = [ { name = "apache-airflow", editable = "." 
}, { name = "apache-airflow-devel-common", editable = "devel-common" }, + { name = "apache-airflow-providers-common-compat", editable = "providers/common/compat" }, { name = "apache-airflow-providers-common-sql", editable = "providers/common/sql" }, { name = "apache-airflow-providers-openlineage", editable = "providers/openlineage" }, { name = "apache-airflow-task-sdk", editable = "task-sdk" }, @@ -6103,7 +6109,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-mysql" -version = "6.5.2" +version = "6.5.3" source = { editable = "providers/mysql" } dependencies = [ { name = "aiomysql" }, @@ -6333,7 +6339,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-openlineage" -version = "2.15.0" +version = "2.16.0" source = { editable = "providers/openlineage" } dependencies = [ { name = "apache-airflow" }, @@ -7431,7 +7437,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-standard" -version = "1.12.3" +version = "1.13.0" source = { editable = "providers/standard" } dependencies = [ { name = "apache-airflow" }, @@ -7813,7 +7819,7 @@ docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "d [[package]] name = "apache-airflow-providers-yandex" -version = "4.4.2" +version = "4.5.0" source = { editable = "providers/yandex" } dependencies = [ { name = "apache-airflow" },