From 382d06594530199e384fb5a99d1dce62b87de313 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 4 May 2026 09:00:44 -0600 Subject: [PATCH 1/2] build: Enable Spark SQL tests for Spark 4.2.0-preview4 Adds dev/diffs/4.2.0-preview4.diff so the Spark SQL test suite can run against Apache Spark 4.2.0-preview4 with Comet enabled, and wires the 4.2 profile into the spark_sql_test workflow matrix. The diff was seeded from 4.1.1.diff and reconciled against v4.2.0-preview4. --- .github/workflows/spark_sql_test.yml | 1 + dev/diffs/4.2.0-preview4.diff | 4311 ++++++++++++++++++++++++++ pom.xml | 3 +- 3 files changed, 4313 insertions(+), 2 deletions(-) create mode 100644 dev/diffs/4.2.0-preview4.diff diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 980629174f..146c32c7fe 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -142,6 +142,7 @@ jobs: - {spark-short: '4.0', spark-full: '4.0.2', java: 17, scan-impl: 'auto'} - {spark-short: '4.0', spark-full: '4.0.2', java: 21, scan-impl: 'auto'} - {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto'} + - {spark-short: '4.2', spark-full: '4.2.0-preview4', java: 17, scan-impl: 'auto'} fail-fast: false name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }} # Hive tests stay on the standard GitHub-hosted runner: HiveSparkSubmitSuite diff --git a/dev/diffs/4.2.0-preview4.diff b/dev/diffs/4.2.0-preview4.diff new file mode 100644 index 0000000000..824209ff97 --- /dev/null +++ b/dev/diffs/4.2.0-preview4.diff @@ -0,0 +1,4311 @@ +diff --git a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala +index 6df8bc85b51..dabb75e2b75 100644 +--- a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala ++++ b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala +@@ 
-268,6 +268,11 @@ class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { + } + + test("Upload from all decommissioned executors") { ++ // Comet replaces Spark's shuffle with its own native shuffle, which is incompatible with ++ // the fallback storage migration path used by BlockManagerDecommissioner. ++ val cometEnv = System.getenv("ENABLE_COMET") ++ assume(cometEnv == null || cometEnv == "0" || cometEnv == "false", ++ "Skipped when Comet is enabled: incompatible with Comet native shuffle storage") + sc = new SparkContext(getSparkConf(2, 2)) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) +@@ -298,6 +303,11 @@ class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { + } + + test("Upload multi stages") { ++ // Comet replaces Spark's shuffle with its own native shuffle, which is incompatible with ++ // the fallback storage migration path used by BlockManagerDecommissioner. ++ val cometEnv = System.getenv("ENABLE_COMET") ++ assume(cometEnv == null || cometEnv == "0" || cometEnv == "false", ++ "Skipped when Comet is enabled: incompatible with Comet native shuffle storage") + sc = new SparkContext(getSparkConf()) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 1, 60000) +@@ -332,6 +342,11 @@ class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { + + CompressionCodec.shortCompressionCodecNames.keys.foreach { codec => + test(s"$codec - Newly added executors should access old data from remote storage") { ++ // Comet replaces Spark's shuffle with its own native shuffle, which is incompatible with ++ // the fallback storage migration path used by BlockManagerDecommissioner. 
++ val cometEnv = System.getenv("ENABLE_COMET") ++ assume(cometEnv == null || cometEnv == "0" || cometEnv == "false", ++ "Skipped when Comet is enabled: incompatible with Comet native shuffle storage") + sc = new SparkContext(getSparkConf(2, 0).set(IO_COMPRESSION_CODEC, codec)) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) +diff --git a/pom.xml b/pom.xml +index 1e7774b3ae6..b18f4a83d31 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -152,6 +152,8 @@ + 4.0.3 + 2.5.3 + 2.0.8 ++ 4.1 ++ 0.16.0-SNAPSHOT + + + org.apache.datasketches +diff --git a/sql/core/pom.xml b/sql/core/pom.xml +index cd7b2fe7805..cbd987e29b8 100644 +--- a/sql/core/pom.xml ++++ b/sql/core/pom.xml +@@ -97,6 +97,10 @@ + org.apache.spark + spark-tags_${scala.binary.version} + ++ ++ org.apache.datafusion ++ comet-spark-spark${spark.version.short}_${scala.binary.version} ++ + + + spark-4.2 2.13.18 From 45273c0dbc59f843cdeddac5c068c910ab12d064 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 4 May 2026 09:22:54 -0600 Subject: [PATCH 2/2] fix: set spark.version.short to 4.2 in 4.2.0-preview4 diff The 4.1 value was inherited from 4.1.1.diff during reconciliation. The Spark build was trying to resolve comet-spark-spark4.1_2.13 instead of the 4.2 artifact, causing the sql_hive jobs to fail before any tests ran. --- dev/diffs/4.2.0-preview4.diff | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/diffs/4.2.0-preview4.diff b/dev/diffs/4.2.0-preview4.diff index 824209ff97..059d914b35 100644 --- a/dev/diffs/4.2.0-preview4.diff +++ b/dev/diffs/4.2.0-preview4.diff @@ -39,14 +39,14 @@ index 6df8bc85b51..dabb75e2b75 100644 withSpark(sc) { sc => TestUtils.waitUntilExecutorsUp(sc, 2, 60000) diff --git a/pom.xml b/pom.xml -index 1e7774b3ae6..b18f4a83d31 100644 +index 1e7774b3ae6..6d36b281332 100644 --- a/pom.xml +++ b/pom.xml @@ -152,6 +152,8 @@ 4.0.3 2.5.3 2.0.8 -+ 4.1 ++ 4.2 + 0.16.0-SNAPSHOT