From 9658e73d30fd32fb110a4307aed6444ad9cf942f Mon Sep 17 00:00:00 2001 From: illiabarbashov-sketch Date: Fri, 17 Apr 2026 14:38:56 +0200 Subject: [PATCH 1/2] HIVE-29574: merge join skew check --- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 ++++ .../hive/ql/exec/CommonMergeJoinOperator.java | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9db102852f8f..febdaa39fddb 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1737,6 +1737,11 @@ public static enum ConfVars { "How many rows in the right-most join operand Hive should buffer before emitting the join result."), HIVE_JOIN_CACHE_SIZE("hive.join.cache.size", 25000, "How many rows in the joining tables (except the streaming table) should be cached in memory."), + HIVE_MERGE_JOIN_SKEW_THRESHOLD("hive.merge.join.skew.threshold", 100_000, + "Maximum number of rows allowed per join key in a single Tez sort-merge join task before a " + + "skew event is reported."), + HIVE_MERGE_JOIN_SKEW_ABORT("hive.merge.join.skew.abort", false, + "When set to true and the row count is equal to hive.merge.join.skew.threshold, the Tez task will be aborted."), HIVE_PUSH_RESIDUAL_INNER("hive.join.inner.residual", false, "Whether to push non-equi filter predicates within inner joins. This can improve efficiency in " + "the evaluation of certain joins, since we will not be emitting rows which are thrown away by " diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java index f9e7a40e10e7..4660ade7a567 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java @@ -97,6 +97,9 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator key) throws HiveException { List keyWritable = keyWritables[alias]; if (keyWritable == null) { From 4af5fc63585f92622bd9b58d1f4955a1e0e5d33f Mon Sep 17 00:00:00 2001 From: illiabarbashov-sketch Date: Tue, 28 Apr 2026 09:59:53 +0200 Subject: [PATCH 2/2] HIVE-29574: queries clientpositive and clientnegative were added --- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../hive/ql/exec/CommonMergeJoinOperator.java | 34 +--- .../monitoring/SkewedMergeJoinMonitor.java | 73 +++++++ .../TestCommonMergeJoinSkewThreshold.java | 100 ++++++++++ .../clientnegative/mergejoin_skew_abort.q | 20 ++ .../clientpositive/mergejoin_skew_warn.q | 35 ++++ .../clientnegative/mergejoin_skew_abort.q.out | 70 +++++++ .../llap/mergejoin_skew_warn.q.out | 187 ++++++++++++++++++ 8 files changed, 496 insertions(+), 25 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/SkewedMergeJoinMonitor.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/exec/TestCommonMergeJoinSkewThreshold.java create mode 100644 ql/src/test/queries/clientnegative/mergejoin_skew_abort.q create mode 100644 ql/src/test/queries/clientpositive/mergejoin_skew_warn.q create mode 100644 ql/src/test/results/clientnegative/mergejoin_skew_abort.q.out create mode 100644 ql/src/test/results/clientpositive/llap/mergejoin_skew_warn.q.out diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index febdaa39fddb..66ecee81dd85 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1737,7 +1737,7 @@ public static enum ConfVars { "How many rows in the right-most join operand Hive should buffer before emitting the join result."), HIVE_JOIN_CACHE_SIZE("hive.join.cache.size", 25000, "How many rows in the joining tables (except the streaming table) should be cached in memory."), - HIVE_MERGE_JOIN_SKEW_THRESHOLD("hive.merge.join.skew.threshold", 100_000, + HIVE_MERGE_JOIN_SKEW_THRESHOLD("hive.merge.join.skew.threshold", -1L, "Maximum number of rows allowed per join key in a single Tez sort-merge join task before a " + "skew event is reported."), HIVE_MERGE_JOIN_SKEW_ABORT("hive.merge.join.skew.abort", false, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java index 4660ade7a567..52a22e71dcd0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java @@ -27,6 +27,7 @@ import java.util.TreeSet; import org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource; +import org.apache.hadoop.hive.ql.exec.tez.monitoring.SkewedMergeJoinMonitor; import org.apache.hadoop.hive.ql.util.NullOrdering; import org.apache.hadoop.hive.serde.serdeConstants; import org.slf4j.Logger; @@ -97,8 +98,7 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator key) throws HiveException { List keyWritable = keyWritables[alias]; if (keyWritable == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/SkewedMergeJoinMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/SkewedMergeJoinMonitor.java new file mode 100644 index 000000000000..c28f00ce83d4 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/SkewedMergeJoinMonitor.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez.monitoring; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SkewedMergeJoinMonitor { + + private transient long mergeJoinSkewThreshold; + private transient boolean mergeJoinSkewAbort; + private transient boolean[] skewedKeyFlagged; + + private static final Logger LOG = LoggerFactory.getLogger(SkewedMergeJoinMonitor.class.getName()); + + public SkewedMergeJoinMonitor(long mergeJoinSkewThreshold, boolean mergeJoinSkewAbort, int maxAlias) { + this.mergeJoinSkewThreshold = mergeJoinSkewThreshold; + this.mergeJoinSkewAbort = mergeJoinSkewAbort; + skewedKeyFlagged = new boolean[maxAlias]; + } + + public boolean isActive() { + return mergeJoinSkewThreshold > 0; + } + + public boolean shouldBeFlagged(byte alias, long rowCount) { + return rowCount >= mergeJoinSkewThreshold && !skewedKeyFlagged[alias]; + } + + public boolean isFlagged(int alias) { + return skewedKeyFlagged[alias]; + } + + public void checkMergeJoinSkew(byte alias, long rowCount) throws HiveException { + if (!isActive()) { + return; + } + + if (!shouldBeFlagged(alias, rowCount)) { + return; + } + + skewedKeyFlagged[alias] = true; + + String msg = String.format( + "Data skew detected in merge join, " + + "table alias %d has accumulated %d rows.", + alias, rowCount); + + if (mergeJoinSkewAbort) { + throw new HiveException(msg); + } else { + LOG.warn(msg); + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestCommonMergeJoinSkewThreshold.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestCommonMergeJoinSkewThreshold.java new file mode 100644 index 000000000000..b70225645202 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestCommonMergeJoinSkewThreshold.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.hive.ql.exec.tez.monitoring.SkewedMergeJoinMonitor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestCommonMergeJoinSkewThreshold { + + private CommonMergeJoinOperator op; + + @Before + public void setUp() { + op = new CommonMergeJoinOperator(); + } + + @Test + public void testDisabled_noWarnNoThrow() throws HiveException { + op.skewedMergeJoinMonitor = new SkewedMergeJoinMonitor( + -1L, false, 4); + + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 0, Long.MAX_VALUE); + } + + @Test + public void testBelowThreshold_isOk() throws HiveException { + op.skewedMergeJoinMonitor = new SkewedMergeJoinMonitor( + 1000L, false, 4); + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 0, 999L); + } + + @Test + public void testAtThreshold_warnOnce() throws HiveException { + op.skewedMergeJoinMonitor = new SkewedMergeJoinMonitor( + 500L, false, 4); + + // should warn without throwing + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 0, 500L); + + Assert.assertTrue("skewedKeyFlagged[0] must be set after the first crossing", + op.skewedMergeJoinMonitor.isFlagged(0)); + + } + + @Test + public void testFlagsAreIndependentPerTag() throws HiveException { + op.skewedMergeJoinMonitor = new SkewedMergeJoinMonitor( + 100L, false, 4); + + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 0, 200L); + Assert.assertTrue("tag 0 should be flagged", op.skewedMergeJoinMonitor.isFlagged(0)); + Assert.assertFalse("tag 1 should still be clear", op.skewedMergeJoinMonitor.isFlagged(1)); + + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 1, 150L); + Assert.assertTrue("tag 1 should now be flagged", op.skewedMergeJoinMonitor.isFlagged(1)); + } + + @Test + public void testAbortMode_belowThreshold_noThrow() throws HiveException { + op.skewedMergeJoinMonitor = new SkewedMergeJoinMonitor( + 100L, true, 4); + + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 0, 99L); + } + + @Test + public void testAbortMode_throwsHiveException() { + op.skewedMergeJoinMonitor = new SkewedMergeJoinMonitor( + 100L, true, 4); + + try { + op.skewedMergeJoinMonitor.checkMergeJoinSkew((byte) 0, 200L); + Assert.fail("Expected HiveException to be thrown in abort mode"); + } catch (HiveException e) { + String msg = e.getMessage(); + Assert.assertNotNull(msg); + Assert.assertTrue("Message should mention row count 200", msg.contains("200")); + } + } +} + diff --git a/ql/src/test/queries/clientnegative/mergejoin_skew_abort.q b/ql/src/test/queries/clientnegative/mergejoin_skew_abort.q new file mode 100644 index 000000000000..aa811871669b --- /dev/null +++ b/ql/src/test/queries/clientnegative/mergejoin_skew_abort.q @@ -0,0 +1,20 @@ +SET hive.vectorized.execution.enabled=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.cbo.enable=false; +set hive.auto.convert.join=false; +set hive.optimize.ppd=false; +-- merge join observability config, with true should throw exception after skew +-- join detected beyond the threshold +set hive.merge.join.skew.threshold=2; +set hive.merge.join.skew.abort=true; + +CREATE TABLE merge_skew_abort_a (key int, value string) STORED AS TEXTFILE; +CREATE TABLE merge_skew_abort_b (key int, value string) STORED AS TEXTFILE; + +INSERT INTO TABLE merge_skew_abort_a VALUES (1, 'a1'), (1, 'a2'), (1, 'a3'), (1, 'a4'),(2, 'b1'); +INSERT INTO TABLE merge_skew_abort_b VALUES (1, 'x1'), (2, 'y1'); + +SELECT a.key, a.value, b.value +FROM merge_skew_abort_a a JOIN merge_skew_abort_b b ON a.key = b.key; + diff --git a/ql/src/test/queries/clientpositive/mergejoin_skew_warn.q b/ql/src/test/queries/clientpositive/mergejoin_skew_warn.q new file mode 100644 index 000000000000..87601d3530ff --- /dev/null +++ b/ql/src/test/queries/clientpositive/mergejoin_skew_warn.q @@ -0,0 +1,35 @@ +SET hive.vectorized.execution.enabled=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.cbo.enable=false; +set hive.auto.convert.join=false; +set hive.optimize.ppd=false; +set hive.merge.join.skew.threshold=2; +set hive.merge.join.skew.abort=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE merge_skew_warn_a (key int, value string) STORED AS TEXTFILE; +CREATE TABLE merge_skew_warn_b (key int, value string) STORED AS TEXTFILE; + +INSERT INTO TABLE merge_skew_warn_a VALUES (1, 'a1'), (1, 'a2'), (1, 'a3'), (1, 'a4'), +(2, 'b1'), (3, 'c1'); +INSERT INTO TABLE merge_skew_warn_b VALUES (1, 'x1'), (2, 'y1'), (3, 'z1'); + +EXPLAIN +SELECT a.key, a.value, b.value +FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key; + +SELECT a.key, a.value, b.value +FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key; + +SELECT count(*) FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key; + +-- no warning run +set hive.merge.join.skew.threshold=-1; + +SELECT count(*) FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key; + +DROP TABLE merge_skew_warn_a; +DROP TABLE merge_skew_warn_b; + diff --git a/ql/src/test/results/clientnegative/mergejoin_skew_abort.q.out b/ql/src/test/results/clientnegative/mergejoin_skew_abort.q.out new file mode 100644 index 000000000000..1ce3dabfe440 --- /dev/null +++ b/ql/src/test/results/clientnegative/mergejoin_skew_abort.q.out @@ -0,0 +1,70 @@ +PREHOOK: query: CREATE TABLE merge_skew_abort_a (key int, value string) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge_skew_abort_a +POSTHOOK: query: CREATE TABLE merge_skew_abort_a (key int, value string) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge_skew_abort_a +PREHOOK: query: CREATE TABLE merge_skew_abort_b (key int, value string) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge_skew_abort_b +POSTHOOK: query: CREATE TABLE merge_skew_abort_b (key int, value string) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge_skew_abort_b +PREHOOK: query: INSERT INTO TABLE merge_skew_abort_a VALUES (1, 'a1'), (1, 'a2'), (1, 'a3'), (1, 'a4'),(2, 'b1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@merge_skew_abort_a +POSTHOOK: query: INSERT INTO TABLE merge_skew_abort_a VALUES (1, 'a1'), (1, 'a2'), (1, 'a3'), (1, 'a4'),(2, 'b1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@merge_skew_abort_a +POSTHOOK: Lineage: merge_skew_abort_a.key SCRIPT [] +POSTHOOK: Lineage: merge_skew_abort_a.value SCRIPT [] +PREHOOK: query: INSERT INTO TABLE merge_skew_abort_b VALUES (1, 'x1'), (2, 'y1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@merge_skew_abort_b +POSTHOOK: query: INSERT INTO TABLE merge_skew_abort_b VALUES (1, 'x1'), (2, 'y1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@merge_skew_abort_b +POSTHOOK: Lineage: merge_skew_abort_b.key SCRIPT [] +POSTHOOK: Lineage: merge_skew_abort_b.value SCRIPT [] +PREHOOK: query: SELECT a.key, a.value, b.value +FROM merge_skew_abort_a a JOIN merge_skew_abort_b b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge_skew_abort_a +PREHOOK: Input: default@merge_skew_abort_b +#### A masked pattern was here #### +Status: Failed +Vertex failed, vertexName=Reducer 2, vertexId=vertex_#ID#, diagnostics=[Task failed, taskId=task_#ID#, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Data skew detected in merge join, table alias 0 has accumulated 2 rows. +#### A masked pattern was here #### +], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Data skew detected in merge join, table alias 0 has accumulated 2 rows. +#### A masked pattern was here #### +]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Reducer 2] killed/failed due to:OWN_TASK_FAILURE] +DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:0 +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Reducer 2, vertexId=vertex_#ID#, diagnostics=[Task failed, taskId=task_#ID#, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Data skew detected in merge join, table alias 0 has accumulated 2 rows. +#### A masked pattern was here #### +], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row +#### A masked pattern was here #### +Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Data skew detected in merge join, table alias 0 has accumulated 2 rows. +#### A masked pattern was here #### +]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Reducer 2] killed/failed due to:OWN_TASK_FAILURE]DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:0 diff --git a/ql/src/test/results/clientpositive/llap/mergejoin_skew_warn.q.out b/ql/src/test/results/clientpositive/llap/mergejoin_skew_warn.q.out new file mode 100644 index 000000000000..c1afc9c5697b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mergejoin_skew_warn.q.out @@ -0,0 +1,187 @@ +PREHOOK: query: CREATE TABLE merge_skew_warn_a (key int, value string) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge_skew_warn_a +POSTHOOK: query: CREATE TABLE merge_skew_warn_a (key int, value string) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge_skew_warn_a +PREHOOK: query: CREATE TABLE merge_skew_warn_b (key int, value string) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge_skew_warn_b +POSTHOOK: query: CREATE TABLE merge_skew_warn_b (key int, value string) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge_skew_warn_b +PREHOOK: query: INSERT INTO TABLE merge_skew_warn_a VALUES (1, 'a1'), (1, 'a2'), (1, 'a3'), (1, 'a4'), +(2, 'b1'), (3, 'c1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@merge_skew_warn_a +POSTHOOK: query: INSERT INTO TABLE merge_skew_warn_a VALUES (1, 'a1'), (1, 'a2'), (1, 'a3'), (1, 'a4'), +(2, 'b1'), (3, 'c1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@merge_skew_warn_a +POSTHOOK: Lineage: merge_skew_warn_a.key SCRIPT [] +POSTHOOK: Lineage: merge_skew_warn_a.value SCRIPT [] +PREHOOK: query: INSERT INTO TABLE merge_skew_warn_b VALUES (1, 'x1'), (2, 'y1'), (3, 'z1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@merge_skew_warn_b +POSTHOOK: query: INSERT INTO TABLE merge_skew_warn_b VALUES (1, 'x1'), (2, 'y1'), (3, 'z1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@merge_skew_warn_b +POSTHOOK: Lineage: merge_skew_warn_b.key SCRIPT [] +POSTHOOK: Lineage: merge_skew_warn_b.value SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT a.key, a.value, b.value +FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge_skew_warn_a +PREHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a.key, a.value, b.value +FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge_skew_warn_a +POSTHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 6 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.key, a.value, b.value +FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge_skew_warn_a +PREHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.key, a.value, b.value +FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge_skew_warn_a +POSTHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +1 a1 x1 +1 a2 x1 +1 a3 x1 +1 a4 x1 +2 b1 y1 +3 c1 z1 +PREHOOK: query: SELECT count(*) FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge_skew_warn_a +PREHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge_skew_warn_a +POSTHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +6 +PREHOOK: query: SELECT count(*) FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge_skew_warn_a +PREHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM merge_skew_warn_a a JOIN merge_skew_warn_b b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge_skew_warn_a +POSTHOOK: Input: default@merge_skew_warn_b +#### A masked pattern was here #### +6 +PREHOOK: query: DROP TABLE merge_skew_warn_a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge_skew_warn_a +PREHOOK: Output: database:default +PREHOOK: Output: default@merge_skew_warn_a +POSTHOOK: query: DROP TABLE merge_skew_warn_a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge_skew_warn_a +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge_skew_warn_a +PREHOOK: query: DROP TABLE merge_skew_warn_b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge_skew_warn_b +PREHOOK: Output: database:default +PREHOOK: Output: default@merge_skew_warn_b +POSTHOOK: query: DROP TABLE merge_skew_warn_b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge_skew_warn_b +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge_skew_warn_b