diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 717d1f8b6a7c..ab5eac684c8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -31,6 +31,7 @@ public class ColStatistics {
   private boolean isPrimaryKey;
   private boolean isEstimated;
   private boolean isFilteredColumn;
+  private boolean isConst;
   private byte[] bitVectors;
   private byte[] histogram;
 
@@ -155,6 +156,8 @@ public String toString() {
     sb.append(" isEstimated: ");
     sb.append(isEstimated);
+    sb.append(" isConst: ");
+    sb.append(isConst);
     return sb.toString();
   }
 
@@ -171,6 +174,7 @@ public ColStatistics clone() {
     clone.setPrimaryKey(isPrimaryKey);
     clone.setIsEstimated(isEstimated);
    clone.setIsFilteredColumn(isFilteredColumn);
+    clone.setConst(isConst);
     if (range != null ) {
       clone.setRange(range.clone());
     }
@@ -232,4 +236,12 @@ public boolean isFilteredColumn() {
     return isFilteredColumn;
   }
 
+  public boolean isConst() {
+    return isConst;
+  }
+
+  public void setConst(boolean isConst) {
+    this.isConst = isConst;
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 55f9d0c1e158..2160c4965d02 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -823,9 +823,11 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
       if (numTrues == 0 && numFalses == 0) {
         // All NULL column - no non-null distinct values
         cs.setCountDistint(0);
+        cs.setConst(true);
       } else if (numTrues == 0 || numFalses == 0) {
         // One value type confirmed absent (=0), other is present (>0) or unknown (<0)
         cs.setCountDistint(1);
+        cs.setConst(csd.getBooleanStats().getNumNulls() == 0 && (numTrues > 0 || numFalses > 0));
       } else {
         // Both != 0: either both present (>0), both unknown (<0), or one present + one unknown
         cs.setCountDistint(2);
@@ -1646,6 +1648,7 @@ private static ColStatistics buildColStatForConstant(HiveConf conf, long numRows
     colStats.setAvgColLen(avgColSize);
     colStats.setCountDistint(countDistincts);
     colStats.setNumNulls(numNulls);
+    colStats.setConst(true);
 
     Optional<Number> value = getConstValue(encd);
     value.ifPresent(number -> colStats.setRange(number, number));
@@ -2112,7 +2115,7 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
     for (ColStatistics cs : colStats) {
       if (cs != null) {
         long ndv = cs.getCountDistint();
-        if (cs.getNumNulls() > 0) {
+        if (cs.getNumNulls() > 0 && (ndv > 0 || cs.isConst())) {
           ndv = StatsUtils.safeAdd(ndv, 1);
         }
         ndvValues.add(ndv);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
index 4de2867de7c0..f83482d11275 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
@@ -62,6 +62,7 @@ public void add(ColStatistics stat) {
     if (stat.isFilteredColumn()) {
       result.setFilterColumn();
     }
+    result.setConst(result.isConst() && stat.isConst());
   }
   public Optional<ColStatistics> getResult() {
     return Optional.of(result);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
index c009472fed0a..669c550402f6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
@@ -367,6 +367,46 @@ void testGetColStatisticsBooleanAllNull() {
     assertNotNull(cs);
     assertEquals(0, cs.getCountDistint(), "Boolean NDV should be 0 for all-NULL column");
+    assertEquals(true, cs.isConst(), "all-NULL boolean column must be marked isConst");
+  }
+
+  @ParameterizedTest(name = "isConst=true for verified single-value boolean (numTrues={0}, numFalses={1}, numNulls=0)")
+  @org.junit.jupiter.params.provider.CsvSource({"100, 0", "0, 100"})
+  void testGetColStatisticsBooleanIsConstForVerifiedSingleValue(long numTrues, long numFalses) {
+    ColumnStatisticsObj cso = new ColumnStatisticsObj();
+    cso.setColName("bool_col");
+    cso.setColType(serdeConstants.BOOLEAN_TYPE_NAME);
+    BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
+    boolStats.setNumTrues(numTrues);
+    boolStats.setNumFalses(numFalses);
+    boolStats.setNumNulls(0);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    data.setBooleanStats(boolStats);
+    cso.setStatsData(data);
+
+    ColStatistics cs = StatsUtils.getColStatistics(cso, "bool_col");
+
+    assertEquals(true, cs.isConst());
+  }
+
+  @ParameterizedTest(name = "isConst stays false for boolean (numTrues={0}, numFalses={1})")
+  @org.junit.jupiter.params.provider.CsvSource({
+      "100, 0", "0, 100", "-1, 100", "100, -1", "50, 50"})
+  void testGetColStatisticsBooleanIsConstNotSetForNonAllNullCases(long numTrues, long numFalses) {
+    ColumnStatisticsObj cso = new ColumnStatisticsObj();
+    cso.setColName("bool_col");
+    cso.setColType(serdeConstants.BOOLEAN_TYPE_NAME);
+    BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
+    boolStats.setNumTrues(numTrues);
+    boolStats.setNumFalses(numFalses);
+    boolStats.setNumNulls(10);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    data.setBooleanStats(boolStats);
+    cso.setStatsData(data);
+
+    ColStatistics cs = StatsUtils.getColStatistics(cso, "bool_col");
+
+    assertEquals(false, cs.isConst());
   }
 
   @Test
@@ -565,4 +605,24 @@ void testGetColStatisticsTimestampType() {
     assertEquals(1700000000L, range.maxValue.longValue(), "maxValue mismatch for TIMESTAMP");
   }
 
+  @Test
+  void testColStatisticsIsConstDefaultsFalse() {
+    ColStatistics cs = new ColStatistics("c", "int");
+    assertEquals(false, cs.isConst(), "isConst should default to false");
+  }
+
+  @Test
+  void testColStatisticsIsConstSetterAndClone() {
+    ColStatistics cs = new ColStatistics("c", "int");
+    cs.setConst(true);
+    assertEquals(true, cs.isConst(), "setConst(true) should be observable via isConst()");
+
+    ColStatistics clone = cs.clone();
+    assertEquals(true, clone.isConst(), "clone() must propagate isConst");
+
+    cs.setConst(false);
+    assertEquals(false, cs.isConst(), "setConst(false) should clear the flag");
+    assertEquals(true, clone.isConst(), "Clone should be independent of source after mutation");
+  }
+
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java
index 98bc589e40d3..0241484835b0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java
@@ -22,6 +22,8 @@
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 
 class TestPessimisticStatCombiner {
@@ -155,6 +157,21 @@ void testCombineBothUnknownNumTruesAndNumFalses() {
     assertEquals(-1, combined.getNumFalses(), "Both unknown should result in unknown (-1)");
   }
 
+  @ParameterizedTest(name = "combine isConst({0}, {1}) = {2}")
+  @CsvSource({"true, true, true", "true, false, false", "false, true, false", "false, false, false"})
+  void testCombineIsConstAndSemantics(boolean stat1Const, boolean stat2Const, boolean expected) {
+    ColStatistics stat1 = createStat("col1", "int", 0, 100, 4.0);
+    stat1.setConst(stat1Const);
+    ColStatistics stat2 = createStat("col2", "int", 0, 100, 4.0);
+    stat2.setConst(stat2Const);
+
+    PessimisticStatCombiner combiner = new PessimisticStatCombiner();
+    combiner.add(stat1);
+    combiner.add(stat2);
+
+    assertEquals(expected, combiner.getResult().get().isConst());
+  }
+
   private ColStatistics createStat(String name, String type, long ndv, long numNulls, double avgColLen) {
     ColStatistics stat = new ColStatistics(name, type);
     stat.setCountDistint(ndv);
diff --git a/ql/src/test/queries/clientpositive/groupby_unknown_ndv.q b/ql/src/test/queries/clientpositive/groupby_unknown_ndv.q
new file mode 100644
index 000000000000..12af3d064d59
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_unknown_ndv.q
@@ -0,0 +1,30 @@
+-- HIVE-29556: GROUP BY of a column with unknown NDV (NDV=0 sentinel + numNulls>0)
+-- must not collapse to a "1 row" estimate. Both probes feed a join sized so that
+-- master's bogus 1-row estimate triggers Map Join while the heuristic fallback
+-- forces Merge Join.
+
+SET hive.auto.convert.join=true;
+
+CREATE TABLE big (k BIGINT);
+CREATE TABLE small (id BIGINT, name STRING);
+
+ALTER TABLE big UPDATE STATISTICS SET('numRows'='100000000');
+ALTER TABLE big UPDATE STATISTICS for column k SET ('numDVs'='0','numNulls'='100000000');
+
+ALTER TABLE small UPDATE STATISTICS SET('numRows'='1000000');
+ALTER TABLE small UPDATE STATISTICS for column id SET ('numDVs'='1000000','numNulls'='0');
+ALTER TABLE small UPDATE STATISTICS for column name SET ('numDVs'='1000000','numNulls'='0','avgColLen'='100','maxColLen'='100');
+
+-- U1: direct column with unknown NDV.
+EXPLAIN
+SELECT s.name, g.cnt
+FROM (SELECT k, COUNT(*) AS cnt FROM big GROUP BY k) g
+JOIN small s ON g.cnt = s.id;
+
+-- U2: PessimisticStatCombiner output for CASE WHEN with one NULL branch.
+EXPLAIN
+SELECT s.name, g.cnt
+FROM (SELECT x, COUNT(*) AS cnt
+      FROM (SELECT CASE WHEN k > 0 THEN k ELSE cast(NULL AS BIGINT) END AS x FROM big) t
+      GROUP BY x) g
+JOIN small s ON g.cnt = s.id;
diff --git a/ql/src/test/results/clientpositive/llap/groupby_unknown_ndv.q.out b/ql/src/test/results/clientpositive/llap/groupby_unknown_ndv.q.out
new file mode 100644
index 000000000000..700ce15d32e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/groupby_unknown_ndv.q.out
@@ -0,0 +1,322 @@
+PREHOOK: query: CREATE TABLE big (k BIGINT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@big
+POSTHOOK: query: CREATE TABLE big (k BIGINT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@big
+PREHOOK: query: CREATE TABLE small (id BIGINT, name STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small
+POSTHOOK: query: CREATE TABLE small (id BIGINT, name STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small
+PREHOOK: query: ALTER TABLE big UPDATE STATISTICS SET('numRows'='100000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@big
+PREHOOK: Output: default@big
+POSTHOOK: query: ALTER TABLE big UPDATE STATISTICS SET('numRows'='100000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@big
+POSTHOOK: Output: default@big
+PREHOOK: query: ALTER TABLE big UPDATE STATISTICS for column k SET ('numDVs'='0','numNulls'='100000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@big
+PREHOOK: Output: default@big
+POSTHOOK: query: ALTER TABLE big UPDATE STATISTICS for column k SET ('numDVs'='0','numNulls'='100000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@big
+POSTHOOK: Output: default@big
+PREHOOK: query: ALTER TABLE small UPDATE STATISTICS SET('numRows'='1000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@small
+PREHOOK: Output: default@small
+POSTHOOK: query: ALTER TABLE small UPDATE STATISTICS SET('numRows'='1000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@small
+POSTHOOK: Output: default@small
+PREHOOK: query: ALTER TABLE small UPDATE STATISTICS for column id SET ('numDVs'='1000000','numNulls'='0')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@small
+PREHOOK: Output: default@small
+POSTHOOK: query: ALTER TABLE small UPDATE STATISTICS for column id SET ('numDVs'='1000000','numNulls'='0')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@small
+POSTHOOK: Output: default@small
+PREHOOK: query: ALTER TABLE small UPDATE STATISTICS for column name SET ('numDVs'='1000000','numNulls'='0','avgColLen'='100','maxColLen'='100')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@small
+PREHOOK: Output: default@small
+POSTHOOK: query: ALTER TABLE small UPDATE STATISTICS for column name SET ('numDVs'='1000000','numNulls'='0','avgColLen'='100','maxColLen'='100')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@small
+POSTHOOK: Output: default@small
+PREHOOK: query: EXPLAIN
+SELECT s.name, g.cnt
+FROM (SELECT k, COUNT(*) AS cnt FROM big GROUP BY k) g
+JOIN small s ON g.cnt = s.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@big
+PREHOOK: Input: default@small
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT s.name, g.cnt
+FROM (SELECT k, COUNT(*) AS cnt FROM big GROUP BY k) g
+JOIN small s ON g.cnt = s.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@big
+POSTHOOK: Input: default@small
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: big
+                  Statistics: Num rows: 100000000 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: k (type: bigint)
+                    outputColumnNames: k
+                    Statistics: Num rows: 100000000 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count()
+                      keys: k (type: bigint)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 50000000 Data size: 400000008 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 50000000 Data size: 400000008 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  filterExpr: id is not null (type: boolean)
+                  Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: id is not null (type: boolean)
+                    Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: id (type: bigint), name (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 25000000 Data size: 200000008 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col1
+                  Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: _col1 is not null (type: boolean)
+                    Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col1 (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: string), _col0 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT s.name, g.cnt
+FROM (SELECT x, COUNT(*) AS cnt
+      FROM (SELECT CASE WHEN k > 0 THEN k ELSE cast(NULL AS BIGINT) END AS x FROM big) t
+      GROUP BY x) g
+JOIN small s ON g.cnt = s.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@big
+PREHOOK: Input: default@small
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT s.name, g.cnt
+FROM (SELECT x, COUNT(*) AS cnt
+      FROM (SELECT CASE WHEN k > 0 THEN k ELSE cast(NULL AS BIGINT) END AS x FROM big) t
+      GROUP BY x) g
+JOIN small s ON g.cnt = s.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@big
+POSTHOOK: Input: default@small
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: big
+                  Statistics: Num rows: 100000000 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: if((k > 0L), k, null) (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100000000 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count()
+                      keys: _col0 (type: bigint)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 50000000 Data size: 400000008 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 50000000 Data size: 400000008 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  filterExpr: id is not null (type: boolean)
+                  Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: id is not null (type: boolean)
+                    Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: id (type: bigint), name (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 25000000 Data size: 200000008 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col1
+                  Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: _col1 is not null (type: boolean)
+                    Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col1 (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 25000000 Data size: 200000000 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: string), _col0 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1000000 Data size: 192000000 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git a/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out
index d4d9cb53e2b9..0b708705624b 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out
@@ -2414,13 +2414,13 @@ STAGE PLANS:
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 150 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: binary)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: binary)
-                        Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 150 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
@@ -2432,16 +2432,16 @@ STAGE PLANS:
                 keys: KEY._col0 (type: binary)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 75 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: hex(_col0) (type: string), _col1 (type: bigint), _col0 (type: binary)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 75 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col2 (type: binary)
                     null sort order: z
                     sort order: +
-                    Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 75 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col0 (type: string), _col1 (type: bigint)
         Reducer 3 
            Execution mode: vectorized, llap
@@ -2449,10 +2449,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 75 Data size: 14400 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 75 Data size: 14400 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
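
Note for reviewers: the standalone sketch below distills the estimator change in extractNDVGroupingColumns. It is illustrative only — ColStats, adjustedNdvOld, and adjustedNdvNew are hypothetical stand-ins for the patched StatsUtils logic, and Math.addExact stands in for StatsUtils.safeAdd (which saturates instead of throwing). It shows why the pre-patch "+1 for the null group" adjustment turned the NDV=0 "unknown" sentinel into an NDV of 1, collapsing the GROUP BY estimate to a single row, and how gating the adjustment on ndv > 0 || isConst preserves the sentinel.

// Hypothetical stand-ins; not part of the patch.
public class NdvAdjustmentSketch {

  record ColStats(long ndv, long numNulls, boolean isConst) {}

  // Pre-patch behavior: NDV=0 serves both as "all values NULL" and as the
  // "stats unknown" sentinel. Adding 1 for the null group maps both cases to
  // NDV=1, so a GROUP BY over 100M rows is estimated at one row.
  static long adjustedNdvOld(ColStats cs) {
    long ndv = cs.ndv();
    if (cs.numNulls() > 0) {
      ndv = Math.addExact(ndv, 1); // stand-in for StatsUtils.safeAdd
    }
    return ndv;
  }

  // Patched behavior: the null group is only counted when the NDV is a real
  // count (> 0) or the column is a verified constant (isConst). An unknown
  // sentinel (ndv == 0 and not const) stays 0, so the caller can fall back
  // to a row-count heuristic instead of the bogus one-row estimate.
  static long adjustedNdvNew(ColStats cs) {
    long ndv = cs.ndv();
    if (cs.numNulls() > 0 && (ndv > 0 || cs.isConst())) {
      ndv = Math.addExact(ndv, 1);
    }
    return ndv;
  }

  public static void main(String[] args) {
    ColStats unknown = new ColStats(0, 100_000_000L, false); // stale/absent column stats
    ColStats allNull = new ColStats(0, 100_000_000L, true);  // verified all-NULL column
    System.out.println(adjustedNdvOld(unknown)); // 1 -> one-row GROUP BY estimate (the bug)
    System.out.println(adjustedNdvNew(unknown)); // 0 -> stays unknown for downstream fallback
    System.out.println(adjustedNdvNew(allNull)); // 1 -> genuinely one (NULL) group
  }
}

Under these assumptions, big.k in groupby_unknown_ndv.q corresponds to the unknown case: the fix keeps its NDV at 0, the GROUP BY estimate stays large (the 50M/25M row counts in the expected output above), and join conversion correctly chooses a Merge Join rather than a Map Join.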