diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d5c683daa303..00cc988eb0a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -31,7 +31,6 @@ import java.util.Optional; import java.util.function.Function; import java.util.regex.Pattern; -import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeVisitor; @@ -132,7 +131,6 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.ImmutableNullableList; import org.apache.calcite.util.Pair; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.CteSuggesterType; @@ -144,7 +142,6 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -319,7 +316,6 @@ import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; @@ -338,7 +334,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.joda.time.Interval; -import java.io.IOException; import java.lang.reflect.Field; import 
java.lang.reflect.InvocationTargetException; import java.math.BigDecimal; @@ -1051,51 +1046,6 @@ boolean continueJoinMerge() { return !(runCBO && disableSemJoinReordering); } - @Override - Table materializeCTE(String cteName, CTEClause cte) throws HiveException { - - ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE)); - - ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME)); - tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName))); - - ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER)); - - createTable.addChild(tableName); - createTable.addChild(temporary); - createTable.addChild(cte.cteNode); - - CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState); - analyzer.initCtx(ctx); - analyzer.init(false); - - // should share cte contexts - analyzer.aliasToCTEs.putAll(aliasToCTEs); - - HiveOperation operation = queryState.getHiveOperation(); - try { - analyzer.analyzeInternal(createTable); - } finally { - queryState.setCommandType(operation); - } - - Table table = analyzer.tableDesc.toTable(conf); - Path location = table.getDataLocation(); - try { - location.getFileSystem(conf).mkdirs(location); - } catch (IOException e) { - throw new HiveException(e); - } - table.setMaterializedTable(true); - - LOG.info(cteName + " will be materialized into " + location); - cte.source = analyzer; - - ctx.addMaterializedTable(cteName, table, getMaterializedTableStats(analyzer.getSinkOp())); - - return table; - } - @Override String fixCtasColumnName(String colName) { if (runCBO) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1aec2ac86091..f8ec90287202 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -117,6 +117,7 @@ import 
org.apache.hadoop.hive.ql.ddl.DDLDescWithTableProperties; import org.apache.hadoop.hive.ql.ddl.DDLWork; import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc; +import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc; @@ -1568,7 +1569,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { createTable.addChild(temporary); createTable.addChild(cte.cteNode); - SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState); + SemanticAnalyzer analyzer = (SemanticAnalyzer) DDLSemanticAnalyzerFactory.getAnalyzer(createTable, queryState); analyzer.initCtx(ctx); analyzer.init(false); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java index dbdc79769dc8..17ac1b353bf2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java @@ -20,12 +20,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -46,6 +49,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; +import 
org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory; +import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryProperties.QueryType; import org.apache.hadoop.hive.ql.QueryState; @@ -65,6 +70,7 @@ import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.MockedStatic; import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -493,4 +499,50 @@ private void checkTablesUsed(String query, Set<String> tables) throws Exception Assert.assertEquals(new TreeSet<>(tables), new TreeSet<>(result)); } + + @Test + public void testMaterializeCTEWithCBODisabled() throws Exception { + testMaterializeCTEUsesDDLFactory(false); + } + + @Test + public void testMaterializeCTEWithCBOEnabled() throws Exception { + testMaterializeCTEUsesDDLFactory(true); + } + + private void testMaterializeCTEUsesDDLFactory(boolean cboEnabled) throws Exception { + HiveConf testConf = new HiveConf(conf); + testConf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, cboEnabled); + testConf.setIntVar(HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD, 1); + + SessionState.start(testConf); + Context ctx = new Context(testConf); + + String query = "WITH cte AS (SELECT COUNT(*) AS cnt FROM table1) SELECT * FROM cte"; + + ASTNode astNode = ParseUtils.parse(query, ctx); + QueryState queryState = new QueryState.Builder().withHiveConf(testConf).build(); + BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode); + analyzer.initCtx(ctx); + + try (MockedStatic<DDLSemanticAnalyzerFactory> mocked = + mockStatic(DDLSemanticAnalyzerFactory.class, CALLS_REAL_METHODS)) { + BaseSemanticAnalyzer[] cteAnalyzer = new BaseSemanticAnalyzer[1]; + + mocked.when(() -> DDLSemanticAnalyzerFactory.getAnalyzer(any(ASTNode.class), any(QueryState.class))) + .thenAnswer(invocation -> { + BaseSemanticAnalyzer result = (BaseSemanticAnalyzer) invocation.callRealMethod(); + if
(invocation.getArgument(0, ASTNode.class).getType() == HiveParser.TOK_CREATETABLE) { + cteAnalyzer[0] = result; + } + return result; + }); + + analyzer.analyze(astNode, ctx); + + assertNotNull("DDLSemanticAnalyzerFactory should be called for CTE materialization", cteAnalyzer[0]); + assertTrue("CTE materialization should use CreateTableAnalyzer", + cteAnalyzer[0] instanceof CreateTableAnalyzer); + } + } } diff --git a/ql/src/test/queries/clientpositive/cte_materialize.q b/ql/src/test/queries/clientpositive/cte_materialize.q new file mode 100644 index 000000000000..dd55f57d9889 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cte_materialize.q @@ -0,0 +1,12 @@ +-- Confirms HIVE-29559 fixes the HIVE-28724 NPE on the CBO fallback recompile path. +set hive.cbo.fallback.strategy=ALWAYS; + +explain +WITH cte AS ( + SELECT MAX(s) AS m FROM (SELECT 'a' AS s) t +) +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +UNION ALL +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +UNION ALL +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte); diff --git a/ql/src/test/results/clientpositive/llap/cte_materialize.q.out b/ql/src/test/results/clientpositive/llap/cte_materialize.q.out new file mode 100644 index 000000000000..93845395e73e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cte_materialize.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: explain +WITH cte AS ( + SELECT MAX(s) AS m FROM (SELECT 'a' AS s) t +) +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +UNION ALL +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +UNION ALL +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@cte +#### A masked pattern was here #### +POSTHOOK: query: explain +WITH cte AS ( + SELECT MAX(s) AS m FROM (SELECT 'a' AS s) t +) +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) 
+UNION ALL +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +UNION ALL +SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@cte +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max('a') + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.cte + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Edges: + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Map 3 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 7 <- Map 3 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Union 5 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'a' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: 'a' (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'a' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: 'a' (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'a' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: 'a' (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: cte + filterExpr: m is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: m is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: m (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 'a' (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'a' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 'a' (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'a' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column 
stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 'a' (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'a' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 5 + Vertex: Union 5 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-3 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +