Reviewer notes (free text ahead of the first "diff --git" header; git apply and patch skip it).

What this patch does:
  * Utilities: adds getColumnTypesFromFieldSchema, which collects getType() of every FieldSchema,
    next to the existing getColumnNamesFromFieldSchema.
  * ColumnStatsSemanticAnalyzer: the helpers that used to hand back column-name / column-type lists
    (getColumnNamesSupportingStats, getColumnName, getColumnTypes) are replaced by helpers that hand
    back FieldSchema lists (getStatsEligibleFieldSchemas, getExplicitColumnNamesFromAst,
    getFieldSchemasByColName). analyze() and rewriteAST() now both go through the new
    getColumnsFromAst() and derive colNames / colType from its result.
  * getFieldSchemasByColName lower-cases names with Locale.ROOT (fully qualified, so no extra import
    line is needed). The removed code matched with equalsIgnoreCase, which is locale-independent;
    a default-locale toLowerCase() would stop matching e.g. "ID" against "id" under a Turkish locale.

NOTE(review): this text was recovered from a copy whose line breaks had been collapsed. Blank
context lines were re-inserted so that every hunk agrees with its "@@" line counts; their exact
position inside the rewriteAST/analyze hunks is a best guess - confirm against the target file.
NOTE(review): generic type arguments (e.g. on List / Map) are missing exactly as received and were
not reconstructed - confirm against the target file before applying.

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 37e91652fb88..7fa8f3557722 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2286,6 +2286,14 @@ public static List getColumnNamesFromFieldSchema(List partC
     return names;
   }
 
+  public static List getColumnTypesFromFieldSchema(List fieldSchemas) {
+    List types = new ArrayList<>();
+    for (FieldSchema fs : fieldSchemas) {
+      types.add(fs.getType());
+    }
+    return types;
+  }
+
   public static List getInternalColumnNamesFromSignature(List colInfos) {
     List names = new ArrayList();
     for (ColumnInfo ci : colInfos) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index ee80fc475299..6fde2b5980b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -24,6 +24,7 @@
 import com.google.common.base.Preconditions;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -103,37 +104,32 @@ private boolean shouldRewrite(ASTNode tree) {
   }
 
   /**
-   * Get the names of the columns that support column statistics.
+   * Get the Field Schemas of the columns that support column statistics.
    */
-  private static List getColumnNamesSupportingStats(Table tbl) {
-    List colNames = new ArrayList<>();
+  private static List getStatsEligibleFieldSchemas(Table tbl) {
+    List result = new ArrayList<>();
     for (FieldSchema col : tbl.getCols()) {
       String type = col.getType();
       TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
       boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo);
       if (isSupported) {
-        colNames.add(col.getName());
+        result.add(col);
       }
     }
-    return colNames;
+    return result;
   }
 
-  private List getColumnName(ASTNode tree) throws SemanticException {
-
-    switch (tree.getChildCount()) {
-    case 2:
-      return getColumnNamesSupportingStats(tbl);
-    case 3:
-      int numCols = tree.getChild(2).getChildCount();
-      List colName = new ArrayList<>(numCols);
-      for (int i = 0; i < numCols; i++) {
-        colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i)));
-      }
-      return colName;
-    default:
-      throw new SemanticException("Internal error. Expected number of children of ASTNode to be"
-          + " either 2 or 3. Found : " + tree.getChildCount());
+  private List getExplicitColumnNamesFromAst(ASTNode tree) throws SemanticException {
+    if (tree.getChildCount() != 3) {
+      throw new SemanticException("Internal error. Expected number of children of ASTNode should be 3. Found : "
+          + tree.getChildCount());
+    }
+    int numCols = tree.getChild(2).getChildCount();
+    List colName = new ArrayList<>(numCols);
+    for (int i = 0; i < numCols; i++) {
+      colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i)));
     }
+    return colName;
   }
 
   private void handlePartialPartitionSpec(Map partSpec, ColumnStatsAutoGatherContext context) throws
@@ -217,28 +213,27 @@ private static String getColTypeOf(Table tbl, String partKey) {
     throw new RuntimeException("Unknown partition key : " + partKey);
   }
 
-  protected static List getColumnTypes(Table tbl, List colNames) {
-    List colTypes = new ArrayList<>();
+  protected static List getFieldSchemasByColName(Table tbl, List colNames) {
     List cols = tbl.getCols();
-    List copyColNames = new ArrayList<>(colNames);
-
-    for (String colName : copyColNames) {
-      for (FieldSchema col : cols) {
-        if (colName.equalsIgnoreCase(col.getName())) {
-          String type = col.getType();
-          TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
-          boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo);
-          if (!isSupported) {
-            logTypeWarning(colName, type);
-            colNames.remove(colName);
-          } else {
-            colTypes.add(type);
-          }
+    Map colFsMap = new HashMap<>();
+    for (FieldSchema col : cols) {
+      colFsMap.put(col.getName().toLowerCase(java.util.Locale.ROOT), col);
+    }
+    List result = new ArrayList<>();
+    for (String colName : colNames) {
+      FieldSchema fs = colFsMap.get(colName.toLowerCase(java.util.Locale.ROOT));
+      if (fs != null) {
+        String type = fs.getType();
+        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
+        boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo);
+        if (!isSupported) {
+          logTypeWarning(colName, type);
+        } else {
+          result.add(new FieldSchema(colName, type, fs.getComment()));
         }
       }
     }
-
-    return colTypes;
+    return result;
   }
 
   private String genRewrittenQuery(List colNames, List colTypes, HiveConf conf,
@@ -257,10 +252,11 @@ private String genRewrittenQuery(List colNames, List colTypes, H
 
   protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec,
       Map partSpec, boolean isPartitionStats) {
-    List colNames = getColumnNamesSupportingStats(tbl);
-    List colTypes = ColumnStatsSemanticAnalyzer.getColumnTypes(tbl, colNames);
+    List columnSchemas = getStatsEligibleFieldSchemas(tbl);
     return ColumnStatsSemanticAnalyzer.genRewrittenQuery(
-        tbl, colNames, colTypes, conf, partTransformSpec, -1, partSpec, isPartitionStats, true);
+        tbl, Utilities.getColumnNamesFromFieldSchema(columnSchemas),
+        Utilities.getColumnTypesFromFieldSchema(columnSchemas), conf, partTransformSpec, -1, partSpec,
+        isPartitionStats, true);
   }
 
   private static String genRewrittenQuery(Table tbl, List colNames, List colTypes,
@@ -634,8 +630,6 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
      */
     if (shouldRewrite(ast)) {
       tbl = AnalyzeCommandUtils.getTable(ast, this);
-      colNames = getColumnName(ast);
-
       // Save away the original AST
       originalTree = ast;
       boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf);
@@ -643,9 +637,8 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
       Map> partTransformSpecs = Collections.singletonMap(-1, null);
       Map partSpec = (isPartitionStats) ?
           AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf) : null;
-      checkForPartitionColumns(
-          colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
-      validateSpecifiedColumnNames(colNames);
+
+      List columnSchemas = getColumnsFromAst(ast);
 
       if (isPartitionStats) {
         handlePartialPartitionSpec(partSpec, null);
@@ -653,7 +646,8 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
           partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl);
         }
       }
-      colType = getColumnTypes(tbl, colNames);
+      colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas);
+      colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas);
       isTableLevel = !isPartitionStats;
 
       rewrittenQuery = String.join(" union all ",
@@ -709,15 +703,13 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context)
 
     tbl = AnalyzeCommandUtils.getTable(ast, this);
 
-    colNames = getColumnName(ast);
     boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf);
 
     List partTransformSpec = null;
     Map partSpec = null;
 
-    checkForPartitionColumns(colNames,
-        Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
-    validateSpecifiedColumnNames(colNames);
+
+    List columnSchemas = getColumnsFromAst(ast);
 
     if (isPartitionStats) {
       partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf);
@@ -726,7 +718,8 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context)
         partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl);
       }
     }
-    colType = getColumnTypes(tbl, colNames);
+    colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas);
+    colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas);
     isTableLevel = !isPartitionStats;
 
     rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1,
@@ -736,6 +729,25 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context)
     return rewrittenTree;
   }
 
+  protected List getColumnsFromAst(ASTNode ast) throws SemanticException {
+    List statsEligibleFS = null;
+    List columnNames;
+    if (ast.getChildCount() == 2) {
+      statsEligibleFS = getStatsEligibleFieldSchemas(tbl);
+      columnNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS);
+    } else {
+      columnNames = getExplicitColumnNamesFromAst(ast);
+    }
+
+    checkForPartitionColumns(columnNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
+    validateSpecifiedColumnNames(columnNames);
+
+    if (statsEligibleFS != null) {
+      return statsEligibleFS;
+    }
+    return getFieldSchemasByColName(tbl, columnNames);
+  }
+
   AnalyzeRewriteContext getAnalyzeRewriteContext() {
     AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext();
     analyzeRewrite.setTableName(tbl.getFullyQualifiedName());
@@ -749,10 +761,9 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl)
     AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext();
     analyzeRewrite.setTableName(tbl.getFullyQualifiedName());
     analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()));
-    List colNames = getColumnNamesSupportingStats(tbl);
-    List colTypes = getColumnTypes(tbl, colNames);
-    analyzeRewrite.setColName(colNames);
-    analyzeRewrite.setColType(colTypes);
+    List columnSchemas = getStatsEligibleFieldSchemas(tbl);
+    analyzeRewrite.setColName(Utilities.getColumnNamesFromFieldSchema(columnSchemas));
+    analyzeRewrite.setColType(Utilities.getColumnTypesFromFieldSchema(columnSchemas));
     return analyzeRewrite;
   }
 