From 63fa5afb848f97955388c888116257e1bb3eef78 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sat, 18 Apr 2026 18:08:16 +0530 Subject: [PATCH 1/8] HIVE-29551: Avoid quadratic runtime in ColumnStatsSemanticAnalyzer#getColumnTypes --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index ee80fc475299..9f59f3ed466a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -220,24 +221,29 @@ private static String getColTypeOf(Table tbl, String partKey) { protected static List getColumnTypes(Table tbl, List colNames) { List colTypes = new ArrayList<>(); List cols = tbl.getCols(); - List copyColNames = new ArrayList<>(colNames); - - for (String colName : copyColNames) { - for (FieldSchema col : cols) { - if (colName.equalsIgnoreCase(col.getName())) { - String type = col.getType(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); - boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); - if (!isSupported) { - logTypeWarning(colName, type); - colNames.remove(colName); - } else { - colTypes.add(type); - } + Map colTypeMap = new HashMap<>(); + + for (FieldSchema col : cols) { + colTypeMap.put(col.getName().toLowerCase(), col.getType()); + } + + List nonPrimColNames = new ArrayList<>(); + for (String colName : colNames) { + String type = colTypeMap.get(colName.toLowerCase()); + if (type != null) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); + boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); + if (!isSupported) { + logTypeWarning(colName, type); + } else { + nonPrimColNames.add(colName); + colTypes.add(type); } } } + colNames.clear(); + colNames.addAll(nonPrimColNames); return colTypes; } From b3bb0a5edd3df89b04f9aabdb038084aa6fa3178 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 19 Apr 2026 13:03:02 +0530 Subject: [PATCH 2/8] Update the wrong column name used --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 9f59f3ed466a..deb980c56cbf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -227,7 +227,7 @@ protected static List getColumnTypes(Table tbl, List colNames) { colTypeMap.put(col.getName().toLowerCase(), col.getType()); } - List nonPrimColNames = new ArrayList<>(); + List primColNames = new ArrayList<>(); for (String colName : colNames) { String type = colTypeMap.get(colName.toLowerCase()); if (type != null) { @@ -236,14 +236,14 @@ protected static List getColumnTypes(Table tbl, List colNames) { if (!isSupported) { logTypeWarning(colName, type); } else { - nonPrimColNames.add(colName); + primColNames.add(colName); colTypes.add(type); } } } colNames.clear(); - colNames.addAll(nonPrimColNames); + colNames.addAll(primColNames); return colTypes; } From 85c0ebeb1176f47cd0b3743ec82c577411122529 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 26 Apr 2026 13:17:51 +0530 Subject: [PATCH 3/8] Refactor code to incorporate logic for different ast children values --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 80 ++++++++++++------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index deb980c56cbf..3f9c4489adc0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -103,38 +103,53 @@ private boolean shouldRewrite(ASTNode tree) { return rwt; } + private static final class StatsEligibleColumns { + private final List columnNames; + private final List columnTypes; + + private StatsEligibleColumns(List columnNames, List columnTypes) { + this.columnNames = columnNames; + this.columnTypes = columnTypes; + } + + List getColumnNames() { + return columnNames; + } + + List getColumnTypes() { + return columnTypes; + } + } + /** - * Get the names of the columns that support column statistics. + * Get the names and types of the columns that support column statistics. */ - private static List getColumnNamesSupportingStats(Table tbl) { + private static StatsEligibleColumns getStatsEligibleColumns(Table tbl) { List colNames = new ArrayList<>(); + List colTypes = new ArrayList<>(); for (FieldSchema col : tbl.getCols()) { String type = col.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); if (isSupported) { colNames.add(col.getName()); + colTypes.add(col.getType()); } } - return colNames; + return new StatsEligibleColumns(colNames, colTypes); } private List getColumnName(ASTNode tree) throws SemanticException { - - switch (tree.getChildCount()) { - case 2: - return getColumnNamesSupportingStats(tbl); - case 3: - int numCols = tree.getChild(2).getChildCount(); - List colName = new ArrayList<>(numCols); - for (int i = 0; i < numCols; i++) { - colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i))); - } - return colName; - default: + if (tree.getChildCount() != 3) { throw new SemanticException("Internal error. Expected number of children of ASTNode to be" + " either 2 or 3. Found : " + tree.getChildCount()); } + int numCols = tree.getChild(2).getChildCount(); + List colName = new ArrayList<>(numCols); + for (int i = 0; i < numCols; i++) { + colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i))); + } + return colName; } private void handlePartialPartitionSpec(Map partSpec, ColumnStatsAutoGatherContext context) throws @@ -218,7 +233,7 @@ private static String getColTypeOf(Table tbl, String partKey) { throw new RuntimeException("Unknown partition key : " + partKey); } - protected static List getColumnTypes(Table tbl, List colNames) { + protected static List getColumnTypesByName(Table tbl, List colNames) { List colTypes = new ArrayList<>(); List cols = tbl.getCols(); Map colTypeMap = new HashMap<>(); @@ -263,10 +278,10 @@ private String genRewrittenQuery(List colNames, List colTypes, H protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { - List colNames = getColumnNamesSupportingStats(tbl); - List colTypes = ColumnStatsSemanticAnalyzer.getColumnTypes(tbl, colNames); + StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, colNames, colTypes, conf, partTransformSpec, -1, partSpec, isPartitionStats, true); + tbl, statsCols.getColumnNames(), statsCols.getColumnTypes(), conf, partTransformSpec, -1, partSpec, + isPartitionStats, true); } private static String genRewrittenQuery(Table tbl, List colNames, List colTypes, @@ -640,7 +655,13 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { */ if (shouldRewrite(ast)) { tbl = AnalyzeCommandUtils.getTable(ast, this); - colNames = getColumnName(ast); + StatsEligibleColumns statsCols = null; + if (ast.getChildCount() == 2) { + statsCols = getStatsEligibleColumns(tbl); + colNames = statsCols.getColumnNames(); + } else { + colNames = getColumnName(ast); + } // Save away the original AST originalTree = ast; boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) @@ -659,7 +680,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = getColumnTypes(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -715,7 +736,13 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) tbl = AnalyzeCommandUtils.getTable(ast, this); - colNames = getColumnName(ast); + StatsEligibleColumns statsCols = null; + if (ast.getChildCount() == 2) { + statsCols = getStatsEligibleColumns(tbl); + colNames = statsCols.getColumnNames(); + } else { + colNames = getColumnName(ast); + } boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf); @@ -732,7 +759,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = getColumnTypes(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -755,10 +782,9 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); - List colNames = getColumnNamesSupportingStats(tbl); - List colTypes = getColumnTypes(tbl, colNames); - analyzeRewrite.setColName(colNames); - analyzeRewrite.setColType(colTypes); + StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); + analyzeRewrite.setColName(statsCols.getColumnNames()); + analyzeRewrite.setColType(statsCols.getColumnTypes()); return analyzeRewrite; } From c8ec783d82e9b1698c8830f37589ca16dc2982dc Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 27 Apr 2026 23:18:04 +0530 Subject: [PATCH 4/8] Fix sonarqube issue - 1 --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 31 +++++-------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 3f9c4489adc0..428e16090539 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -103,22 +103,7 @@ private boolean shouldRewrite(ASTNode tree) { return rwt; } - private static final class StatsEligibleColumns { - private final List columnNames; - private final List columnTypes; - - private StatsEligibleColumns(List columnNames, List columnTypes) { - this.columnNames = columnNames; - this.columnTypes = columnTypes; - } - - List getColumnNames() { - return columnNames; - } - - List getColumnTypes() { - return columnTypes; - } + private record StatsEligibleColumns(List columnNames, List columnTypes) { } /** @@ -280,7 +265,7 @@ protected static String genRewrittenQuery(Table tbl, boolean isPartitionStats) { StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, statsCols.getColumnNames(), statsCols.getColumnTypes(), conf, partTransformSpec, -1, partSpec, + tbl, statsCols.columnNames(), statsCols.columnTypes(), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); } @@ -658,7 +643,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { StatsEligibleColumns statsCols = null; if (ast.getChildCount() == 2) { statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.getColumnNames(); + colNames = statsCols.columnNames(); } else { colNames = getColumnName(ast); } @@ -680,7 +665,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -739,7 +724,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) StatsEligibleColumns statsCols = null; if (ast.getChildCount() == 2) { statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.getColumnNames(); + colNames = statsCols.columnNames(); } else { colNames = getColumnName(ast); } @@ -759,7 +744,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -783,8 +768,8 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); - analyzeRewrite.setColName(statsCols.getColumnNames()); - analyzeRewrite.setColType(statsCols.getColumnTypes()); + analyzeRewrite.setColName(statsCols.columnNames()); + analyzeRewrite.setColType(statsCols.columnTypes()); return analyzeRewrite; } From bca3eeb17b5d9de5909f7a83797b592acd5dedec Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 27 Apr 2026 23:34:46 +0530 Subject: [PATCH 5/8] Fix sonarqube issue - 2 --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 428e16090539..14c88d5763f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -665,7 +665,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); + colType = genRewrittenColumnTypes(ast, statsCols); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -744,7 +744,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); + colType = genRewrittenColumnTypes(ast, statsCols); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -773,6 +773,10 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) return analyzeRewrite; } + private List genRewrittenColumnTypes(ASTNode ast, StatsEligibleColumns statsCols) { + return (ast.getChildCount() == 2) ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); + } + @Override public void setQueryType(ASTNode tree) { queryProperties.setQueryType(QueryProperties.QueryType.STATS); From 84d81f933cf7ecd8112293895c3d3997cc7292f1 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 29 Apr 2026 00:41:31 +0530 Subject: [PATCH 6/8] Refactor code to address review comments --- .../apache/hadoop/hive/ql/exec/Utilities.java | 8 ++ .../ql/parse/ColumnStatsSemanticAnalyzer.java | 110 ++++++++---------- 2 files changed, 58 insertions(+), 60 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 37e91652fb88..be5ebaf70e36 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -2286,6 +2286,14 @@ public static List getColumnNamesFromFieldSchema(List partC return names; } + public static List getColumnTypesFromFieldSchema(List fieldSchemas) { + List types = new ArrayList(); + for (FieldSchema fs : fieldSchemas) { + types.add(fs.getType()); + } + return types; + } + public static List getInternalColumnNamesFromSignature(List colInfos) { List names = new ArrayList(); for (ColumnInfo ci : colInfos) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 14c88d5763f7..c9dfadb6ae75 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -103,31 +103,26 @@ private boolean shouldRewrite(ASTNode tree) { return rwt; } - private record StatsEligibleColumns(List columnNames, List columnTypes) { - } - /** - * Get the names and types of the columns that support column statistics. + * Get the Field Schemas of the columns that support column statistics. */ - private static StatsEligibleColumns getStatsEligibleColumns(Table tbl) { - List colNames = new ArrayList<>(); - List colTypes = new ArrayList<>(); + private static List getStatsEligibleFieldSchemas(Table tbl) { + List result = new ArrayList<>(); for (FieldSchema col : tbl.getCols()) { String type = col.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); if (isSupported) { - colNames.add(col.getName()); - colTypes.add(col.getType()); + result.add(col); } } - return new StatsEligibleColumns(colNames, colTypes); + return result; } - private List getColumnName(ASTNode tree) throws SemanticException { + private List getExplicitColumnNamesFromAst(ASTNode tree) throws SemanticException { if (tree.getChildCount() != 3) { - throw new SemanticException("Internal error. Expected number of children of ASTNode to be" - + " either 2 or 3. Found : " + tree.getChildCount()); + throw new SemanticException("Internal error. Expected number of children of ASTNode should be 3. Found : " + + tree.getChildCount()); } int numCols = tree.getChild(2).getChildCount(); List colName = new ArrayList<>(numCols); @@ -218,33 +213,27 @@ private static String getColTypeOf(Table tbl, String partKey) { throw new RuntimeException("Unknown partition key : " + partKey); } - protected static List getColumnTypesByName(Table tbl, List colNames) { - List colTypes = new ArrayList<>(); + protected static List getFieldSchemasByColName(Table tbl, List colNames) { List cols = tbl.getCols(); - Map colTypeMap = new HashMap<>(); - + Map colFsMap = new HashMap<>(); for (FieldSchema col : cols) { - colTypeMap.put(col.getName().toLowerCase(), col.getType()); + colFsMap.put(col.getName().toLowerCase(), col); } - - List primColNames = new ArrayList<>(); + List result = new ArrayList<>(); for (String colName : colNames) { - String type = colTypeMap.get(colName.toLowerCase()); - if (type != null) { + FieldSchema fs = colFsMap.get(colName.toLowerCase()); + if (fs != null) { + String type = fs.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); if (!isSupported) { logTypeWarning(colName, type); } else { - primColNames.add(colName); - colTypes.add(type); + result.add(fs); } } } - - colNames.clear(); - colNames.addAll(primColNames); - return colTypes; + return result; } private String genRewrittenQuery(List colNames, List colTypes, HiveConf conf, @@ -263,9 +252,10 @@ private String genRewrittenQuery(List colNames, List colTypes, H protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { - StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); + List columnSchemas = getStatsEligibleFieldSchemas(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, statsCols.columnNames(), statsCols.columnTypes(), conf, partTransformSpec, -1, partSpec, + tbl, Utilities.getColumnNamesFromFieldSchema(columnSchemas), + Utilities.getColumnTypesFromFieldSchema(columnSchemas), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); } @@ -640,14 +630,6 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { */ if (shouldRewrite(ast)) { tbl = AnalyzeCommandUtils.getTable(ast, this); - StatsEligibleColumns statsCols = null; - if (ast.getChildCount() == 2) { - statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.columnNames(); - } else { - colNames = getColumnName(ast); - } - // Save away the original AST originalTree = ast; boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf); @@ -655,9 +637,8 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { Map> partTransformSpecs = Collections.singletonMap(-1, null); Map partSpec = (isPartitionStats) ? AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf) : null; - checkForPartitionColumns( - colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); + + List columnSchemas = getColumns(ast); if (isPartitionStats) { handlePartialPartitionSpec(partSpec, null); @@ -665,7 +646,8 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = genRewrittenColumnTypes(ast, statsCols); + colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas); + colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -721,21 +703,13 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) tbl = AnalyzeCommandUtils.getTable(ast, this); - StatsEligibleColumns statsCols = null; - if (ast.getChildCount() == 2) { - statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.columnNames(); - } else { - colNames = getColumnName(ast); - } boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf); List partTransformSpec = null; Map partSpec = null; - checkForPartitionColumns(colNames, - Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); + + List columnSchemas = getColumns(ast); if (isPartitionStats) { partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); @@ -744,7 +718,8 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = genRewrittenColumnTypes(ast, statsCols); + colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas); + colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -754,6 +729,25 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) return rewrittenTree; } + protected List getColumns(ASTNode ast) throws SemanticException { + List statsEligibleFS = null; + List colNames; + if (ast.getChildCount() == 2) { + statsEligibleFS = getStatsEligibleFieldSchemas(tbl); + colNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); + } else{ + colNames = getExplicitColumnNamesFromAst(ast); + } + + checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + validateSpecifiedColumnNames(colNames); + + if (statsEligibleFS != null) { + return statsEligibleFS; + } + return getFieldSchemasByColName(tbl, colNames); + } + AnalyzeRewriteContext getAnalyzeRewriteContext() { AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); @@ -767,16 +761,12 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); - StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); - analyzeRewrite.setColName(statsCols.columnNames()); - analyzeRewrite.setColType(statsCols.columnTypes()); + List columnSchemas = getStatsEligibleFieldSchemas(tbl); + analyzeRewrite.setColName(Utilities.getColumnNamesFromFieldSchema(columnSchemas)); + analyzeRewrite.setColType(Utilities.getColumnTypesFromFieldSchema(columnSchemas)); return analyzeRewrite; } - private List genRewrittenColumnTypes(ASTNode ast, StatsEligibleColumns statsCols) { - return (ast.getChildCount() == 2) ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); - } - @Override public void setQueryType(ASTNode tree) { queryProperties.setQueryType(QueryProperties.QueryType.STATS); From 4a1205ac0f2136e8a221d8e97f2bb6273c958f5d Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 29 Apr 2026 03:33:31 +0530 Subject: [PATCH 7/8] Fix SonarQube issue - 3 --- .../apache/hadoop/hive/ql/exec/Utilities.java | 2 +- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index be5ebaf70e36..7fa8f3557722 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -2287,7 +2287,7 @@ public static List getColumnNamesFromFieldSchema(List partC } public static List getColumnTypesFromFieldSchema(List fieldSchemas) { - List types = new ArrayList(); + List types = new ArrayList<>(); for (FieldSchema fs : fieldSchemas) { types.add(fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index c9dfadb6ae75..d999712fe4a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -638,7 +638,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { Map partSpec = (isPartitionStats) ? AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf) : null; - List columnSchemas = getColumns(ast); + List columnSchemas = getColumnsFromAst(ast); if (isPartitionStats) { handlePartialPartitionSpec(partSpec, null); @@ -709,7 +709,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) List partTransformSpec = null; Map partSpec = null; - List columnSchemas = getColumns(ast); + List columnSchemas = getColumnsFromAst(ast); if (isPartitionStats) { partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); @@ -729,23 +729,23 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) return rewrittenTree; } - protected List getColumns(ASTNode ast) throws SemanticException { + protected List getColumnsFromAst(ASTNode ast) throws SemanticException { List statsEligibleFS = null; - List colNames; + List columnNames; if (ast.getChildCount() == 2) { statsEligibleFS = getStatsEligibleFieldSchemas(tbl); - colNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); + columnNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); } else{ - colNames = getExplicitColumnNamesFromAst(ast); + columnNames = getExplicitColumnNamesFromAst(ast); } - checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); + checkForPartitionColumns(columnNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + validateSpecifiedColumnNames(columnNames); if (statsEligibleFS != null) { return statsEligibleFS; } - return getFieldSchemasByColName(tbl, colNames); + return getFieldSchemasByColName(tbl, columnNames); } AnalyzeRewriteContext getAnalyzeRewriteContext() { From b50be7ac9781922afb65d7690f89fdf6a9381a94 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 29 Apr 2026 12:13:00 +0530 Subject: [PATCH 8/8] Fix for column names being lowercased --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index d999712fe4a4..6fde2b5980b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -229,7 +229,7 @@ protected static List getFieldSchemasByColName(Table tbl, List