From 51f50334d2312da29d3929d491226e3138a7f44b Mon Sep 17 00:00:00 2001 From: csun5285 Date: Tue, 2 Jun 2026 17:44:31 +0800 Subject: [PATCH 1/2] [fix](function) make element_at support struct, drop struct_element Subscript access on a struct, s['field'], is parsed directly into an element_at expression, but element_at only supported ARRAY and MAP and aborted on a DCHECK in the BE (DORIS-26105). Rather than routing struct field access through a separate struct_element function, make element_at the single entry point for array/map/struct field access. BE: FunctionArrayElement (element_at) now resolves a struct field by its constant int/string index and the standalone struct_element function is removed. The struct_element name is registered as a plain extra name on the same implementation (not an alias: element_at is in the variadic set because FunctionVariantElement shares the name, so an alias key would be mangled to element_atStructString and fail to resolve), keeping any persisted plan that still references struct_element resolvable. FE: the StructElement node is removed and every struct field access form (s['f'], s.f, element_at(s,'f'), struct_element(s,'f')) converges on ElementAt, which absorbs the struct signature, legality check, int->name normalization and the nested-column-pruning access path. struct_element remains a user-facing SQL name bound to ElementAt. Co-Authored-By: Claude Opus 4.8 --- .../function/array/function_array_element.cpp | 3 + .../function/array/function_array_element.h | 118 ++++++++++++++ .../function/function_struct_element.cpp | 148 ------------------ .../exprs/function/simple_function_factory.h | 2 - .../doris/catalog/BuiltinScalarFunctions.java | 4 +- .../rules/analysis/BindExpression.java | 4 +- .../rules/analysis/ExpressionAnalyzer.java | 7 +- .../expression/ExpressionNormalization.java | 4 +- .../rules/expression/ExpressionRuleType.java | 2 +- ...ctElement.java => NormalizeElementAt.java} | 31 ++-- .../rules/StringEmptyToLengthRule.java | 2 +- .../AccessPathExpressionCollector.java | 47 +++--- .../rewrite/PushDownUnnestInProject.java | 4 +- .../functions/scalar/ElementAt.java | 56 +++++-- .../functions/scalar/StructElement.java | 117 -------------- .../visitor/ScalarFunctionVisitor.java | 5 - .../rules/StringEmptyToLengthRuleTest.java | 8 +- .../rules/rewrite/PruneNestedColumnTest.java | 10 +- .../rewrite/PushDownUnnestInProjectTest.java | 4 +- .../test_struct_functions.out | 35 +++++ .../test_struct_functions.groovy | 21 +++ 21 files changed, 282 insertions(+), 350 deletions(-) delete mode 100644 be/src/exprs/function/function_struct_element.cpp rename fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/{NormalizeStructElement.java => NormalizeElementAt.java} (69%) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StructElement.java diff --git a/be/src/exprs/function/array/function_array_element.cpp b/be/src/exprs/function/array/function_array_element.cpp index 06521b19edf5d8..3ff6d94bd22ec1 100644 --- a/be/src/exprs/function/array/function_array_element.cpp +++ b/be/src/exprs/function/array/function_array_element.cpp @@ -24,6 +24,9 @@ namespace doris { void register_function_array_element(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_alias(FunctionArrayElement::name, "%element_extract%"); + // Keep "struct_element" (old FE / persisted plans) resolvable; register it as its own name, not + // an alias, since element_at is variadic and an alias key would be mangled and miss. + factory.register_function("struct_element"); } } // namespace doris diff --git a/be/src/exprs/function/array/function_array_element.h b/be/src/exprs/function/array/function_array_element.h index 065f0f5fd18b5e..b907fc4834ca4d 100644 --- a/be/src/exprs/function/array/function_array_element.h +++ b/be/src/exprs/function/array/function_array_element.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,7 @@ #include "core/data_type/data_type_map.h" #include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_struct.h" #include "core/data_type/primitive_type.h" #include "core/types.h" #include "exprs/function/function.h" @@ -77,6 +79,31 @@ class FunctionArrayElement : public IFunction { size_t get_number_of_arguments() const override { return 2; } + // A struct field access (element_at(struct, const) / the struct_element alias) resolves to a + // different return type depending on which field the constant index selects, so we need the + // index column here. Array/map return types depend only on the argument types and fall through + // to the DataTypes-based overload below. + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { + DataTypePtr arg_0 = remove_nullable(arguments[0].type); + if (arg_0->get_primitive_type() == TYPE_STRUCT) { + const auto* struct_type = check_and_get_data_type(arg_0.get()); + size_t index = 0; + // The index is always a constant literal for struct field access; if it cannot be + // resolved (out of bound / unknown name) we return nullptr and the framework reports it. + if (!get_struct_element_index(*struct_type, arguments[1].column, arguments[1].type, + &index) + .ok()) { + return nullptr; + } + return make_nullable(struct_type->get_elements()[index]); + } + DataTypes data_types(arguments.size()); + for (size_t i = 0; i < arguments.size(); ++i) { + data_types[i] = arguments[i].type; + } + return get_return_type_impl(data_types); + } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { DataTypePtr arg_0 = remove_nullable(arguments[0]); DCHECK(arg_0->get_primitive_type() == TYPE_ARRAY || arg_0->get_primitive_type() == TYPE_MAP) @@ -101,6 +128,10 @@ class FunctionArrayElement : public IFunction { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { + if (remove_nullable(block.get_by_position(arguments[0]).type)->get_primitive_type() == + TYPE_STRUCT) { + return _execute_struct(block, arguments, result, input_rows_count); + } auto dst_null_column = ColumnUInt8::create(input_rows_count, 0); UInt8* dst_null_map = dst_null_column->get_data().data(); const UInt8* src_null_map = nullptr; @@ -136,6 +167,93 @@ class FunctionArrayElement : public IFunction { } private: + //=========================== struct element===========================// + // Resolve the 0-based field offset selected by a constant int/string index. Mirrors the logic + // of the former struct_element function, which element_at now subsumes. + Status get_struct_element_index(const DataTypeStruct& struct_type, + const ColumnPtr& index_column, const DataTypePtr& index_type, + size_t* result) const { + if (!index_column) { + return Status::RuntimeError("Function {}: second argument column is nullptr.", + get_name()); + } + size_t index = 0; + if (is_int_or_bool(index_type->get_primitive_type())) { + int64_t offset = index_column->get_int(0); + size_t limit = struct_type.get_elements().size() + 1; + if (offset < 1 || offset >= static_cast(limit)) { + return Status::RuntimeError( + "Index out of bound for function {}: index {} should base from 1 and less " + "than {}.", + get_name(), offset, limit); + } + index = offset - 1; // the index starts from 1 + } else if (is_string_type(index_type->get_primitive_type())) { + std::string field_name = index_column->get_data_at(0).to_string(); + std::optional pos = struct_type.try_get_position_by_name(field_name); + if (!pos.has_value()) { + return Status::RuntimeError( + "Element not found for function {}: name {} not found in {}.", get_name(), + field_name, struct_type.get_name()); + } + index = pos.value(); + } else { + return Status::RuntimeError( + "Argument not supported for function {}: second arg type {} should be int or " + "string.", + get_name(), index_type->get_name()); + } + *result = index; + return Status::OK(); + } + + Status _execute_struct(Block& block, const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) const { + const auto& struct_arg = block.get_by_position(arguments[0]); + ColumnPtr struct_col_ptr = struct_arg.column->convert_to_full_column_if_const(); + // element_at manages nulls itself (use_default_implementation_for_nulls() == false), so a + // null struct row must be merged into the result null map manually. + const ColumnUInt8* outer_null_map = nullptr; + if (struct_col_ptr->is_nullable()) { + const auto* nullable = assert_cast(struct_col_ptr.get()); + outer_null_map = &nullable->get_null_map_column(); + struct_col_ptr = nullable->get_nested_column_ptr(); + } + const auto* struct_type = + check_and_get_data_type(remove_nullable(struct_arg.type).get()); + const auto* struct_col = check_and_get_column(struct_col_ptr.get()); + if (!struct_col || !struct_type) { + return Status::RuntimeError("unsupported types for function {}({}, {})", get_name(), + struct_arg.type->get_name(), + block.get_by_position(arguments[1]).type->get_name()); + } + const auto& index_arg = block.get_by_position(arguments[1]); + size_t index = 0; + RETURN_IF_ERROR( + get_struct_element_index(*struct_type, index_arg.column, index_arg.type, &index)); + + ColumnPtr field_col = struct_col->get_column_ptr(index); + auto res_null_column = ColumnUInt8::create(input_rows_count, 0); + auto& res_null_map = res_null_column->get_data(); + ColumnPtr res_nested = field_col; + if (field_col->is_nullable()) { + const auto* field_nullable = assert_cast(field_col.get()); + const auto& field_null_map = field_nullable->get_null_map_column().get_data(); + memcpy(res_null_map.data(), field_null_map.data(), input_rows_count); + res_nested = field_nullable->get_nested_column_ptr(); + } + if (outer_null_map) { + const auto& outer = outer_null_map->get_data(); + for (size_t i = 0; i < input_rows_count; ++i) { + res_null_map[i] |= outer[i]; + } + } + block.replace_by_position( + result, ColumnNullable::create(res_nested->clone_resized(input_rows_count), + std::move(res_null_column))); + return Status::OK(); + } + //=========================== map element===========================// ColumnPtr _get_mapped_idx(const ColumnArray& column, const ColumnWithTypeAndName& argument) const { diff --git a/be/src/exprs/function/function_struct_element.cpp b/be/src/exprs/function/function_struct_element.cpp deleted file mode 100644 index 770698259ee1be..00000000000000 --- a/be/src/exprs/function/function_struct_element.cpp +++ /dev/null @@ -1,148 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include - -#include -#include -#include -#include - -#include "common/status.h" -#include "core/block/block.h" -#include "core/column/column.h" -#include "core/column/column_const.h" -#include "core/column/column_nullable.h" -#include "core/column/column_string.h" -#include "core/column/column_struct.h" -#include "core/data_type/data_type.h" -#include "core/data_type/data_type_nothing.h" -#include "core/data_type/data_type_nullable.h" -#include "core/data_type/data_type_struct.h" -#include "exprs/function/function.h" -#include "exprs/function/function_helpers.h" -#include "exprs/function/simple_function_factory.h" - -namespace doris { - -class FunctionStructElement : public IFunction { -public: - static constexpr auto name = "struct_element"; - static FunctionPtr create() { return std::make_shared(); } - - // Get function name. - String get_name() const override { return name; } - - size_t get_number_of_arguments() const override { return 2; } - - ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } - - DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { - DCHECK(arguments[0].type->get_primitive_type() == TYPE_STRUCT) - << "First argument for function: " << name - << " should be DataTypeStruct but it has type " << arguments[0].type->get_name() - << "."; - DCHECK(is_int_or_bool(arguments[1].type->get_primitive_type()) || - is_string_type(arguments[1].type->get_primitive_type())) - << "Second argument for function: " << name - << " should be Int or String but it has type " << arguments[1].type->get_name() - << "."; - - const auto* struct_type = check_and_get_data_type(arguments[0].type.get()); - - auto index_type = arguments[1].type; - const auto& index_column = arguments[1].column; - if (!index_column) { - throw doris::Exception( - ErrorCode::INTERNAL_ERROR, - "Function {}: second argument column is nullptr, but it should not be.", - get_name()); - } - size_t index; - auto st = get_element_index(*struct_type, index_column, index_type, &index); - if (!st.ok()) { - // will handle nullptr outside - return nullptr; - } - // The struct_element is marked as AlwaysNullable in fe. - return make_nullable(struct_type->get_elements()[index]); - } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count) const override { - const auto* struct_type = check_and_get_data_type( - block.get_by_position(arguments[0]).type.get()); - const auto* struct_col = check_and_get_column( - block.get_by_position(arguments[0]).column.get()); - if (!struct_col || !struct_type) { - return Status::RuntimeError( - fmt::format("unsupported types for function {}({}, {})", get_name(), - block.get_by_position(arguments[0]).type->get_name(), - block.get_by_position(arguments[1]).type->get_name())); - } - - auto index_column = block.get_by_position(arguments[1]).column; - auto index_type = block.get_by_position(arguments[1]).type; - size_t index; - RETURN_IF_ERROR(get_element_index(*struct_type, index_column, index_type, &index)); - ColumnPtr res_column = struct_col->get_column_ptr(index); - ColumnPtr ele_column = res_column->clone_resized(res_column->size()); - //This function must return a ColumnNullable column, so it is necessary to convert the result column into ColumnNullable. - block.replace_by_position(result, make_nullable(ele_column)); - return Status::OK(); - } - -private: - Status get_element_index(const DataTypeStruct& struct_type, const ColumnPtr& index_column, - const DataTypePtr& index_type, size_t* result) const { - size_t index; - if (is_int_or_bool(index_type->get_primitive_type())) { - index = index_column->get_int(0); - size_t limit = struct_type.get_elements().size() + 1; - if (index < 1 || index >= limit) { - return Status::RuntimeError( - fmt::format("Index out of bound for function {}: index {} should base from " - "1 and less than {}.", - get_name(), index, limit)); - } - index -= 1; // the index start from 1 - } else if (is_string_type(index_type->get_primitive_type())) { - std::string field_name = index_column->get_data_at(0).to_string(); - std::optional pos = struct_type.try_get_position_by_name(field_name); - if (!pos.has_value()) { - return Status::RuntimeError( - fmt::format("Element not found for function {}: name {} not found in {}.", - get_name(), field_name, struct_type.get_name())); - } - index = pos.value(); - } else { - return Status::RuntimeError( - fmt::format("Argument not supported for function {}: second arg type {} should " - "be int or string.", - get_name(), index_type->get_name())); - } - *result = index; - return Status::OK(); - } -}; - -void register_function_struct_element(SimpleFunctionFactory& factory) { - factory.register_function(); -} - -} // namespace doris diff --git a/be/src/exprs/function/simple_function_factory.h b/be/src/exprs/function/simple_function_factory.h index 1d7e26fe5593cf..c0c3fc1ad60365 100644 --- a/be/src/exprs/function/simple_function_factory.h +++ b/be/src/exprs/function/simple_function_factory.h @@ -92,7 +92,6 @@ void register_function_fake(SimpleFunctionFactory& factory); void register_function_array(SimpleFunctionFactory& factory); void register_function_map(SimpleFunctionFactory& factory); void register_function_struct(SimpleFunctionFactory& factory); -void register_function_struct_element(SimpleFunctionFactory& factory); void register_function_variant_element(SimpleFunctionFactory& factory); void register_function_geo(SimpleFunctionFactory& factory); void register_function_multi_string_position(SimpleFunctionFactory& factory); @@ -334,7 +333,6 @@ class SimpleFunctionFactory { register_function_array(instance); register_function_map(instance); register_function_struct(instance); - register_function_struct_element(instance); register_function_geo(instance); register_function_url(instance); register_function_multi_string_position(instance); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index 4478bc9bcfe303..43680104fbd871 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -507,7 +507,6 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToMap; import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp; import org.apache.doris.nereids.trees.expressions.functions.scalar.StripNullValue; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBinary; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace; @@ -773,7 +772,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Dround.class, "dround"), scalar(Dsqrt.class, "dsqrt"), scalar(E.class, "e"), - scalar(ElementAt.class, "element_at"), + scalar(ElementAt.class, "element_at", "struct_element"), scalar(Elt.class, "elt"), scalar(Embed.class, "embed"), scalar(EncodeAsSmallInt.class, "encode_as_smallint"), @@ -1045,7 +1044,6 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Sin.class, "sin"), scalar(Sinh.class, "sinh"), scalar(Sleep.class, "sleep"), - scalar(StructElement.class, "struct_element"), scalar(Sm3.class, "sm3"), scalar(Sm3sum.class, "sm3sum"), scalar(Sm4Decrypt.class, "sm4_decrypt"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java index d367d093c366bb..e105d24f75c1ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java @@ -63,7 +63,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AnyValue; import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction; import org.apache.doris.nereids.trees.expressions.functions.generator.Unnest; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.table.TableValuedFunction; import org.apache.doris.nereids.trees.expressions.literal.IntegerLikeLiteral; import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; @@ -435,7 +435,7 @@ private LogicalPlan bindGenerate(MatchingContext> ctx) { // struct_element(#expand_col#v, #v) as #v List fields = ((StructType) boundSlot.getDataType()).getFields(); for (int idx = 0; idx < fields.size(); ++idx) { - expandAlias.add(new Alias(new StructElement( + expandAlias.add(new Alias(new ElementAt( boundSlot, new StringLiteral(fields.get(idx).getName())), generate.getExpandColumnAlias().get(i).get(idx), slot.getQualifier())); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java index c2d3c3f4a4b715..646e93cfc9a84a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java @@ -77,7 +77,6 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.SupportMultiDistinct; import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; import org.apache.doris.nereids.trees.expressions.functions.udf.AliasUdfBuilder; import org.apache.doris.nereids.trees.expressions.functions.udf.UdfBuilder; import org.apache.doris.nereids.trees.expressions.literal.IntegerLikeLiteral; @@ -270,7 +269,7 @@ public Expression visitDereferenceExpression(DereferenceExpression dereferenceEx StructType structType = (StructType) dataType; StructField field = structType.getField(dereferenceExpression.fieldName); if (field != null) { - return new StructElement(expression, dereferenceExpression.child(1)); + return new ElementAt(expression, dereferenceExpression.child(1)); } } else if (dataType.isMapType()) { return new ElementAt(expression, dereferenceExpression.child(1)); @@ -339,7 +338,7 @@ public Expression visitUnboundSlot(UnboundSlot unboundSlot, ExpressionRewriteCon if (firstBound.getDataType() instanceof NestedColumnPrunable || firstBound.getDataType().isVariantType()) { context.cascadesContext.getStatementContext().setHasNestedColumns(true); - } else if (firstBound.containsType(ElementAt.class, StructElement.class)) { + } else if (firstBound.containsType(ElementAt.class)) { context.cascadesContext.getStatementContext().setHasNestedColumns(true); } if (firstBound instanceof Alias) { @@ -1257,7 +1256,7 @@ private Optional bindNestedFields(UnboundSlot unboundSlot, Slot slot throw new AnalysisException("No such struct field '" + fieldName + "' in '" + lastFieldName + "'"); } lastFieldName = fieldName; - expression = new StructElement(expression, new StringLiteral(fieldName)); + expression = new ElementAt(expression, new StringLiteral(fieldName)); continue; } else if (dataType.isMapType()) { expression = new ElementAt(expression, new StringLiteral(fieldName)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java index e133303a2c1d25..c6b129e8c5088b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java @@ -29,7 +29,7 @@ import org.apache.doris.nereids.rules.expression.rules.MedianConvert; import org.apache.doris.nereids.rules.expression.rules.MergeDateTrunc; import org.apache.doris.nereids.rules.expression.rules.NormalizeBinaryPredicatesRule; -import org.apache.doris.nereids.rules.expression.rules.NormalizeStructElement; +import org.apache.doris.nereids.rules.expression.rules.NormalizeElementAt; import org.apache.doris.nereids.rules.expression.rules.RewriteDefaultExpression; import org.apache.doris.nereids.rules.expression.rules.SimplifyArithmeticComparisonRule; import org.apache.doris.nereids.rules.expression.rules.SimplifyArithmeticRule; @@ -74,7 +74,7 @@ public class ExpressionNormalization extends ExpressionRewrite { SimplifyArithmeticComparisonRule.INSTANCE, ConvertAggStateCast.INSTANCE, MergeDateTrunc.INSTANCE, - NormalizeStructElement.INSTANCE, + NormalizeElementAt.INSTANCE, CheckCast.INSTANCE, SimplifyEqualBooleanLiteral.INSTANCE ) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java index 726026cccbf869..b2e44c81c1620a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java @@ -65,7 +65,7 @@ public enum ExpressionRuleType { SIMPLIFY_RANGE, SIMPLIFY_SELF_COMPARISON, SUPPORT_JAVA_DATE_FORMATTER, - NORMALIZE_STRUCT_ELEMENT, + NORMALIZE_ELEMENT_AT, TIMESTAMP_TO_ADD_TIME, TOPN_TO_MAX, REWRITE_DEFAULT_EXPRESSION, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/NormalizeStructElement.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/NormalizeElementAt.java similarity index 69% rename from fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/NormalizeStructElement.java rename to fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/NormalizeElementAt.java index dc50921bc1e8cc..6388c1830aeb27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/NormalizeStructElement.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/NormalizeElementAt.java @@ -21,7 +21,7 @@ import org.apache.doris.nereids.rules.expression.ExpressionPatternRuleFactory; import org.apache.doris.nereids.rules.expression.ExpressionRuleType; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.literal.IntegerLikeLiteral; import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; import org.apache.doris.nereids.types.StructField; @@ -32,35 +32,38 @@ import java.util.List; /** - * given column s has data type: struct<a: int, b: int>, if exists struct_element(s, 2), we will rewrite - * to struct_element(s, 'b') + * given column s has data type: struct<a: int, b: int>, if exists element_at(s, 2), we will rewrite + * to element_at(s, 'b'). Only applies to element_at over a struct; array/map keep their int index. */ -public class NormalizeStructElement implements ExpressionPatternRuleFactory { - public static final NormalizeStructElement INSTANCE = new NormalizeStructElement(); +public class NormalizeElementAt implements ExpressionPatternRuleFactory { + public static final NormalizeElementAt INSTANCE = new NormalizeElementAt(); @Override public List> buildRules() { return ImmutableList.of( - matchesType(StructElement.class).then(NormalizeStructElement::normalize) - .toRule(ExpressionRuleType.NORMALIZE_STRUCT_ELEMENT) + matchesType(ElementAt.class).then(NormalizeElementAt::normalize) + .toRule(ExpressionRuleType.NORMALIZE_ELEMENT_AT) ); } - private static StructElement normalize(StructElement structElement) { - Expression field = structElement.getArgument(1); + private static Expression normalize(ElementAt elementAt) { + if (!(elementAt.child(0).getDataType() instanceof StructType)) { + return elementAt; + } + Expression field = elementAt.getArgument(1); if (field instanceof IntegerLikeLiteral) { int fieldIndex = ((Number) ((IntegerLikeLiteral) field).getValue()).intValue(); - StructType structType = (StructType) structElement.getArgument(0).getDataType(); + StructType structType = (StructType) elementAt.child(0).getDataType(); List fields = structType.getFields(); - if (fieldIndex >= 0 && fieldIndex <= fields.size()) { - return structElement.withChildren( + if (fieldIndex >= 1 && fieldIndex <= fields.size()) { + return elementAt.withChildren( ImmutableList.of( - structElement.child(0), + elementAt.child(0), new StringLiteral(fields.get(fieldIndex - 1).getName()) ) ); } } - return structElement; + return elementAt; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRule.java index 5321eed9300229..f2a66b538cddde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRule.java @@ -47,7 +47,7 @@ * to {@code length(s) = 0}; for NULL, both sides evaluate to NULL. * * Applies when the compared expression is a non-literal expression of string-like type (e.g. a - * {@code SlotReference}, a {@code StructElement} field access, etc.), so that the resulting + * {@code SlotReference}, an {@code ElementAt} struct field access, etc.), so that the resulting * {@code length(expr)} call can benefit from OFFSET-only column reading. */ public class StringEmptyToLengthRule implements ExpressionPatternRuleFactory { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java index 680eab7796c997..1c50429ec11993 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java @@ -54,7 +54,6 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.MapKeys; import org.apache.doris.nereids.trees.expressions.functions.scalar.MapSize; import org.apache.doris.nereids.trees.expressions.functions.scalar.MapValues; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; @@ -314,37 +313,27 @@ public Void visitElementAt(ElementAt elementAt, CollectorContext context) { return continueCollectAccessPath(first, context); } return visit(elementAt, context); - } else { - return visit(elementAt, context); - } - } - - // struct element_at - @Override - public Void visitStructElement(StructElement structElement, CollectorContext context) { - List arguments = structElement.getArguments(); - Expression struct = arguments.get(0); - Expression fieldName = arguments.get(1); - DataType fieldType = fieldName.getDataType(); - - if (fieldName.isLiteral() && (fieldType.isIntegerLikeType() || fieldType.isStringLikeType())) { - if (fieldType.isIntegerLikeType()) { - int fieldIndex = ((Number) ((Literal) fieldName).getValue()).intValue(); - List fields = ((StructType) struct.getDataType()).getFields(); - if (fieldIndex >= 1 && fieldIndex <= fields.size()) { - String realFieldName = fields.get(fieldIndex - 1).getName(); - context.accessPathBuilder.addPrefix(realFieldName); - return continueCollectAccessPath(struct, context); + } else if (first.getDataType().isStructType()) { + // struct field access (formerly struct_element): collect the selected field as the path + Expression fieldName = arguments.get(1); + DataType fieldType = fieldName.getDataType(); + if (fieldName.isLiteral() && (fieldType.isIntegerLikeType() || fieldType.isStringLikeType())) { + if (fieldType.isIntegerLikeType()) { + int fieldIndex = ((Number) ((Literal) fieldName).getValue()).intValue(); + List fields = ((StructType) first.getDataType()).getFields(); + if (fieldIndex >= 1 && fieldIndex <= fields.size()) { + String realFieldName = fields.get(fieldIndex - 1).getName(); + context.accessPathBuilder.addPrefix(realFieldName); + return continueCollectAccessPath(first, context); + } } + context.accessPathBuilder.addPrefix(((Literal) fieldName).getStringValue().toLowerCase()); + return continueCollectAccessPath(first, context); } - context.accessPathBuilder.addPrefix(((Literal) fieldName).getStringValue().toLowerCase()); - return continueCollectAccessPath(struct, context); - } - - for (Expression argument : arguments) { - visit(argument, context); + return visit(elementAt, context); + } else { + return visit(elementAt, context); } - return null; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProject.java index ebe0ba7b4c24a4..415dd7900c068b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProject.java @@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.Function; import org.apache.doris.nereids.trees.expressions.functions.generator.Unnest; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; import org.apache.doris.nereids.trees.plans.logical.LogicalGenerate; import org.apache.doris.nereids.types.DataType; @@ -81,7 +81,7 @@ public Rule build() { toBePushedDown.size(), fields.size())); for (int i = 0; i < fields.size(); ++i) { Slot remainExpr = slotTripletMap.get(toBePushedDown.get(i)).remainExpr; - newProjects.add(new Alias(remainExpr.getExprId(), new StructElement( + newProjects.add(new Alias(remainExpr.getExprId(), new ElementAt( outputSlot, new StringLiteral(fields.get(i).getName())), remainExpr.getName())); } } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java index 1b9999066ea5a6..b9b0f24afa15e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java @@ -18,17 +18,22 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.PreferPushDownProject; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; -import org.apache.doris.nereids.trees.expressions.functions.RewriteWhenAnalyze; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; +import org.apache.doris.nereids.trees.expressions.literal.IntegerLikeLiteral; +import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StructField; import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.types.VarcharType; import org.apache.doris.nereids.types.VariantType; @@ -45,7 +50,7 @@ */ public class ElementAt extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable, - RewriteWhenAnalyze, PreferPushDownProject { + PropagateNullLiteral, PreferPushDownProject { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(new FollowToAnyDataType(0)) @@ -85,19 +90,52 @@ public R accept(ExpressionVisitor visitor, C context) { } @Override - public List getSignatures() { + public void checkLegalityBeforeTypeCoercion() { + // Struct field access (the former struct_element) only accepts a constant int/string index, + // since the selected field — and therefore the result type — must be known at analysis time. if (child(0).getDataType() instanceof StructType) { - return new StructElement(child(0), child(1)).getSignatures(); + Expression field = getArgument(1); + if (!(field instanceof StringLikeLiteral || field instanceof IntegerLikeLiteral)) { + throw new AnalysisException("element_at over a struct only allows a constant int or" + + " string second parameter: " + this.toSql()); + } } - return SIGNATURES; } @Override - public Expression rewriteWhenAnalyze() { - if (child(0).getDataType() instanceof StructType) { - return new StructElement(child(0), child(1)); + public List getSignatures() { + DataType arg0Type = child(0).getDataType(); + if (arg0Type instanceof NullType) { + return ImmutableList.of( + FunctionSignature.ret(NullType.INSTANCE).args(NullType.INSTANCE, child(1).getDataType())); + } + if (arg0Type instanceof StructType) { + return ImmutableList.of(FunctionSignature.ret(structFieldType((StructType) arg0Type)) + .args(arg0Type, child(1).getDataType())); + } + return SIGNATURES; + } + + // Resolve the type of the struct field selected by the constant int/string index. + private DataType structFieldType(StructType structType) { + Expression field = getArgument(1); + if (field instanceof IntegerLikeLiteral) { + int offset = ((IntegerLikeLiteral) field).getIntValue(); + if (offset <= 0 || offset > structType.getFields().size()) { + throw new AnalysisException("the specified field index out of bound: " + this.toSql()); + } + return structType.getFields().get(offset - 1).getDataType(); + } else if (field instanceof StringLikeLiteral) { + String name = ((StringLikeLiteral) field).getStringValue(); + StructField structField = structType.getField(name); + if (structField == null) { + throw new AnalysisException("the specified field name " + name + " was not found: " + this.toSql()); + } + return structField.getDataType(); + } else { + throw new AnalysisException("element_at over a struct only allows a constant int or" + + " string second parameter: " + this.toSql()); } - return this; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StructElement.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StructElement.java deleted file mode 100644 index 0b4b9a8b1bf0c3..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StructElement.java +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.trees.expressions.functions.scalar; - -import org.apache.doris.catalog.FunctionSignature; -import org.apache.doris.nereids.exceptions.AnalysisException; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.PreferPushDownProject; -import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; -import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; -import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; -import org.apache.doris.nereids.trees.expressions.functions.SearchSignature; -import org.apache.doris.nereids.trees.expressions.literal.IntegerLikeLiteral; -import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral; -import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; -import org.apache.doris.nereids.types.DataType; -import org.apache.doris.nereids.types.NullType; -import org.apache.doris.nereids.types.StructField; -import org.apache.doris.nereids.types.StructType; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; - -import java.util.List; - -/** - * ScalarFunction 'struct_element'. - */ -public class StructElement extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, PreferPushDownProject { - - /** - * constructor with 0 or more arguments. - */ - public StructElement(Expression arg0, Expression arg1) { - super("struct_element", arg0, arg1); - } - - /** constructor for withChildren and reuse signature */ - private StructElement(ScalarFunctionParams functionParams) { - super(functionParams); - } - - @Override - public void checkLegalityBeforeTypeCoercion() { - Expression structArgument = getArgument(0); - Expression fieldArgument = getArgument(1); - if (!(structArgument.getDataType() instanceof StructType - || structArgument.getDataType() instanceof NullType)) { - SearchSignature.throwCanNotFoundFunctionException(this.getName(), this.getArguments()); - } - if (!(fieldArgument instanceof StringLikeLiteral || fieldArgument instanceof IntegerLikeLiteral)) { - throw new AnalysisException("struct_element only allows" - + " constant int or string second parameter: " + this.toSql()); - } - } - - /** - * withChildren. - */ - @Override - public StructElement withChildren(List children) { - Preconditions.checkArgument(children.size() == 2, "children size should be 2"); - return new StructElement(getFunctionParams(children)); - } - - @Override - public R accept(ExpressionVisitor visitor, C context) { - return visitor.visitStructElement(this, context); - } - - @Override - public List getSignatures() { - if (getArgument(0).getDataType() instanceof NullType) { - return ImmutableList.of( - FunctionSignature.ret(NullType.INSTANCE).args(NullType.INSTANCE, child(1).getDataType()) - ); - } - StructType structArgType = (StructType) getArgument(0).getDataType(); - DataType retType; - if (getArgument(1) instanceof IntegerLikeLiteral) { - int offset = ((IntegerLikeLiteral) getArgument(1)).getIntValue(); - if (offset <= 0 || offset > structArgType.getFields().size()) { - throw new AnalysisException("the specified field index out of bound: " + this.toSql()); - } else { - retType = structArgType.getFields().get(offset - 1).getDataType(); - } - } else if (getArgument(1) instanceof StringLikeLiteral) { - String name = ((StringLikeLiteral) getArgument(1)).getStringValue(); - StructField field = structArgType.getField(name); - if (field == null) { - throw new AnalysisException("the specified field name " + name + " was not found: " + this.toSql()); - } else { - retType = field.getDataType(); - } - } else { - throw new AnalysisException("struct_element only allows" - + " constant int or string second parameter: " + this.toSql()); - } - return ImmutableList.of(FunctionSignature.ret(retType).args(structArgType, child(1).getDataType())); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 1f82a48f1d7d89..5023f34b2d8859 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -527,7 +527,6 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToMap; import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp; import org.apache.doris.nereids.trees.expressions.functions.scalar.StripNullValue; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBinary; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace; @@ -2882,10 +2881,6 @@ default R visitCreateNamedStruct(CreateNamedStruct createNamedStruct, C context) return visitScalarFunction(createNamedStruct, context); } - default R visitStructElement(StructElement structElement, C context) { - return visitScalarFunction(structElement, context); - } - default R visitMultiMatch(MultiMatch multiMatch, C context) { return visitScalarFunction(multiMatch, context); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRuleTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRuleTest.java index f9c2a1a2072e8e..efe59fde62e15a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRuleTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/StringEmptyToLengthRuleTest.java @@ -23,8 +23,8 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.scalar.Length; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral; import org.apache.doris.nereids.types.IntegerType; @@ -152,7 +152,7 @@ public void testStructElementEqualEmptyRewrite() { new StructField("f1", IntegerType.INSTANCE, true, ""), new StructField("f3", StringType.INSTANCE, true, "") )), true); - StructElement structElement = new StructElement(structSlot, new VarcharLiteral("f3")); + ElementAt structElement = new ElementAt(structSlot, new VarcharLiteral("f3")); VarcharLiteral empty = new VarcharLiteral(""); assertRuleRewrite( new EqualTo(structElement, empty), @@ -168,7 +168,7 @@ public void testStructElementReversedOperandsRewrite() { new StructField("f1", IntegerType.INSTANCE, true, ""), new StructField("f3", StringType.INSTANCE, true, "") )), true); - StructElement structElement = new StructElement(structSlot, new VarcharLiteral("f3")); + ElementAt structElement = new ElementAt(structSlot, new VarcharLiteral("f3")); VarcharLiteral empty = new VarcharLiteral(""); assertRuleRewrite( new EqualTo(empty, structElement), @@ -184,7 +184,7 @@ public void testNotStructElementEqualEmptyRewrite() { new StructField("f1", IntegerType.INSTANCE, true, ""), new StructField("f3", StringType.INSTANCE, true, "") )), true); - StructElement structElement = new StructElement(structSlot, new VarcharLiteral("f3")); + ElementAt structElement = new ElementAt(structSlot, new VarcharLiteral("f3")); VarcharLiteral empty = new VarcharLiteral(""); assertRuleRewrite( new Not(new EqualTo(structElement, empty)), diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index 4c200629ac0ca8..2cf2f2116bbcd2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -34,7 +34,7 @@ import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.scalar.Coalesce; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; @@ -891,7 +891,7 @@ public void testPushDownThroughJoin() { ).when(p -> { Assertions.assertEquals(2, p.getProjects().size()); Assertions.assertTrue(p.getProjects().stream() - .anyMatch(o -> o instanceof Alias && o.child(0) instanceof StructElement)); + .anyMatch(o -> o instanceof Alias && o.child(0) instanceof ElementAt)); return true; }), logicalOneRowRelation() @@ -947,7 +947,7 @@ public void testPushDownThroughWindow() { ).when(p -> { Assertions.assertEquals(2, p.getProjects().size()); Assertions.assertTrue(p.getProjects().stream() - .anyMatch(o -> o instanceof Alias && o.child(0) instanceof StructElement)); + .anyMatch(o -> o instanceof Alias && o.child(0) instanceof ElementAt)); return true; }) ) @@ -978,7 +978,7 @@ public void testPushDownThroughPartitionTopN() { ).when(p -> { Assertions.assertEquals(2, p.getProjects().size()); Assertions.assertTrue(p.getProjects().stream() - .anyMatch(o -> o instanceof Alias && o.child(0) instanceof StructElement)); + .anyMatch(o -> o instanceof Alias && o.child(0) instanceof ElementAt)); return true; }) ) @@ -1007,7 +1007,7 @@ public void testPushDownThroughUnion() { logicalOlapScan() ).when(p -> { Assertions.assertEquals(1, p.getProjects().size()); - Assertions.assertInstanceOf(StructElement.class, p.getProjects().get(0).child(0)); + Assertions.assertInstanceOf(ElementAt.class, p.getProjects().get(0).child(0)); return true; }) ).when(u -> { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProjectTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProjectTest.java index cb50616008ebd2..3984f0965d8b7f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProjectTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownUnnestInProjectTest.java @@ -20,7 +20,7 @@ import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.generator.Unnest; -import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.types.ArrayType; @@ -75,7 +75,7 @@ public void testPushDownMultipleUnnest() { logicalGenerate( logicalOlapScan() ) - ).when(p -> p.getProjects().stream().allMatch(ne -> ne.child(0) instanceof StructElement)) + ).when(p -> p.getProjects().stream().allMatch(ne -> ne.child(0) instanceof ElementAt)) ); } } diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out index 42abf47a7e0b3e..57368931c4828d 100644 --- a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out +++ b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out @@ -39,6 +39,41 @@ a \N abc \N \N \N NULL null \N +-- !select_struct_subscript_1 -- +1 2 3 4 5 +1 1000 10000000 100000000000 100000000000 +5 4 3 2 1 +1 \N 3 \N 5 +\N \N \N \N \N +\N \N \N \N \N + +-- !select_struct_subscript_2 -- +1.0 3.33 0.001 +1.0 2.143 0.001 +2.3 23.3 0.333 +2.0 \N 0.001 +\N \N \N +\N \N \N + +-- !select_struct_subscript_3 -- +2023-04-01 2023-04-01T12:00 2023-04-01 2023-04-01T12:00:01 +2023-04-01 2023-04-01T12:00 2023-04-01 2023-04-01T12:00:01 +2023-04-01 2023-04-01T12:00 2023-04-01 2023-04-01T12:00:01 +2023-04-01 \N 2023-04-01 \N +\N \N \N \N +\N \N \N \N + +-- !select_struct_subscript_4 -- +a abc abc +hi doris hello doris +a abc abc +a \N abc +\N \N \N +NULL null \N + +-- !select_struct_subscript_null -- +\N + -- !sql_before -- {"col_11":123.321, "col_12":{"col":1, "col1":345.24}} diff --git a/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy index 97a099dbbbd6c4..b60915ab89213e 100644 --- a/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/struct_functions/test_struct_functions.groovy @@ -48,6 +48,27 @@ suite("test_struct_functions") { qt_select_struct_element_3 "SELECT element_at(k4,1),element_at(k4,2),element_at(k4,3),element_at(k4,4) FROM ${tableName} ORDER BY k1" qt_select_struct_element_4 "SELECT element_at(k5,1),element_at(k5,2),element_at(k5,3) FROM ${tableName} ORDER BY k1" + // DORIS-26105: subscript syntax `s['field']` / `s[index]` on STRUCT should work the same as + // element_at / struct_element and must not crash the BE (especially for NULL struct rows). + qt_select_struct_subscript_1 "SELECT k2['f1'],k2['f2'],k2['f3'],k2['f4'],k2['f5'] FROM ${tableName} ORDER BY k1" + qt_select_struct_subscript_2 "SELECT k3['f1'],k3['f2'],k3['f3'] FROM ${tableName} ORDER BY k1" + qt_select_struct_subscript_3 "SELECT k4[1],k4[2],k4[3],k4[4] FROM ${tableName} ORDER BY k1" + qt_select_struct_subscript_4 "SELECT k5[1],k5[2],k5[3] FROM ${tableName} ORDER BY k1" + + // exact repro from DORIS-26105: subscript on a struct column whose value is NULL + sql """ DROP TABLE IF EXISTS tbl_struct_subscript_null """ + sql """ + CREATE TABLE IF NOT EXISTS tbl_struct_subscript_null ( + id int, + profile STRUCT NULL + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + sql """ INSERT INTO tbl_struct_subscript_null VALUES(1, null) """ + qt_select_struct_subscript_null "SELECT profile['score'] FROM tbl_struct_subscript_null" + //The precision of the decimal type in the test select is inconsistent with the precision of the function named_struct containing the decimal type. sql """ drop table if exists t01 --force """; sql """ create table if not exists t01 (a decimal(6,3), d struct) properties ("replication_num"="1");""" From 94397e5b8cd7026029bc76ccaffe209076de7179 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Wed, 3 Jun 2026 22:22:03 +0800 Subject: [PATCH 2/2] [fix](struct) match struct field name case-insensitively Projecting a struct field by its original mixed-case name (e.g. reading a migrated Iceberg table whose schema keeps sName while Doris stores it as sname, then querying sName) failed with an opaque error: [INTERNAL_ERROR] function return type check failed ... real_return_type is nullptr. Root cause: struct field names are canonically lower-cased by the FE (both native DDL and the Iceberg schema read go through StructField.toLowerCase), but a query may reference a field with different casing. The FE matches the field case-insensitively and forwards the original-case name to the BE, while DataTypeStruct::try_get_position_by_name compared case-sensitively, so the field was not found, get_return_type returned nullptr and build() reported the opaque internal error. Fix: - DataTypeStruct::try_get_position_by_name / get_position_by_name match the field name case-insensitively (iequal). - element_at's struct return-type path throws the concrete "field not found / index out of bound" error instead of returning nullptr, so an unresolvable field surfaces a clear message rather than an internal error. DORIS-26117 Co-Authored-By: Claude Opus 4.8 --- be/src/core/data_type/data_type_struct.cpp | 8 ++- .../function/array/function_array_element.h | 12 ++--- .../function/function_struct_element_test.cpp | 52 +++++++++++++++++++ 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/be/src/core/data_type/data_type_struct.cpp b/be/src/core/data_type/data_type_struct.cpp index ed1e56c51c5365..4b3d5291c317f9 100644 --- a/be/src/core/data_type/data_type_struct.cpp +++ b/be/src/core/data_type/data_type_struct.cpp @@ -44,6 +44,7 @@ #include "core/column/column_struct.h" #include "core/string_buffer.hpp" #include "core/string_ref.h" +#include "util/string_util.h" namespace doris { @@ -149,7 +150,7 @@ bool DataTypeStruct::equals(const IDataType& rhs) const { size_t DataTypeStruct::get_position_by_name(const String& name) const { size_t size = elems.size(); for (size_t i = 0; i < size; ++i) { - if (names[i] == name) { + if (iequal(names[i], name)) { return i; } } @@ -159,9 +160,12 @@ size_t DataTypeStruct::get_position_by_name(const String& name) const { } std::optional DataTypeStruct::try_get_position_by_name(const String& name) const { + // Struct field names are canonically lower-cased by the FE, but a query may reference a field + // with different casing (e.g. migrated Iceberg tables store sName as sname). Match the field + // name case-insensitively so projection resolves the field instead of failing on the BE. size_t size = elems.size(); for (size_t i = 0; i < size; ++i) { - if (names[i] == name) { + if (iequal(names[i], name)) { return std::optional(i); } } diff --git a/be/src/exprs/function/array/function_array_element.h b/be/src/exprs/function/array/function_array_element.h index b907fc4834ca4d..7745d5dce022fa 100644 --- a/be/src/exprs/function/array/function_array_element.h +++ b/be/src/exprs/function/array/function_array_element.h @@ -88,12 +88,12 @@ class FunctionArrayElement : public IFunction { if (arg_0->get_primitive_type() == TYPE_STRUCT) { const auto* struct_type = check_and_get_data_type(arg_0.get()); size_t index = 0; - // The index is always a constant literal for struct field access; if it cannot be - // resolved (out of bound / unknown name) we return nullptr and the framework reports it. - if (!get_struct_element_index(*struct_type, arguments[1].column, arguments[1].type, - &index) - .ok()) { - return nullptr; + // Throw the concrete error (field not found / out of bound) instead of returning + // nullptr, which the framework would report as an opaque "return type check failed". + Status st = get_struct_element_index(*struct_type, arguments[1].column, + arguments[1].type, &index); + if (!st.ok()) { + throw doris::Exception(st); } return make_nullable(struct_type->get_elements()[index]); } diff --git a/be/test/exprs/function/function_struct_element_test.cpp b/be/test/exprs/function/function_struct_element_test.cpp index 0f98955e85f70d..94998bf5bbe8b8 100644 --- a/be/test/exprs/function/function_struct_element_test.cpp +++ b/be/test/exprs/function/function_struct_element_test.cpp @@ -20,6 +20,7 @@ #include #include +#include "common/exception.h" #include "core/column/column_const.h" #include "core/column/column_struct.h" #include "core/data_type/data_type.h" @@ -114,4 +115,55 @@ TEST(FunctionStructElementTest, test_return_column) { EXPECT_TRUE(block.get_by_position(2).column->is_nullable()); } +TEST(FunctionStructElementTest, test_field_name_case_insensitive) { + auto index_type = std::make_shared(); + // query uses "KEY2" but the struct field is stored as "key2" + auto index_column = ColumnHelper::create_column({"KEY2"}); + + DataTypes struct_types = {std::make_shared(), std::make_shared(), + std::make_shared()}; + Strings names = {"key1", "key2", "key3"}; + auto type_struct = std::make_shared(struct_types, names); + + auto argument_template = ColumnsWithTypeAndName {{nullptr, type_struct, "struct"}, + {index_column, index_type, "index"}}; + + auto function = SimpleFunctionFactory::instance().get_function( + "struct_element", argument_template, + std::make_shared(std::make_shared()), {true}, + BeExecVersionManager::get_newest_version()); + + EXPECT_TRUE(function != nullptr); + EXPECT_EQ("Nullable(INT)", function->get_return_type()->get_name()); +} + +TEST(FunctionStructElementTest, test_field_not_found) { + auto index_type = std::make_shared(); + auto index_column = ColumnHelper::create_column({"not_exist_field"}); + + DataTypes struct_types = {std::make_shared(), std::make_shared(), + std::make_shared()}; + Strings names = {"key1", "key2", "key3"}; + auto type_struct = std::make_shared(struct_types, names); + + auto argument_template = ColumnsWithTypeAndName {{nullptr, type_struct, "struct"}, + {index_column, index_type, "index"}}; + + std::string err; + try { + auto function = SimpleFunctionFactory::instance().get_function( + "struct_element", argument_template, + std::make_shared(std::make_shared()), {true}, + BeExecVersionManager::get_newest_version()); + if (function != nullptr) { + static_cast(function->get_return_type()); + } + } catch (const doris::Exception& e) { + err = e.what(); + } + + std::cout << "field-not-found error: " << err << std::endl; + EXPECT_TRUE(err.find("not found") != std::string::npos); +} + } // namespace doris \ No newline at end of file