Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@
import com.apple.foundationdb.relational.util.Assert;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;

Check notice on line 62 in fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/visitors/ExpressionVisitor.java

View workflow job for this annotation

GitHub Actions / coverage

File coverage: 94.7% (463/489 lines) | Changed lines: 100.0% (6/6 lines)
import com.google.common.collect.Iterables;
import com.google.common.collect.Streams;
import com.google.protobuf.ZeroCopyByteString;
import org.antlr.v4.runtime.ParserRuleContext;
Expand Down Expand Up @@ -891,12 +892,22 @@
} else {
final var star = getDelegate().getSemanticAnalyzer().expandStar(Optional.of(id), getDelegate().getLogicalOperators());
final var resultValue = star.getUnderlying();
return Expression.ofUnnamed(resultValue);
// Name the column after the qualifier (table name or alias) that was expanded.
return Expression.of(resultValue, id);
}
}
if (ctx.STAR() != null) {
final var star = getDelegate().getSemanticAnalyzer().expandStar(Optional.empty(), getDelegate().getLogicalOperators());
final var resultValue = star.getUnderlying();
// When there is exactly one table in scope, name the column after that table/alias.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is not very accurate, technically PartiQL expansions are also one-table expansion that end up with multiple for-each columns, which should be avoided you do, correctly so.

You can make the comment more accurate by stating that the we avoid all types of joins (standard and partiQL unnest) since their result include attributes originating from different quantifiers anyways.

// With multiple tables the expansion is ambiguous, so fall back to an unnamed column.
final var forEachOps = getDelegate().getLogicalOperators().forEachOnly();
if (Iterables.size(forEachOps) == 1) {
final Optional<Identifier> tableName = Iterables.getOnlyElement(forEachOps).getName();
if (tableName.isPresent()) {
return Expression.of(resultValue, tableName.get());
Comment on lines +906 to +908
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
final Optional<Identifier> tableName = Iterables.getOnlyElement(forEachOps).getName();
if (tableName.isPresent()) {
return Expression.of(resultValue, tableName.get());
return Iterables.getOnlyElement(forEachOps).getName().map(name -> Expression.of(resultValue, name))
.orElse(Expression.ofUnnamed(resultValue));

}
}
return Expression.ofUnnamed(resultValue);
}
final var expressions = parseRecordFieldsUnderReorderings(ctx.expressionWithOptionalName());
Expand Down
3 changes: 2 additions & 1 deletion yaml-tests/src/test/java/CheckResultMetadataTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ static Stream<String> shouldPass() {
"array-of-struct-column", // array-of-struct with nested element field descriptors
"struct-type-name", // struct type name as optional prefix in field list
"field-named-array", // struct field named "array" — no clash with {array: ...} map syntax
"type-named-array" // struct type named "array" — no clash with {array: ...} map syntax
"type-named-array", // struct type named "array" — no clash with {array: ...} map syntax
"star-expression-metadata" // column names from SELECT (*) / SELECT (T.*) / SELECT (*) AS alias
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a test for SELECT (*) FROM VALUES (1, 2), (3, 4) as well?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make sure to add tests for PartiQL nesting vs. unnest syntax, something like:

  create type as struct s1(a bigint, b bigint)
  create type as struct s2(c bigint, d s1 array)
  create table t1(col1 bigint, col2 s2 array, primary key(col1))
  select (*) from (select * from t1, t1.col2 as k, k.d as m) as p
  select h.* from (select (*) from (select * from t1, t1.col2 as k, k.d as m) as h) 

and so on.

# star-expression-metadata.yamsql
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2015-2026 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Tests the result-set column names produced by parenthesised-star SELECT expressions.
#
# Parenthesised-star forms — SELECT (*) and SELECT (T.*) — pack the entire row into a
# single struct-typed column. The outer column name is inferred as follows:
#
# SELECT (T.*) → column named T (explicit qualifier used as name)
# SELECT (*) single source → column named after the sole table/alias/function in scope
# SELECT (*) / (T.*) AS X → explicit alias overrides inferred name
# SELECT (*) multi-table → column is anonymous ("_0") — source is ambiguous
---
schema_template:
create table foo(id bigint, val bigint, primary key(id))
create table bar(bid bigint, bval bigint, primary key(bid))
---
transaction_setups:
tvf_sq: create temporary function sq(in x bigint) on commit drop function AS
SELECT id, val FROM foo WHERE id > x
---
setup:
steps:
- query: INSERT INTO foo VALUES (1, 10)
- query: INSERT INTO bar VALUES (1, 20)
---
test_block:
name: table-source
tests:
-
# Case 1: SELECT (*) FROM FOO → single table in scope → column named FOO
- query: SELECT (*) FROM foo
- resultMetadata: [{FOO: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{FOO: {ID: 1, VAL: 10}}]
-
# Case 2: SELECT (FOO.*) FROM FOO → qualifier is used as column name
- query: SELECT (foo.*) FROM foo
- resultMetadata: [{FOO: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{FOO: {ID: 1, VAL: 10}}]
-
# Case 3: SELECT (*) FROM FOO AS BAR → table alias in scope → column named BAR
- query: SELECT (*) FROM foo AS bar
- resultMetadata: [{BAR: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{BAR: {ID: 1, VAL: 10}}]
-
# Case 4: SELECT (BAR.*) FROM FOO AS BAR → qualifier (the alias) is used as column name
- query: SELECT (bar.*) FROM foo AS bar
- resultMetadata: [{BAR: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{BAR: {ID: 1, VAL: 10}}]
-
# Case 5: SELECT (*) AS BAR FROM FOO → explicit alias overrides inferred name
- query: SELECT (*) AS bar FROM foo
- resultMetadata: [{BAR: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{BAR: {ID: 1, VAL: 10}}]
-
# Case 6: SELECT (FOO.*) AS BAR FROM FOO → explicit alias overrides qualifier name
- query: SELECT (foo.*) AS bar FROM foo
- resultMetadata: [{BAR: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{BAR: {ID: 1, VAL: 10}}]
---
test_block:
name: subquery-source
tests:
-
# Subquery with alias → column named after the alias
- query: SELECT (*) FROM (SELECT id, val FROM foo) AS sub
- resultMetadata: [{SUB: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{SUB: {ID: 1, VAL: 10}}]
-
# (T.*) where T is a subquery alias → same rule as for a table alias
- query: SELECT (sub.*) FROM (SELECT id, val FROM foo) AS sub
- resultMetadata: [{SUB: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{SUB: {ID: 1, VAL: 10}}]
---
test_block:
name: function-source
tests:
-
# TVF without alias → column named after the function
- query: SELECT (*) FROM sq(0)
- setupReference: tvf_sq
- resultMetadata: [{SQ: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{SQ: {ID: 1, VAL: 10}}]
-
# TVF with alias → alias wins
- query: SELECT (*) FROM sq(0) AS f
- setupReference: tvf_sq
- resultMetadata: [{F: [{ID: BIGINT}, {VAL: BIGINT}]}]
- result: [{F: {ID: 1, VAL: 10}}]
---
test_block:
name: values-source
tests:
-
# VALUES without alias has no name → source is anonymous, column is _0
- query: SELECT (*) FROM VALUES (1, 2), (3, 4)
- resultMetadata: [{_0: [{_0: INTEGER}, {_1: INTEGER}]}]
- unorderedResult: [{_0: {_0: 1, _1: 2}}, {_0: {_0: 3, _1: 4}}]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we can also add a test for a named values source, I tested it locally and I think it works correctly

Suggested change
- unorderedResult: [{_0: {_0: 1, _1: 2}}, {_0: {_0: 3, _1: 4}}]
- unorderedResult: [{_0: {_0: 1, _1: 2}}, {_0: {_0: 3, _1: 4}}]
-
# VALUES with in-line table definition → column named after the table definition
- query: SELECT (*) FROM VALUES (1, 2), (3, 4) AS X(A, B)
- resultMetadata: [{X: [{A: INTEGER}, {B: INTEGER}]}]
- unorderedResult: [{X: {A: 1, B: 2}}, {X: {A: 3, B: 4}}]

---
test_block:
name: ambiguous-join-source
tests:
-
# Two tables in scope → source is ambiguous, column is anonymous
- query: SELECT (*) FROM foo, bar WHERE foo.id = bar.bid
- resultMetadata: [{_0: [{ID: BIGINT}, {VAL: BIGINT}, {BID: BIGINT}, {BVAL: BIGINT}]}]
- result: [{_0: {ID: 1, VAL: 10, BID: 1, BVAL: 20}}]
...
4 changes: 2 additions & 2 deletions yaml-tests/src/test/resources/join-tests.yamsql
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ test_block:
-
# ambiguous sub select is fine if the top-level query does not specify column names
- query: select (*) from (select dept.name, project.name from emp, dept, project where emp.dept_id = dept.id and project.emp_id = emp.id) X;
- explain: "SCAN([IS DEPT]) | FLATMAP q0 -> { SCAN([IS PROJECT]) | FLATMAP q1 -> { SCAN([IS EMP]) | FILTER _.DEPT_ID EQUALS q0.ID AND q1.EMP_ID EQUALS _.ID AS q2 RETURN q1 } AS q1 RETURN ((q0.NAME AS _0, q1.NAME AS _1) AS _0) }"
- explain: "SCAN([IS DEPT]) | FLATMAP q0 -> { SCAN([IS PROJECT]) | FLATMAP q1 -> { SCAN([IS EMP]) | FILTER _.DEPT_ID EQUALS q0.ID AND q1.EMP_ID EQUALS _.ID AS q2 RETURN q1 } AS q1 RETURN ((q0.NAME AS _0, q1.NAME AS _1) AS X) }"
- unorderedResult: [{{"Engineering", "OLAP"}},
{{"Sales", "Feedback"}},
{{"Marketing", "SEO"}}]
Expand Down Expand Up @@ -504,7 +504,7 @@ test_block:
-
# ambiguous sub-select is fine if the top-level query does not specify column names (select (*))
- query: select (*) from (select dept.name, project.name from emp, dept, project where emp.dept_id = dept.id and project.emp_id = emp.id) X;
- explain: "SCAN([IS DEPT]) | FLATMAP q0 -> { SCAN([IS PROJECT]) | FLATMAP q1 -> { SCAN([IS EMP]) | FILTER _.DEPT_ID EQUALS q0.ID AND q1.EMP_ID EQUALS _.ID AS q2 RETURN q1 } AS q1 RETURN ((q0.NAME AS _0, q1.NAME AS _1) AS _0) }"
- explain: "SCAN([IS DEPT]) | FLATMAP q0 -> { SCAN([IS PROJECT]) | FLATMAP q1 -> { SCAN([IS EMP]) | FILTER _.DEPT_ID EQUALS q0.ID AND q1.EMP_ID EQUALS _.ID AS q2 RETURN q1 } AS q1 RETURN ((q0.NAME AS _0, q1.NAME AS _1) AS X) }"
- unorderedResult: [{{"Engineering", "OLAP"}},
{{"Sales", "Feedback"}},
{{"Marketing", "SEO"}}]
Expand Down
Loading
Loading